├── .clang-format ├── .gitignore ├── .travis.yml ├── CMakeLists.txt ├── LICENSE.txt ├── README.md ├── build.py ├── format-all.py ├── img └── fft_performance.png ├── include └── kfr │ ├── all.hpp │ ├── base │ ├── abs.hpp │ ├── asin_acos.hpp │ ├── atan.hpp │ ├── complex.hpp │ ├── constants.hpp │ ├── digitreverse.hpp │ ├── dispatch.hpp │ ├── expression.hpp │ ├── function.hpp │ ├── gamma.hpp │ ├── hyperbolic.hpp │ ├── intrinsics.h │ ├── kfr.h │ ├── log_exp.hpp │ ├── logical.hpp │ ├── memory.hpp │ ├── min_max.hpp │ ├── operators.hpp │ ├── read_write.hpp │ ├── round.hpp │ ├── saturation.hpp │ ├── select.hpp │ ├── shuffle.hpp │ ├── sin_cos.hpp │ ├── sinh_cosh.hpp │ ├── specializations.i │ ├── sqrt.hpp │ ├── tan.hpp │ ├── types.hpp │ ├── univector.hpp │ └── vec.hpp │ ├── cident.h │ ├── cometa.hpp │ ├── cometa │ └── string.hpp │ ├── data │ ├── bitrev.hpp │ └── sincos.hpp │ ├── dft │ ├── bitrev.hpp │ ├── conv.hpp │ ├── fft.hpp │ ├── ft.hpp │ └── reference_dft.hpp │ ├── dispatch │ ├── cpuid.hpp │ ├── cpuid_auto.hpp │ └── runtimedispatch.hpp │ ├── expressions │ ├── basic.hpp │ ├── conversion.hpp │ ├── generators.hpp │ ├── operators.hpp │ ├── pointer.hpp │ └── reduce.hpp │ ├── io │ ├── audiofile.hpp │ ├── file.hpp │ ├── python_plot.hpp │ └── tostring.hpp │ ├── math.hpp │ ├── misc │ ├── compiletime.hpp │ ├── random.hpp │ ├── small_buffer.hpp │ └── sort.hpp │ ├── vec.hpp │ └── version.hpp ├── sources.cmake ├── syntax-check.py └── tests ├── CMakeLists.txt ├── dft_test.cpp └── testo ├── print_colored.hpp └── testo.hpp /.clang-format: -------------------------------------------------------------------------------- 1 | UseTab: Never 2 | IndentWidth: 4 3 | Language : Cpp 4 | BreakBeforeBraces: Allman 5 | MaxEmptyLinesToKeep: 1 6 | IndentCaseLabels: false 7 | NamespaceIndentation: None 8 | AccessModifierOffset: -4 9 | SpacesInParentheses: false 10 | SpaceInEmptyParentheses: false 11 | SpacesInCStyleCastParentheses: false 12 | PointerAlignment: Left 13 | Cpp11BracedListStyle: false 14 | 
AllowShortIfStatementsOnASingleLine: false 15 | AllowShortFunctionsOnASingleLine : true 16 | AlignOperands: true 17 | Standard: Cpp11 18 | IndentCaseLabels: false 19 | AlignTrailingComments : false 20 | ConstructorInitializerAllOnOneLineOrOnePerLine : false 21 | ColumnLimit: 110 22 | BinPackParameters : true 23 | BinPackArguments : true 24 | AlwaysBreakTemplateDeclarations : true 25 | AlignConsecutiveAssignments : true 26 | PenaltyReturnTypeOnItsOwnLine: 50000 27 | CommentPragmas: '^ >>>' 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | *.smod 19 | 20 | # Compiled Static libraries 21 | *.lai 22 | *.la 23 | *.a 24 | *.lib 25 | 26 | # Executables 27 | *.exe 28 | *.out 29 | *.app 30 | 31 | # CMake files 32 | CMakeCache.txt 33 | CMakeFiles 34 | CMakeScripts 35 | Makefile 36 | cmake_install.cmake 37 | install_manifest.txt 38 | CTestTestfile.cmake 39 | 40 | # build directory 41 | build/ 42 | 43 | # test directory 44 | svg/ 45 | 46 | # Byte-compiled / optimized / DLL files 47 | __pycache__/ 48 | *.py[cod] 49 | *$py.class 50 | 51 | # Distribution / packaging 52 | .Python 53 | env/ 54 | build/ 55 | develop-eggs/ 56 | dist/ 57 | downloads/ 58 | eggs/ 59 | .eggs/ 60 | lib/ 61 | lib64/ 62 | parts/ 63 | sdist/ 64 | var/ 65 | *.egg-info/ 66 | .installed.cfg 67 | *.egg 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # CLion 73 | .idea/ 74 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | dist: precise 3 | language: cpp 4 | 5 | matrix: 6 | include: 7 | - 
compiler: clang 8 | addons: 9 | apt: 10 | sources: 11 | - ubuntu-toolchain-r-test 12 | - llvm-toolchain-precise-3.8 13 | - george-edison55-precise-backports 14 | packages: 15 | - cmake 16 | - cmake-data 17 | - g++-5 18 | - clang-3.8 19 | env: 20 | - CXXCOMPILER=clang++-3.8 CCOMPILER=clang-3.8 21 | 22 | before_install: 23 | - sudo apt-get update -qq 24 | script: 25 | - mkdir build 26 | - cd build 27 | - cmake -DCMAKE_CXX_COMPILER=$CXXCOMPILER -DCMAKE_C_COMPILER=$CCOMPILER -DCMAKE_BUILD_TYPE=Release .. && make 28 | - cd tests 29 | - ctest 30 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 D Levin (http://www.kfrlib.com) 2 | # This file is part of KFR 3 | # 4 | # KFR is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # KFR is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with KFR. 
16 | 17 | 18 | cmake_minimum_required(VERSION 3.0) 19 | 20 | set(OPT_BITNESS "") # cmake -DOPT_BITNESS="-m32" or -m64 21 | set(OPT_STATIC "") # cmake -DOPT_STATIC="-static" 22 | 23 | if (CMAKE_BUILD_TYPE_INITIALIZED_TO_DEFAULT) 24 | set(CMAKE_BUILD_TYPE Release) 25 | endif () 26 | if (${CMAKE_GENERATOR} STREQUAL "MinGW Makefiles" OR ${CMAKE_GENERATOR} STREQUAL "MSYS Makefiles") 27 | set(OPT_TARGET "--target=x86_64-w64-windows-gnu") 28 | set(CMAKE_CXX_COMPILER clang++) 29 | set(CMAKE_C_COMPILER clang) 30 | else () 31 | set(OPT_TARGET "") # default target 32 | endif () 33 | 34 | if (${CMAKE_GENERATOR} STREQUAL "Visual Studio 14 2015") 35 | set(CMAKE_GENERATOR_TOOLSET LLVM-vs2014) 36 | endif () 37 | set(CMAKE_CXX_FLAGS "${OPT_TARGET} ${OPT_BITNESS} ${OPT_STATIC}" CACHE STRING "compile flags" FORCE) 38 | set(CMAKE_C_FLAGS "${OPT_TARGET} ${OPT_BITNESS} ${OPT_STATIC}" CACHE STRING "compile flags" FORCE) 39 | #set(CMAKE_EXE_LINKER_FLAGS "${OPT_TARGET} ${OPT_BITNESS}") 40 | #set(CMAKE_SHARED_LINKER_FLAGS "${OPT_TARGET} ${OPT_BITNESS}") 41 | #set(CMAKE_STATIC_LINKER_FLAGS "${OPT_TARGET} ${OPT_BITNESS}") 42 | 43 | project(kfr) 44 | 45 | include(sources.cmake) 46 | 47 | set(ALL_WARNINGS -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-c99-extensions -Wno-padded) 48 | 49 | if (NOT MSVC) 50 | add_compile_options(-std=c++1y) 51 | else () 52 | add_compile_options(/EHsc /D_HAS_EXCEPTIONS=0) 53 | endif () 54 | 55 | add_subdirectory(tests) 56 | 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kfr-fft 2 | Highly optimized FFT 3 | 4 | KFR is a fast, modern C++ DSP framework, DFT/FFT, Audio resampling, FIR/IIR Filtering, Biquad, vector functions (SSE, AVX) 5 | 6 | ## Features 7 | 8 | * FFT is optimized for SSE2, SSE3, SSE4.x, AVX and AVX2 processors 9 | * Both double and single precision 10 | 11 | ## Performance 12 | 13 | FFT (double 
precision, sizes from 1024 to 16777216) 14 | See [fft benchmark](https://github.com/kfrlib/fft-benchmark) for details about the benchmarking process. 15 | 16 | ![FFT Performance](img/fft_performance.png) 17 | 18 | ## Prerequisites 19 | 20 | * macOS: XCode 6.3, 6.4, 7.x, 8.x 21 | * Windows: MinGW 5.2 and Clang 3.7 or newer 22 | * Ubuntu: GCC 5.1 and Clang 3.7 or newer 23 | * CoMeta metaprogramming library (already included) 24 | 25 | ## Tests 26 | 27 | Execute `build.py` to run the tests or run tests manually from the `tests` directory 28 | 29 | Tested on the following systems: 30 | 31 | * OS X 10.11.4 / AppleClang 7.3.0.7030031 32 | * Ubuntu 14.04 / gcc-5 (Ubuntu 5.3.0-3ubuntu1~14.04) 5.3.0 20151204 / clang version 3.8.0 (tags/RELEASE_380/final) 33 | * Windows 8.1 / MinGW-W64 / clang version 3.8.0 (branches/release_38) 34 | 35 | ## Planned for future versions 36 | 37 | * DFT for any length (not only powers of two) 38 | 39 | ## License 40 | 41 | KFR is dual-licensed, available under both commercial and open-source GPL licenses. 42 | 43 | If you want to use KFR in a commercial product or a closed-source project, you need to [purchase a Commercial License](http://kfrlib.com/purchase-license) 44 | -------------------------------------------------------------------------------- /build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (C) 2016 D Levin (http://www.kfrlib.com) 4 | # This file is part of KFR 5 | # 6 | # KFR is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | # 11 | # KFR is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 14 | # GNU General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU General Public License 17 | # along with KFR. 18 | 19 | 20 | from __future__ import print_function 21 | 22 | import os 23 | import subprocess 24 | import sys 25 | 26 | path = os.path.dirname(os.path.realpath(__file__)) 27 | build_dir = os.path.join(path, 'build') 28 | 29 | try: 30 | os.makedirs(build_dir) 31 | except: 32 | pass 33 | 34 | print('Checking clang...', end=' ') 35 | if subprocess.call(['clang', '--version'], stdout=subprocess.PIPE): 36 | raise Exception('clang is not on your PATH') 37 | print('ok') 38 | print('Checking clang++...', end=' ') 39 | if subprocess.call(['clang++', '--version'], stdout=subprocess.PIPE): 40 | raise Exception('clang++ is not on your PATH') 41 | print('ok') 42 | 43 | if sys.platform.startswith('win32'): 44 | generator = 'MinGW Makefiles' 45 | else: 46 | generator = 'Unix Makefiles' 47 | 48 | options = [ 49 | '-DCMAKE_BUILD_TYPE=Release', 50 | ] 51 | 52 | if subprocess.call(['cmake', '-G', generator, '..'] + options, cwd=build_dir): raise Exception('Can\'t make project') 53 | if subprocess.call(['cmake', '--build', '.', '--', '-j4'], cwd=build_dir): raise Exception('Can\'t build project') 54 | if subprocess.call(['ctest'], cwd=os.path.join(build_dir, 'tests')): raise Exception('Can\'t test project') 55 | -------------------------------------------------------------------------------- /format-all.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | import fnmatch 5 | import os 6 | import subprocess 7 | import sys 8 | import glob 9 | 10 | path = os.path.dirname(os.path.realpath(__file__)) 11 | 12 | filenames = [] 13 | for root, dirnames, files in os.walk(path, path): 14 | for filename in fnmatch.filter(files, '*.hpp'): 15 | filenames.append(os.path.join(root, filename)) 16 | for filename in 
fnmatch.filter(files, '*.h'): 17 | filenames.append(os.path.join(root, filename)) 18 | for filename in fnmatch.filter(files, '*.cpp'): 19 | filenames.append(os.path.join(root, filename)) 20 | 21 | for filename in filenames: 22 | print( filename, '...' ) 23 | subprocess.call(['clang-format', '-i', filename]) 24 | # Fix clang-format bug: https://llvm.org/bugs/show_bug.cgi?id=26125 25 | for tmp_file in glob.glob(filename+'*.tmp'): 26 | os.remove(tmp_file) 27 | -------------------------------------------------------------------------------- /img/fft_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfrlib/fft/6b6a9315d8b690766b6e121611f68727ea7b2112/img/fft_performance.png -------------------------------------------------------------------------------- /include/kfr/all.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
22 | */ 23 | 24 | #include "cometa/string.hpp" 25 | 26 | #include "base/abs.hpp" 27 | #include "base/asin_acos.hpp" 28 | #include "base/atan.hpp" 29 | #include "base/complex.hpp" 30 | #include "base/constants.hpp" 31 | #include "base/digitreverse.hpp" 32 | #include "base/dispatch.hpp" 33 | #include "base/function.hpp" 34 | #include "base/gamma.hpp" 35 | #include "base/hyperbolic.hpp" 36 | #include "base/log_exp.hpp" 37 | #include "base/logical.hpp" 38 | #include "base/memory.hpp" 39 | #include "base/min_max.hpp" 40 | #include "base/operators.hpp" 41 | #include "base/read_write.hpp" 42 | #include "base/round.hpp" 43 | #include "base/saturation.hpp" 44 | #include "base/select.hpp" 45 | #include "base/shuffle.hpp" 46 | #include "base/sin_cos.hpp" 47 | #include "base/sqrt.hpp" 48 | #include "base/tan.hpp" 49 | #include "base/types.hpp" 50 | #include "base/univector.hpp" 51 | #include "base/vec.hpp" 52 | #include "dispatch/cpuid.hpp" 53 | #include "dispatch/runtimedispatch.hpp" 54 | #include "expressions/basic.hpp" 55 | #include "expressions/conversion.hpp" 56 | #include "expressions/generators.hpp" 57 | #include "expressions/operators.hpp" 58 | #include "expressions/pointer.hpp" 59 | #include "expressions/reduce.hpp" 60 | #include "version.hpp" 61 | 62 | #include "misc/compiletime.hpp" 63 | #include "misc/random.hpp" 64 | #include "misc/small_buffer.hpp" 65 | #include "misc/sort.hpp" 66 | 67 | #include "data/bitrev.hpp" 68 | #include "data/sincos.hpp" 69 | #include "dsp/biquad.hpp" 70 | #include "dsp/biquad_design.hpp" 71 | #include "dsp/fir.hpp" 72 | #include "dsp/fir_design.hpp" 73 | #include "dsp/fracdelay.hpp" 74 | #include "dsp/goertzel.hpp" 75 | #include "dsp/impulse.hpp" 76 | #include "dsp/interpolation.hpp" 77 | #include "dsp/oscillators.hpp" 78 | #include "dsp/resample.hpp" 79 | #include "dsp/speaker.hpp" 80 | #include "dsp/units.hpp" 81 | #include "dsp/waveshaper.hpp" 82 | #include "dsp/weighting.hpp" 83 | #include "dsp/window.hpp" 84 | #include 
"io/audiofile.hpp" 85 | #include "io/file.hpp" 86 | #include "io/python_plot.hpp" 87 | #include "io/tostring.hpp" 88 | #include "math.hpp" 89 | #include "vec.hpp" 90 | 91 | #include "dft/bitrev.hpp" 92 | #include "dft/conv.hpp" 93 | #include "dft/fft.hpp" 94 | #include "dft/ft.hpp" 95 | #include "dft/reference_dft.hpp" 96 | -------------------------------------------------------------------------------- /include/kfr/base/abs.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
22 | */ 23 | #pragma once 24 | 25 | #include "function.hpp" 26 | #include "operators.hpp" 27 | #include "select.hpp" 28 | 29 | #pragma clang diagnostic push 30 | #if CID_HAS_WARNING("-Winaccessible-base") 31 | #pragma clang diagnostic ignored "-Winaccessible-base" 32 | #endif 33 | 34 | namespace kfr 35 | { 36 | 37 | namespace internal 38 | { 39 | 40 | template 41 | struct in_abs : in_abs 42 | { 43 | struct fn_abs : in_abs::fn_abs, fn_disabled 44 | { 45 | }; 46 | }; 47 | 48 | template <> 49 | struct in_abs : in_select 50 | { 51 | constexpr static cpu_t cpu = cpu_t::sse2; 52 | 53 | private: 54 | using in_select::select; 55 | 56 | public: 57 | template ::value)> 58 | KFR_SINTRIN vec abs(vec value) 59 | { 60 | return select(value >= T(), value, -value); 61 | } 62 | template ::value)> 63 | KFR_SINTRIN vec abs(vec value) 64 | { 65 | return value & invhighbitmask; 66 | } 67 | 68 | KFR_HANDLE_ALL(abs) 69 | KFR_HANDLE_SCALAR(abs) 70 | KFR_SPEC_FN(in_abs, abs) 71 | }; 72 | 73 | template <> 74 | struct in_abs : in_abs, in_select 75 | { 76 | constexpr static cpu_t cpu = cpu_t::ssse3; 77 | 78 | private: 79 | using in_select::select; 80 | 81 | public: 82 | template 83 | KFR_SINTRIN vec abs(vec value) 84 | { 85 | return select(value >= 0, value, -value); 86 | } 87 | 88 | KFR_CPU_INTRIN(ssse3) i32sse abs(i32sse value) { return _mm_abs_epi32(*value); } 89 | KFR_CPU_INTRIN(ssse3) i16sse abs(i16sse value) { return _mm_abs_epi16(*value); } 90 | KFR_CPU_INTRIN(ssse3) i8sse abs(i8sse value) { return _mm_abs_epi8(*value); } 91 | 92 | template ::value)> 93 | KFR_SINTRIN vec abs(vec value) 94 | { 95 | return value & invhighbitmask; 96 | } 97 | 98 | KFR_HANDLE_ALL(abs) 99 | KFR_HANDLE_SCALAR(abs) 100 | KFR_SPEC_FN(in_abs, abs) 101 | }; 102 | 103 | template <> 104 | struct in_abs : in_abs 105 | { 106 | constexpr static cpu_t cpu = cpu_t::avx2; 107 | using in_abs::abs; 108 | 109 | KFR_CPU_INTRIN(avx2) i32avx abs(i32avx value) { return _mm256_abs_epi32(*value); } 110 | KFR_CPU_INTRIN(avx2) 
i16avx abs(i16avx value) { return _mm256_abs_epi16(*value); } 111 | KFR_CPU_INTRIN(avx2) i8avx abs(i8avx value) { return _mm256_abs_epi8(*value); } 112 | 113 | KFR_HANDLE_ALL(abs) 114 | KFR_HANDLE_SCALAR(abs) 115 | KFR_SPEC_FN(in_abs, abs) 116 | }; 117 | } 118 | 119 | namespace native 120 | { 121 | using fn_abs = internal::in_abs<>::fn_abs; 122 | template ::value)> 123 | 124 | KFR_INTRIN ftype abs(const T1& x) 125 | { 126 | return internal::in_abs<>::abs(x); 127 | } 128 | 129 | template ::value)> 130 | 131 | KFR_INTRIN expr_func abs(E1&& x) 132 | { 133 | return { fn_abs(), std::forward(x) }; 134 | } 135 | } 136 | } 137 | 138 | #pragma clang diagnostic pop 139 | -------------------------------------------------------------------------------- /include/kfr/base/asin_acos.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
22 | */ 23 | #pragma once 24 | 25 | #include "abs.hpp" 26 | #include "atan.hpp" 27 | #include "constants.hpp" 28 | #include "function.hpp" 29 | #include "min_max.hpp" 30 | #include "operators.hpp" 31 | #include "select.hpp" 32 | #include "shuffle.hpp" 33 | #include "sqrt.hpp" 34 | 35 | #pragma clang diagnostic push 36 | #if CID_HAS_WARNING("-Winaccessible-base") 37 | #pragma clang diagnostic ignored "-Winaccessible-base" 38 | #endif 39 | 40 | namespace kfr 41 | { 42 | 43 | namespace internal 44 | { 45 | 46 | template 47 | struct in_asin_acos : private in_select, private in_atan, private in_sqrt 48 | { 49 | private: 50 | using in_atan::atan2; 51 | using in_sqrt::sqrt; 52 | 53 | public: 54 | template 55 | KFR_SINTRIN vec asin(vec x) 56 | { 57 | return atan2(x, sqrt(T(1) - x * x)); 58 | } 59 | 60 | template 61 | KFR_SINTRIN vec acos(vec x) 62 | { 63 | return atan2(sqrt(T(1) - x * x), x); 64 | } 65 | KFR_SPEC_FN(in_asin_acos, asin) 66 | KFR_SPEC_FN(in_asin_acos, acos) 67 | }; 68 | } 69 | 70 | namespace native 71 | { 72 | using fn_asin = internal::in_asin_acos<>::fn_asin; 73 | template ::value)> 74 | KFR_INTRIN ftype asin(const T1& x) 75 | { 76 | return internal::in_asin_acos<>::asin(x); 77 | } 78 | 79 | template ::value)> 80 | KFR_INTRIN expr_func asin(E1&& x) 81 | { 82 | return { fn_asin(), std::forward(x) }; 83 | } 84 | 85 | using fn_acos = internal::in_asin_acos<>::fn_acos; 86 | template ::value)> 87 | KFR_INTRIN ftype acos(const T1& x) 88 | { 89 | return internal::in_asin_acos<>::acos(x); 90 | } 91 | 92 | template ::value)> 93 | KFR_INTRIN expr_func acos(E1&& x) 94 | { 95 | return { fn_acos(), std::forward(x) }; 96 | } 97 | } 98 | } 99 | 100 | #pragma clang diagnostic pop 101 | -------------------------------------------------------------------------------- /include/kfr/base/constants.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 
| * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | 25 | #include "types.hpp" 26 | #include 27 | 28 | namespace kfr 29 | { 30 | 31 | // π (pi) 32 | // c_pi = 4pi 33 | // c_pi = 3/4pi 34 | template > 35 | constexpr Tsub c_pi = Tsub(3.1415926535897932384626433832795 * m / d); 36 | 37 | // π² (pi²) 38 | // c_sqr_pi = 4pi² 39 | // c_sqr_pi = 3/4pi² 40 | template > 41 | constexpr Tsub c_sqr_pi = Tsub(9.8696044010893586188344909998762 * m / d); 42 | 43 | // 1/π (1/pi) 44 | // c_recip_pi 1/pi 45 | // c_recip_pi 4/pi 46 | template > 47 | constexpr Tsub c_recip_pi = Tsub(0.31830988618379067153776752674503 * m / d); 48 | 49 | // degree to radian conversion factor 50 | template > 51 | constexpr Tsub c_degtorad = c_pi; 52 | 53 | // radian to degree conversion factor 54 | template > 55 | constexpr Tsub c_radtodeg = c_recip_pi; 56 | 57 | // e, Euler's number 58 | template > 59 | constexpr Tsub c_e = Tsub(2.718281828459045235360287471352662 * m / d); 60 | 61 | template > 62 | constexpr unsigned c_mantissa_bits = sizeof(Tsub) == 4 ? 
23 : 52; // sizeof counts bytes: 4-byte type -> 23, otherwise 52 63 | 64 | template > 65 | constexpr Tsub c_mantissa_mask = (Tsub(1) << c_mantissa_bits)-1; 66 | 67 | template > 68 | constexpr Tsub c_epsilon = (std::numeric_limits::epsilon()); 69 | 70 | template > 71 | constexpr Tsub c_infinity = std::numeric_limits::infinity(); 72 | 73 | template > 74 | constexpr Tsub c_neginfinity = -std::numeric_limits::infinity(); 75 | 76 | template > 77 | constexpr Tsub c_qnan = std::numeric_limits::quiet_NaN(); 78 | 79 | template > 80 | constexpr Tsub c_recip_log_2 = Tsub(1.442695040888963407359924681001892137426645954); 81 | 82 | template > 83 | constexpr Tsub c_recip_log_10 = Tsub(0.43429448190325182765112891891661); 84 | 85 | template > 86 | constexpr Tsub c_log_2 = Tsub(0.69314718055994530941723212145818); 87 | 88 | template > 89 | constexpr Tsub c_log_10 = Tsub(2.3025850929940456840179914546844); 90 | 91 | template > 92 | constexpr Tsub c_sqrt_2 = Tsub(1.4142135623730950488016887242097 * m / d); 93 | } 94 | -------------------------------------------------------------------------------- /include/kfr/base/digitreverse.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 
19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | #include "shuffle.hpp" 25 | #include "types.hpp" 26 | 27 | namespace kfr 28 | { 29 | 30 | namespace internal 31 | { 32 | 33 | template 34 | constexpr enable_if digitreverse(u32 x) 35 | { 36 | x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1)); 37 | x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2)); 38 | x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4)); 39 | x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8)); 40 | return ((x >> 16) | (x << 16)) >> (32 - bits); 41 | } 42 | 43 | constexpr inline u32 bit_permute_step_simple(u32 x, u32 m, u32 shift) 44 | { 45 | return ((x & m) << shift) | ((x >> shift) & m); 46 | } 47 | 48 | template 49 | constexpr enable_if digitreverse(u32 x) 50 | { 51 | if (bits <= 2) 52 | return x; 53 | if (bits <= 4) 54 | { 55 | x = bit_permute_step_simple(x, 0x33333333, 2); // Bit index complement 1 regroups 4 bits 56 | return x >> (4 - bits); 57 | } 58 | if (bits <= 8) 59 | { 60 | x = bit_permute_step_simple(x, 0x33333333, 2); // Bit index complement 1 regroups 4 bits 61 | x = bit_permute_step_simple(x, 0x0f0f0f0f, 4); // Bit index complement 2 regroups 8 bits 62 | return x >> (8 - bits); 63 | } 64 | if (bits <= 16) 65 | { 66 | x = bit_permute_step_simple(x, 0x33333333, 2); // Bit index complement 1 regroups 4 bits 67 | x = bit_permute_step_simple(x, 0x0f0f0f0f, 4); // Bit index complement 2 regroups 8 bits 68 | x = bit_permute_step_simple(x, 0x00ff00ff, 8); // Bit index complement 3 regroups 16 bits 69 | return x >> (16 - bits); 70 | } 71 | if (bits <= 32) 72 | { 73 | x = bit_permute_step_simple(x, 0x33333333, 2); // Bit index complement 1 regroups 4 bits 74 | x = bit_permute_step_simple(x, 0x0f0f0f0f, 4); // Bit index complement 2 regroups 8 bits 75 | x = bit_permute_step_simple(x, 
0x00ff00ff, 8); // Bit index complement 3 regroups 16 bits 76 | x = bit_permute_step_simple(x, 0x0000ffff, 16); // Bit index complement 4 regroups 32 bits 77 | return x >> (32 - bits); 78 | } 79 | return x; 80 | } 81 | 82 | template 83 | struct shuffle_index_digitreverse 84 | { 85 | constexpr inline size_t operator()(size_t index) const 86 | { 87 | return digitreverse(static_cast(index)); 88 | } 89 | }; 90 | } 91 | 92 | template 93 | KFR_INLINE vec digitreverse(vec x) 94 | { 95 | return shufflevector, groupsize>(x); 96 | } 97 | 98 | template 99 | KFR_INLINE vec bitreverse(vec x) 100 | { 101 | return digitreverse<2, groupsize>(x); 102 | } 103 | 104 | template 105 | KFR_INLINE vec digitreverse4(vec x) 106 | { 107 | return digitreverse<4, groupsize>(x); 108 | } 109 | 110 | template 111 | constexpr inline u32 bitreverse(u32 x) 112 | { 113 | return internal::digitreverse<2, bits>(x); 114 | } 115 | 116 | template 117 | constexpr inline u32 digitreverse4(u32 x) 118 | { 119 | return internal::digitreverse<4, bits>(x); 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /include/kfr/base/dispatch.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 
17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | 25 | #include "kfr.h" 26 | 27 | #include "types.hpp" 28 | 29 | namespace kfr 30 | { 31 | 32 | namespace internal 33 | { 34 | 35 | template 36 | struct retarget_impl 37 | { 38 | using type = Fn; 39 | }; 40 | 41 | template 42 | struct retarget_impl>> 43 | { 44 | using type = typename Fn::template retarget_this; 45 | }; 46 | } 47 | 48 | template 49 | using retarget = typename internal::retarget_impl::type; 50 | 51 | template , 52 | KFR_ENABLE_IF(std::is_constructible::value)> 53 | KFR_INLINE NewFn retarget_func(Fn&& fn) 54 | { 55 | return NewFn(std::forward<Fn>(fn)); // forward, not move: Fn&& is a forwarding reference and may bind an lvalue 56 | } 57 | 58 | template , 59 | KFR_ENABLE_IF(!std::is_constructible::value && std::is_empty::value && 60 | std::is_constructible::value)> 61 | KFR_INLINE NewEmptyFn retarget_func(Fn&&) 62 | { 63 | return NewEmptyFn(); 64 | } 65 | 66 | namespace internal 67 | { 68 | 69 | template 70 | struct cpu_caller; 71 | 72 | template <> 73 | struct cpu_caller 74 | { 75 | constexpr static cpu_t a = cpu_t::avx2; 76 | 77 | template 78 | KFR_NOINLINE static KFR_USE_CPU(avx2) result_of call(Fn&& fn, Args&&... args) 79 | { 80 | return fn(std::forward(args)...); 81 | } 82 | 83 | template 84 | KFR_NOINLINE static KFR_USE_CPU(avx2) result_of retarget_call(Fn&& fn, Args&&... args) 85 | { 86 | return (retarget_func(std::forward(fn)))(std::forward(args)...); 87 | } 88 | }; 89 | 90 | template <> 91 | struct cpu_caller 92 | { 93 | constexpr static cpu_t a = cpu_t::avx1; 94 | 95 | template 96 | KFR_NOINLINE static KFR_USE_CPU(avx) result_of call(Fn&& fn, Args&&... 
args) 97 | { 98 | return fn(std::forward(args)...); 99 | } 100 | 101 | template 102 | KFR_NOINLINE static KFR_USE_CPU(avx) result_of retarget_call(Fn&& fn, Args&&... args) 103 | { 104 | return (retarget_func(std::forward(fn)))(std::forward(args)...); 105 | } 106 | }; 107 | 108 | template <> 109 | struct cpu_caller 110 | { 111 | constexpr static cpu_t a = cpu_t::sse41; 112 | 113 | template 114 | KFR_NOINLINE static KFR_USE_CPU(sse41) result_of call(Fn&& fn, Args&&... args) 115 | { 116 | return fn(std::forward(args)...); 117 | } 118 | 119 | template 120 | KFR_NOINLINE static KFR_USE_CPU(sse41) result_of retarget_call(Fn&& fn, Args&&... args) 121 | { 122 | return (retarget_func(std::forward(fn)))(std::forward(args)...); 123 | } 124 | }; 125 | 126 | template <> 127 | struct cpu_caller 128 | { 129 | constexpr static cpu_t a = cpu_t::ssse3; 130 | 131 | template 132 | KFR_NOINLINE static KFR_USE_CPU(ssse3) result_of call(Fn&& fn, Args&&... args) 133 | { 134 | return fn(std::forward(args)...); 135 | } 136 | 137 | template 138 | KFR_NOINLINE static KFR_USE_CPU(ssse3) result_of retarget_call(Fn&& fn, Args&&... args) 139 | { 140 | return (retarget_func(std::forward(fn)))(std::forward(args)...); 141 | } 142 | }; 143 | 144 | template <> 145 | struct cpu_caller 146 | { 147 | constexpr static cpu_t a = cpu_t::sse3; 148 | 149 | template 150 | KFR_NOINLINE static KFR_USE_CPU(sse3) result_of call(Fn&& fn, Args&&... args) 151 | { 152 | return fn(std::forward(args)...); 153 | } 154 | 155 | template 156 | KFR_NOINLINE static KFR_USE_CPU(sse3) result_of retarget_call(Fn&& fn, Args&&... args) 157 | { 158 | return (retarget_func(std::forward(fn)))(std::forward(args)...); 159 | } 160 | }; 161 | 162 | template <> 163 | struct cpu_caller 164 | { 165 | constexpr static cpu_t a = cpu_t::sse2; 166 | 167 | template 168 | KFR_NOINLINE static KFR_USE_CPU(sse2) result_of call(Fn&& fn, Args&&... 
args) 169 | { 170 | return fn(std::forward(args)...); 171 | } 172 | 173 | template 174 | KFR_NOINLINE static KFR_USE_CPU(sse2) result_of retarget_call(Fn&& fn, Args&&... args) 175 | { 176 | return (retarget_func(std::forward(fn)))(std::forward(args)...); 177 | } 178 | }; 179 | 180 | template 181 | KFR_INLINE auto dispatch_impl(Fn&& fn, Args&&... args) -> decltype(fn(std::forward(args)...)) 182 | { 183 | using targetFn = retarget; 184 | targetFn newfn = retarget_func(std::forward(fn)); 185 | return newfn(std::forward(args)...); 186 | } 187 | 188 | template 189 | KFR_INLINE auto dispatch_impl(Fn&& fn, Args&&... args) -> decltype(fn(std::forward(args)...)) 190 | { 191 | return internal::cpu_caller::retarget_call(std::forward(fn), std::forward(args)...); 192 | } 193 | } 194 | 195 | template 196 | KFR_INLINE auto dispatch(Fn&& fn, Args&&... args) -> decltype(fn(std::forward(args)...)) 197 | { 198 | return internal::dispatch_impl(std::forward(fn), std::forward(args)...); 199 | } 200 | } 201 | -------------------------------------------------------------------------------- /include/kfr/base/function.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 
17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | 25 | #include "dispatch.hpp" 26 | #include "expression.hpp" 27 | #include "shuffle.hpp" 28 | #include "types.hpp" 29 | #include "vec.hpp" 30 | 31 | #pragma clang diagnostic push 32 | #pragma clang diagnostic ignored "-Wshadow" 33 | 34 | namespace kfr 35 | { 36 | 37 | #define KFR_HANDLE_ALL(fn) \ 38 | template \ 39 | KFR_SINTRIN vec fn(vec x, Args&&... args) \ 40 | { \ 41 | return handle_all(x, std::forward(args)...); \ 42 | } 43 | #define KFR_HANDLE_ALL_REDUCE(redfn, fn) \ 44 | template \ 45 | KFR_SINTRIN auto fn(vec x, Args&&... args) \ 46 | { \ 47 | return handle_all_reduce(x, std::forward(args)...); \ 48 | } 49 | 50 | #define KFR_HANDLE_SCALAR(fn) \ 51 | template ::value)> \ 52 | KFR_SINTRIN auto fn(const T& x, const Ts&... rest) \ 53 | { \ 54 | return fn(make_vector(x), make_vector(rest)...)[0]; \ 55 | } 56 | 57 | namespace internal 58 | { 59 | 60 | struct fn_disabled 61 | { 62 | constexpr static bool disabled = true; 63 | }; 64 | 65 | template 66 | constexpr inline size_t next_fast_width(size_t n) 67 | { 68 | return n > vector_width ? vector_width : vector_width; 69 | } 70 | 71 | template (N)> 72 | KFR_INLINE vec extend_reg(vec x) 73 | { 74 | return extend(x); 75 | } 76 | template (N)> 77 | KFR_INLINE vec extend_reg(vec x, T value) 78 | { 79 | return widen(x, value); 80 | } 81 | 82 | template )> 84 | KFR_INLINE auto handle_all_f(Fn&& fn, vec x, Args&&... args) 85 | { 86 | return narrow(fn(extend_reg(x), extend_reg(args)...)); 87 | } 88 | template vector_width)> 90 | KFR_INLINE auto handle_all_f(Fn&& fn, vec x, Args&&... 
args) 91 | { 92 | return concat(fn(low(x), low(args)...), fn(high(x), high(args)...)); 93 | } 94 | 95 | template 96 | KFR_INLINE auto handle_all(vec x, Args&&... args) 97 | { 98 | Fn fn{}; 99 | return handle_all_f(fn, x, std::forward(args)...); 100 | } 101 | 102 | template ]> 104 | KFR_INLINE auto handle_all_reduce_f(RedFn&& redfn, Fn&& fn, vec x, Args&&... args) 105 | { 106 | return fn(extend_reg(x, redfn(initialvalue())), 107 | extend_reg(args, redfn(initialvalue()))...); 108 | } 109 | template vector_width], typename = void> 111 | KFR_INLINE auto handle_all_reduce_f(RedFn&& redfn, Fn&& fn, vec x, Args&&... args) 112 | { 113 | return redfn(fn(low(x), low(args)...), fn(high(x), high(args)...)); 114 | } 115 | template 116 | KFR_INLINE auto handle_all_reduce(vec x, Args&&... args) 117 | { 118 | RedFn redfn{}; 119 | Fn fn{}; 120 | return handle_all_reduce_f(redfn, fn, x, std::forward(args)...); 121 | } 122 | } 123 | } 124 | #pragma clang diagnostic pop 125 | -------------------------------------------------------------------------------- /include/kfr/base/gamma.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 
19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | #include "function.hpp" 25 | #include "log_exp.hpp" 26 | 27 | #pragma clang diagnostic push 28 | #if CID_HAS_WARNING("-Wc99-extensions") 29 | #pragma clang diagnostic ignored "-Wc99-extensions" 30 | #endif 31 | 32 | namespace kfr 33 | { 34 | 35 | namespace internal 36 | { 37 | template 38 | constexpr T gamma_precalc[] = { 39 | 0x2.81b263fec4e08p+0, 0x3.07b4100e04448p+16, -0xa.a0da01d4d4e2p+16, 0xf.05ccb27bb9dbp+16, 40 | -0xa.fa79616b7c6ep+16, 0x4.6dd6c10d4df5p+16, -0xf.a2304199eb4ap+12, 0x1.c21dd4aade3dp+12, 41 | -0x1.62f981f01cf84p+8, 0x5.a937aa5c48d98p+0, -0x3.c640bf82e2104p-8, 0xc.914c540f959cp-24, 42 | }; 43 | 44 | template 45 | struct in_gamma : in_log_exp 46 | { 47 | private: 48 | using in_log_exp::exp; 49 | using in_log_exp::pow; 50 | 51 | public: 52 | template 53 | KFR_SINTRIN vec gamma(vec z) 54 | { 55 | constexpr size_t Count = arraysize(internal::gamma_precalc); 56 | vec accm = gamma_precalc[0]; 57 | KFR_LOOP_UNROLL 58 | for (size_t k = 1; k < Count; k++) 59 | accm += gamma_precalc[k] / (z + cast>(k)); 60 | accm *= exp(-(z + Count)) * pow(z + Count, z + 0.5); 61 | return accm / z; 62 | } 63 | 64 | template 65 | KFR_SINTRIN vec factorial_approx(vec x) 66 | { 67 | return gamma(x + T(1)); 68 | } 69 | KFR_SPEC_FN(in_gamma, gamma) 70 | KFR_SPEC_FN(in_gamma, factorial_approx) 71 | }; 72 | } 73 | 74 | namespace native 75 | { 76 | using fn_gamma = internal::in_gamma<>::fn_gamma; 77 | template ::value)> 78 | KFR_INTRIN ftype gamma(const T1& x) 79 | { 80 | return internal::in_gamma<>::gamma(x); 81 | } 82 | 83 | template ::value)> 84 | KFR_INTRIN expr_func gamma(E1&& x) 85 | { 86 | return { fn_gamma(), std::forward(x) }; 87 | } 88 | 89 | using fn_factorial_approx = internal::in_gamma<>::fn_factorial_approx; 90 | template ::value)> 
91 | KFR_INTRIN ftype factorial_approx(const T1& x) 92 | { 93 | return internal::in_gamma<>::factorial_approx(x); 94 | } 95 | 96 | template ::value)> 97 | KFR_INTRIN expr_func factorial_approx(E1&& x) 98 | { 99 | return { fn_factorial_approx(), std::forward(x) }; 100 | } 101 | } 102 | } 103 | 104 | #pragma clang diagnostic pop 105 | -------------------------------------------------------------------------------- /include/kfr/base/hyperbolic.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
22 | */ 23 | #pragma once 24 | #include "abs.hpp" 25 | #include "constants.hpp" 26 | #include "function.hpp" 27 | #include "log_exp.hpp" 28 | #include "min_max.hpp" 29 | #include "operators.hpp" 30 | #include "select.hpp" 31 | 32 | namespace kfr 33 | { 34 | 35 | namespace internal 36 | { 37 | 38 | template 39 | struct in_hyperbolic : in_log_exp 40 | { 41 | constexpr static cpu_t cur = c; 42 | 43 | private: 44 | using in_log_exp::exp; 45 | 46 | public: 47 | template 48 | KFR_SINTRIN vec sinh(vec x) 49 | { 50 | return (exp(x) - exp(-x)) * T(0.5); 51 | } 52 | 53 | template 54 | KFR_SINTRIN vec cosh(vec x) 55 | { 56 | return (exp(x) + exp(-x)) * T(0.5); 57 | } 58 | 59 | template 60 | KFR_SINTRIN vec tanh(vec x) 61 | { 62 | x = -2 * x; 63 | return (1 - exp(x)) / (1 + exp(x)); 64 | } 65 | 66 | template 67 | KFR_SINTRIN vec coth(vec x) 68 | { 69 | x = -2 * x; 70 | return (1 + exp(x)) / (1 - exp(x)); 71 | } 72 | 73 | template 1)> 74 | KFR_SINTRIN vec sinhcosh(vec x) 75 | { 76 | const vec a = exp(x); 77 | const vec b = exp(-x); 78 | return subadd(a, b) * T(0.5); 79 | } 80 | 81 | template 1)> 82 | KFR_SINTRIN vec coshsinh(vec x) 83 | { 84 | const vec a = exp(x); 85 | const vec b = exp(-x); 86 | return addsub(a, b) * T(0.5); 87 | } 88 | KFR_HANDLE_SCALAR(sinh) 89 | KFR_HANDLE_SCALAR(cosh) 90 | KFR_HANDLE_SCALAR(tanh) 91 | KFR_HANDLE_SCALAR(coth) 92 | KFR_HANDLE_SCALAR(sinhcosh) 93 | KFR_HANDLE_SCALAR(coshsinh) 94 | KFR_SPEC_FN(in_hyperbolic, sinh) 95 | KFR_SPEC_FN(in_hyperbolic, cosh) 96 | KFR_SPEC_FN(in_hyperbolic, tanh) 97 | KFR_SPEC_FN(in_hyperbolic, coth) 98 | KFR_SPEC_FN(in_hyperbolic, sinhcosh) 99 | KFR_SPEC_FN(in_hyperbolic, coshsinh) 100 | }; 101 | } 102 | 103 | namespace native 104 | { 105 | using fn_sinh = internal::in_hyperbolic<>::fn_sinh; 106 | template ::value)> 107 | KFR_INTRIN ftype sinh(const T1& x) 108 | { 109 | return internal::in_hyperbolic<>::sinh(x); 110 | } 111 | 112 | template ::value)> 113 | KFR_INTRIN expr_func sinh(E1&& x) 114 | { 115 | return { 
fn_sinh(), std::forward(x) }; 116 | } 117 | 118 | using fn_cosh = internal::in_hyperbolic<>::fn_cosh; 119 | template ::value)> 120 | KFR_INTRIN ftype cosh(const T1& x) 121 | { 122 | return internal::in_hyperbolic<>::cosh(x); 123 | } 124 | 125 | template ::value)> 126 | KFR_INTRIN expr_func cosh(E1&& x) 127 | { 128 | return { fn_cosh(), std::forward(x) }; 129 | } 130 | 131 | using fn_tanh = internal::in_hyperbolic<>::fn_tanh; 132 | template ::value)> 133 | KFR_INTRIN ftype tanh(const T1& x) 134 | { 135 | return internal::in_hyperbolic<>::tanh(x); 136 | } 137 | 138 | template ::value)> 139 | KFR_INTRIN expr_func tanh(E1&& x) 140 | { 141 | return { fn_tanh(), std::forward(x) }; 142 | } 143 | 144 | using fn_coth = internal::in_hyperbolic<>::fn_coth; 145 | template ::value)> 146 | KFR_INTRIN ftype coth(const T1& x) 147 | { 148 | return internal::in_hyperbolic<>::coth(x); 149 | } 150 | 151 | template ::value)> 152 | KFR_INTRIN expr_func coth(E1&& x) 153 | { 154 | return { fn_coth(), std::forward(x) }; 155 | } 156 | 157 | using fn_sinhcosh = internal::in_hyperbolic<>::fn_sinhcosh; 158 | template ::value)> 159 | KFR_INTRIN ftype sinhcosh(const T1& x) 160 | { 161 | return internal::in_hyperbolic<>::sinhcosh(x); 162 | } 163 | 164 | template ::value)> 165 | KFR_INTRIN expr_func sinhcosh(E1&& x) 166 | { 167 | return { fn_sinhcosh(), std::forward(x) }; 168 | } 169 | 170 | using fn_coshsinh = internal::in_hyperbolic<>::fn_coshsinh; 171 | template ::value)> 172 | KFR_INTRIN ftype coshsinh(const T1& x) 173 | { 174 | return internal::in_hyperbolic<>::coshsinh(x); 175 | } 176 | 177 | template ::value)> 178 | KFR_INTRIN expr_func coshsinh(E1&& x) 179 | { 180 | return { fn_coshsinh(), std::forward(x) }; 181 | } 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /include/kfr/base/intrinsics.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "kfr.h" 4 | 5 | #if 
KFR_COMPILER_CLANG 6 | 7 | #pragma clang diagnostic push 8 | #pragma clang diagnostic ignored "-Wreserved-id-macro" 9 | 10 | #ifdef __AVX2__ 11 | #define KFR_AVX2_DEFINED 12 | #endif 13 | #ifdef __AVX__ 14 | #define KFR_AVX1_DEFINED 15 | #endif 16 | #ifdef __SSE4_2__ 17 | #define KFR_SSE42_DEFINED 18 | #endif 19 | #ifdef __SSE4_1__ 20 | #define KFR_SSE41_DEFINED 21 | #endif 22 | #ifdef __SSSE3__ 23 | #define KFR_SSSE3_DEFINED 24 | #endif 25 | #ifdef __SSE3__ 26 | #define KFR_SSE3_DEFINED 27 | #endif 28 | #ifdef __SSE2__ 29 | #define KFR_SSE2_DEFINED 30 | #endif 31 | #ifdef __SSE__ 32 | #define KFR_SSE1_DEFINED 33 | #endif 34 | #ifdef __MMX__ 35 | #define KFR_MMX_DEFINED 36 | #endif 37 | 38 | #ifndef KFR_AVX2_DEFINED 39 | #define __AVX2__ 40 | #endif 41 | #ifndef KFR_AVX1_DEFINED 42 | #define __AVX__ 43 | #endif 44 | #ifndef KFR_SSE42_DEFINED 45 | #define __SSE4_2__ 46 | #endif 47 | #ifndef KFR_SSE41_DEFINED 48 | #define __SSE4_1__ 49 | #endif 50 | #ifndef KFR_SSSE3_DEFINED 51 | #define __SSSE3__ 52 | #endif 53 | #ifndef KFR_SSE3_DEFINED 54 | #define __SSE3__ 55 | #endif 56 | #ifndef KFR_SSE2_DEFINED 57 | #define __SSE2__ 58 | #endif 59 | #ifndef KFR_SSE1_DEFINED 60 | #define __SSE__ 61 | #endif 62 | #ifndef KFR_MMX_DEFINED 63 | #define __MMX__ 64 | #endif 65 | 66 | #ifdef KFR_SKIP_AVX512 67 | #ifndef __AVX512FINTRIN_H 68 | #define __AVX512FINTRIN_H 69 | #endif 70 | #ifndef __AVX512VLINTRIN_H 71 | #define __AVX512VLINTRIN_H 72 | #endif 73 | #ifndef __AVX512BWINTRIN_H 74 | #define __AVX512BWINTRIN_H 75 | #endif 76 | #ifndef __AVX512CDINTRIN_H 77 | #define __AVX512CDINTRIN_H 78 | #endif 79 | #ifndef __AVX512DQINTRIN_H 80 | #define __AVX512DQINTRIN_H 81 | #endif 82 | #ifndef __AVX512VLBWINTRIN_H 83 | #define __AVX512VLBWINTRIN_H 84 | #endif 85 | #ifndef __AVX512VLDQINTRIN_H 86 | #define __AVX512VLDQINTRIN_H 87 | #endif 88 | #ifndef __AVX512ERINTRIN_H 89 | #define __AVX512ERINTRIN_H 90 | #endif 91 | #ifndef __IFMAINTRIN_H 92 | #define __IFMAINTRIN_H 93 | #endif 94 | 
#ifndef __IFMAVLINTRIN_H 95 | #define __IFMAVLINTRIN_H 96 | #endif 97 | #ifndef __VBMIINTRIN_H 98 | #define __VBMIINTRIN_H 99 | #endif 100 | #ifndef __VBMIVLINTRIN_H 101 | #define __VBMIVLINTRIN_H 102 | #endif 103 | 104 | #endif 105 | 106 | #include 107 | #ifdef KFR_OS_WIN 108 | #include 109 | #endif 110 | 111 | #ifndef KFR_AVX2_DEFINED 112 | #undef __AVX2__ 113 | #endif 114 | #ifndef KFR_AVX1_DEFINED 115 | #undef __AVX__ 116 | #endif 117 | #ifndef KFR_SSE42_DEFINED 118 | #undef __SSE4_2__ 119 | #endif 120 | #ifndef KFR_SSE41_DEFINED 121 | #undef __SSE4_1__ 122 | #endif 123 | #ifndef KFR_SSSE3_DEFINED 124 | #undef __SSSE3__ 125 | #endif 126 | #ifndef KFR_SSE3_DEFINED 127 | #undef __SSE3__ 128 | #endif 129 | #ifndef KFR_SSE2_DEFINED 130 | #undef __SSE2__ 131 | #endif 132 | #ifndef KFR_SSE1_DEFINED 133 | #undef __SSE__ 134 | #endif 135 | #ifndef KFR_MMX_DEFINED 136 | #undef __MMX__ 137 | #endif 138 | 139 | #pragma clang diagnostic pop 140 | 141 | #else 142 | 143 | #include 144 | 145 | #endif 146 | -------------------------------------------------------------------------------- /include/kfr/base/kfr.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "../cident.h" 7 | 8 | #define KFR_INLINE CID_INLINE 9 | #define KFR_INLINE_MEMBER CID_INLINE_MEMBER 10 | #define KFR_INLINE_LAMBDA CID_INLINE_LAMBDA 11 | #define KFR_NOINLINE CID_NOINLINE 12 | #define KFR_FLATTEN CID_FLATTEN 13 | #define KFR_RESTRICT CID_RESTRICT 14 | 15 | #ifdef CID_COMPILER_CLANG 16 | #define KFR_COMPILER_CLANG CID_COMPILER_CLANG 17 | #endif 18 | 19 | #ifdef CID_OS_WIN 20 | #define KFR_OS_WIN CID_OS_WIN 21 | #endif 22 | 23 | #ifdef CID_OS_OSX 24 | #define KFR_OS_OSX CID_OS_OSX 25 | #endif 26 | 27 | #ifdef CID_OS_LINUX 28 | #define KFR_OS_LINUX CID_OS_LINUX 29 | #endif 30 | 31 | #ifdef CID_GNU_ATTRIBUTES 32 | #define KFR_GNU_ATTRIBUTES CID_GNU_ATTRIBUTES 33 | #endif 34 | 35 | #ifdef CID_MSVC_ATTRIBUTES 36 | 
#define KFR_MSVC_ATTRIBUTES CID_MSVC_ATTRIBUTES 37 | #endif 38 | 39 | #ifdef CID_ARCH_X64 40 | #define KFR_ARCH_X64 CID_ARCH_X64 41 | #endif 42 | 43 | #ifdef CID_ARCH_X32 44 | #define KFR_ARCH_X32 CID_ARCH_X32 45 | #endif 46 | 47 | #define KFR_ARCH_NAME CID_ARCH_NAME 48 | 49 | #define KFR_CDECL CID_CDECL 50 | 51 | #define KFR_PUBLIC_C CID_PUBLIC_C 52 | 53 | #ifdef __cplusplus 54 | namespace kfr 55 | { 56 | using ::cid::arraysize; 57 | } 58 | #endif 59 | 60 | #define KFR_VERSION_STRING "0.9.1" 61 | #define KFR_VERSION_MAJOR 0 62 | #define KFR_VERSION_MINOR 9 63 | #define KFR_VERSION_BUILD 1 64 | #define KFR_VERSION 901 65 | 66 | #ifdef __cplusplus 67 | namespace kfr 68 | { 69 | constexpr const char version_string[] = KFR_VERSION_STRING; 70 | constexpr int version_major = KFR_VERSION_MAJOR; 71 | constexpr int version_minor = KFR_VERSION_MINOR; 72 | constexpr int version_build = KFR_VERSION_BUILD; 73 | constexpr int version = KFR_VERSION; 74 | } 75 | #endif 76 | 77 | //#define KFR_MEMORY_ALIGNMENT 64 78 | 79 | #if KFR_COMPILER_CLANG 80 | #define KFR_LOOP_NOUNROLL \ 81 | _Pragma("clang loop vectorize( disable )") _Pragma("clang loop interleave( disable )") \ 82 | _Pragma("clang loop unroll( disable )") 83 | 84 | #define KFR_LOOP_UNROLL _Pragma("clang loop unroll( full )") 85 | 86 | #define KFR_VEC_CC __attribute__((vectorcall)) 87 | #else 88 | #define KFR_LOOP_NOUNROLL 89 | #define KFR_LOOP_UNROLL 90 | #ifdef KFR_COMPILER_MSVC 91 | #define KFR_VEC_CC __vectorcall 92 | #endif 93 | 94 | #endif 95 | 96 | #define KFR_AVAIL_AVX2 1 97 | #define KFR_AVAIL_AVX 1 98 | #define KFR_AVAIL_SSE42 1 99 | #define KFR_AVAIL_SSE41 1 100 | #define KFR_AVAIL_SSSE3 1 101 | #define KFR_AVAIL_SSE3 1 102 | #define KFR_AVAIL_SSE2 1 103 | #define KFR_AVAIL_SSE 1 104 | 105 | #if defined(KFR_GNU_ATTRIBUTES) 106 | 107 | #define KFR_CPU_NAME_avx2 "avx2" 108 | #define KFR_CPU_NAME_avx "avx" 109 | #define KFR_CPU_NAME_sse42 "sse4.2" 110 | #define KFR_CPU_NAME_sse41 "sse4.1" 111 | #define 
KFR_CPU_NAME_ssse3 "ssse3" 112 | #define KFR_CPU_NAME_sse3 "sse3" 113 | #define KFR_CPU_NAME_sse2 "sse2" 114 | 115 | #define KFR_USE_CPU(arch) __attribute__((target(KFR_CPU_NAME_##arch))) 116 | 117 | #else 118 | #define KFR_USE_CPU(arch) 119 | #endif 120 | 121 | #if defined(KFR_GNU_ATTRIBUTES) 122 | #define KFR_FAST_CC __attribute__((fastcall)) 123 | #else 124 | #define KFR_FAST_CC __fastcall 125 | #endif 126 | 127 | #define KFR_INTRIN CID_INTRIN 128 | #define KFR_SINTRIN CID_INTRIN CID_NODEBUG static 129 | #define KFR_AINTRIN inline CID_NODEBUG static 130 | #define KFR_FAST_NOINLINE CID_NOINLINE 131 | 132 | #define KFR_CPU_INTRIN(c) KFR_AINTRIN KFR_USE_CPU(c) 133 | -------------------------------------------------------------------------------- /include/kfr/base/memory.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
22 | */ 23 | #pragma once 24 | 25 | #include "../base/read_write.hpp" 26 | #include "../base/types.hpp" 27 | #include 28 | #include 29 | 30 | namespace kfr 31 | { 32 | 33 | namespace internal 34 | { 35 | 36 | struct memory_statistics 37 | { 38 | std::atomic_uintptr_t allocation_count = ATOMIC_VAR_INIT(0); 39 | std::atomic_uintptr_t allocation_size = ATOMIC_VAR_INIT(0); 40 | std::atomic_uintptr_t deallocation_count = ATOMIC_VAR_INIT(0); 41 | std::atomic_uintptr_t deallocation_size = ATOMIC_VAR_INIT(0); 42 | }; 43 | 44 | inline memory_statistics& get_memory_statistics() 45 | { 46 | static memory_statistics ms; 47 | return ms; 48 | } 49 | 50 | struct mem_header 51 | { 52 | u8 offset; 53 | u8 alignment; 54 | u8 reserved1; 55 | u8 reserved2; 56 | size_t size; 57 | } __attribute__((__packed__)); 58 | 59 | inline mem_header* aligned_header(void* ptr) { return ptr_cast(ptr) - 1; } 60 | 61 | inline size_t aligned_size(void* ptr) { return aligned_header(ptr)->size; } 62 | 63 | inline void* aligned_malloc(size_t size, size_t alignment) 64 | { 65 | get_memory_statistics().allocation_count++; 66 | get_memory_statistics().allocation_size += size; 67 | void* ptr = malloc(size + (alignment - 1) + sizeof(mem_header)); 68 | if (ptr == nullptr) 69 | return nullptr; 70 | void* aligned_ptr = advance(ptr, sizeof(mem_header)); 71 | aligned_ptr = align_up(aligned_ptr, alignment); 72 | aligned_header(aligned_ptr)->alignment = static_cast(alignment > 255 ? 
255 : alignment); 73 | aligned_header(aligned_ptr)->offset = static_cast(distance(aligned_ptr, ptr)); 74 | aligned_header(aligned_ptr)->size = size; 75 | return aligned_ptr; 76 | } 77 | inline void aligned_free(void* ptr) 78 | { 79 | get_memory_statistics().deallocation_count++; 80 | get_memory_statistics().deallocation_size += aligned_size(ptr); 81 | free(advance(ptr, -static_cast(aligned_header(ptr)->offset))); 82 | } 83 | } 84 | 85 | template 86 | KFR_INLINE T* aligned_allocate(size_t size = 1) 87 | { 88 | T* ptr = static_cast(__builtin_assume_aligned( 89 | internal::aligned_malloc(std::max(alignment, size * details::elementsize), alignment), alignment)); 90 | return ptr; 91 | } 92 | 93 | template 94 | KFR_INLINE void aligned_deallocate(T* ptr) 95 | { 96 | return internal::aligned_free(ptr); 97 | } 98 | 99 | namespace internal 100 | { 101 | template 102 | struct aligned_deleter 103 | { 104 | KFR_INLINE void operator()(T* ptr) const { aligned_deallocate(ptr); } 105 | }; 106 | } 107 | 108 | template 109 | struct autofree 110 | { 111 | KFR_INLINE autofree() {} 112 | explicit KFR_INLINE autofree(size_t size) : ptr(aligned_allocate(size)) {} 113 | autofree(const autofree&) = delete; 114 | autofree& operator=(const autofree&) = delete; 115 | autofree(autofree&&) noexcept = default; 116 | autofree& operator=(autofree&&) noexcept = default; 117 | KFR_INLINE T& operator[](size_t index) noexcept { return ptr[index]; } 118 | KFR_INLINE const T& operator[](size_t index) const noexcept { return ptr[index]; } 119 | 120 | template 121 | KFR_INLINE U* data() noexcept 122 | { 123 | return ptr_cast(ptr.get()); 124 | } 125 | template 126 | KFR_INLINE const U* data() const noexcept 127 | { 128 | return ptr_cast(ptr.get()); 129 | } 130 | 131 | std::unique_ptr> ptr; 132 | }; 133 | 134 | template 135 | struct allocator 136 | { 137 | using value_type = T; 138 | using pointer = T*; 139 | using const_pointer = const T*; 140 | using reference = T&; 141 | using const_reference = const T&; 
142 | using size_type = std::size_t; 143 | using difference_type = std::ptrdiff_t; 144 | 145 | template 146 | struct rebind 147 | { 148 | using other = allocator; 149 | }; 150 | constexpr allocator() noexcept = default; 151 | constexpr allocator(const allocator&) noexcept = default; 152 | template 153 | constexpr allocator(const allocator&) noexcept 154 | { 155 | } 156 | pointer address(reference x) const noexcept { return std::addressof(x); } 157 | const_pointer address(const_reference x) const noexcept { return std::addressof(x); } 158 | pointer allocate(size_type n, std::allocator::const_pointer = 0) const 159 | { 160 | pointer result = aligned_allocate(n); 161 | if (!result) 162 | CID_THROW(std::bad_alloc()); 163 | return result; 164 | } 165 | void deallocate(pointer p, size_type) { aligned_deallocate(p); } 166 | size_type max_size() const { return std::numeric_limits::max() / sizeof(value_type); } 167 | template 168 | void construct(U* p, Args&&... args) 169 | { 170 | ::new (pvoid(p)) U(std::forward(args)...); 171 | } 172 | template 173 | void destroy(U* p) 174 | { 175 | p->~U(); 176 | } 177 | }; 178 | 179 | template 180 | constexpr inline bool operator==(const allocator&, const allocator&) noexcept 181 | { 182 | return true; 183 | } 184 | template 185 | constexpr inline bool operator!=(const allocator&, const allocator&) noexcept 186 | { 187 | return false; 188 | } 189 | 190 | struct aligned_new 191 | { 192 | inline static void* operator new(size_t size) { return aligned_allocate(size); } 193 | inline static void operator delete(void* ptr) { return aligned_deallocate(ptr); } 194 | }; 195 | 196 | #define KFR_CLASS_REFCOUNT(cl) \ 197 | public: \ 198 | void addref() const { m_refcount++; } \ 199 | void release() const \ 200 | { \ 201 | if (--m_refcount == 0) \ 202 | { \ 203 | delete this; \ 204 | } \ 205 | } \ 206 | \ 207 | private: \ 208 | mutable std::atomic_uintptr_t m_refcount = ATOMIC_VAR_INIT(0); 209 | } 210 | 
-------------------------------------------------------------------------------- /include/kfr/base/read_write.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | 25 | #include "shuffle.hpp" 26 | #include "types.hpp" 27 | #include "vec.hpp" 28 | 29 | namespace kfr 30 | { 31 | 32 | template 33 | KFR_INLINE vec read(const T* src) 34 | { 35 | return internal_read_write::read(src); 36 | } 37 | 38 | template 39 | KFR_INLINE void write(T* dest, vec value) 40 | { 41 | internal_read_write::write(dest, value); 42 | } 43 | 44 | template 45 | KFR_INLINE vec gather(const T* base, size_t index, Indices... 
indices) 46 | { 47 | return make_vector(base[index], base[indices]...); 48 | } 49 | 50 | template 51 | KFR_INLINE vec gather(const T* base) 52 | { 53 | return make_vector(base[Index], base[Indices]...); 54 | } 55 | 56 | template 57 | KFR_INLINE void scatter(const T* base, vec value) 58 | { 59 | base[Index] = value[InIndex]; 60 | scatter(base, value); 61 | } 62 | 63 | namespace internal 64 | { 65 | template 66 | KFR_INLINE vec gather(const T* base, vec indices, csizes_t) 67 | { 68 | return make_vector(base[indices[Indices]]...); 69 | } 70 | template 71 | KFR_INLINE vec gather_stride(const T* base, csizes_t) 72 | { 73 | return make_vector(base[Indices * Stride]...); 74 | } 75 | template 76 | KFR_INLINE vec gather_stride_s(const T* base, size_t stride, csizes_t) 77 | { 78 | return make_vector(base[Indices * stride]...); 79 | } 80 | } 81 | 82 | template 83 | KFR_INLINE vec gather(const T* base, vec indices) 84 | { 85 | return internal::gather(base, indices, csizeseq); 86 | } 87 | 88 | template 89 | KFR_INLINE vec gather_stride(const T* base, size_t stride) 90 | { 91 | return internal::gather_stride_s(base, stride, csizeseq); 92 | } 93 | 94 | template 95 | KFR_INLINE vec gather_stride(const T* base) 96 | { 97 | return internal::gather_stride(base, csizeseq); 98 | } 99 | 100 | template 101 | KFR_INLINE vec gather_helper(const T* base, vec offset, csizes_t) 102 | { 103 | return concat(read(base + groupsize * (*offset)[Indices])...); 104 | } 105 | template 106 | KFR_INLINE vec gather(const T* base, vec offset) 107 | { 108 | return gather_helper(base, offset, csizeseq); 109 | } 110 | 111 | template 112 | KFR_INLINE void scatter_helper(T* base, vec offset, vec value, csizes_t) 113 | { 114 | swallow{ (write(base + groupsize * (*offset)[Indices], slice(value)), 115 | 0)... 
}; 116 | } 117 | template 118 | KFR_INLINE void scatter(T* base, vec offset, vec value) 119 | { 120 | return scatter_helper(base, offset, value, csizeseq); 121 | } 122 | 123 | template 124 | constexpr T partial_masks[] = { internal::allones, 125 | internal::allones, 126 | internal::allones, 127 | internal::allones, 128 | internal::allones, 129 | internal::allones, 130 | internal::allones, 131 | internal::allones, 132 | internal::allones, 133 | internal::allones, 134 | internal::allones, 135 | internal::allones, 136 | internal::allones, 137 | internal::allones, 138 | internal::allones, 139 | internal::allones, 140 | internal::allones, 141 | internal::allones, 142 | internal::allones, 143 | internal::allones, 144 | internal::allones, 145 | internal::allones, 146 | internal::allones, 147 | internal::allones, 148 | internal::allones, 149 | internal::allones, 150 | internal::allones, 151 | internal::allones, 152 | internal::allones, 153 | internal::allones, 154 | internal::allones, 155 | internal::allones, 156 | T(), 157 | T(), 158 | T(), 159 | T(), 160 | T(), 161 | T(), 162 | T(), 163 | T(), 164 | T(), 165 | T(), 166 | T(), 167 | T(), 168 | T(), 169 | T(), 170 | T(), 171 | T(), 172 | T(), 173 | T(), 174 | T(), 175 | T(), 176 | T(), 177 | T(), 178 | T(), 179 | T(), 180 | T(), 181 | T(), 182 | T(), 183 | T(), 184 | T(), 185 | T(), 186 | T(), 187 | T() }; 188 | 189 | template 190 | KFR_INLINE vec partial_mask(size_t index) 191 | { 192 | static_assert(N <= arraysize(partial_masks) / 2, 193 | "N must not be greater than half of partial_masks expression_array"); 194 | return read(&partial_masks[0] + arraysize(partial_masks) / 2 - index); 195 | } 196 | template 197 | KFR_INLINE vec partial_mask(size_t index, vec_t) 198 | { 199 | return partial_mask(index); 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /include/kfr/base/round.hpp: -------------------------------------------------------------------------------- 1 | /** 2 
| * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
22 | */ 23 | #pragma once 24 | 25 | #include "function.hpp" 26 | #include "operators.hpp" 27 | 28 | namespace kfr 29 | { 30 | 31 | #define KFR_mm_trunc_ps(V) _mm_round_ps((V), _MM_FROUND_TRUNC) 32 | #define KFR_mm_roundnearest_ps(V) _mm_round_ps((V), _MM_FROUND_NINT) 33 | #define KFR_mm_trunc_pd(V) _mm_round_pd((V), _MM_FROUND_TRUNC) 34 | #define KFR_mm_roundnearest_pd(V) _mm_round_pd((V), _MM_FROUND_NINT) 35 | 36 | #define KFR_mm_trunc_ss(V) _mm_round_ss(_mm_setzero_ps(), (V), _MM_FROUND_TRUNC) 37 | #define KFR_mm_roundnearest_ss(V) _mm_round_ss(_mm_setzero_ps(), (V), _MM_FROUND_NINT) 38 | #define KFR_mm_trunc_sd(V) _mm_round_sd(_mm_setzero_pd(), (V), _MM_FROUND_TRUNC) 39 | #define KFR_mm_roundnearest_sd(V) _mm_round_sd(_mm_setzero_pd(), (V), _MM_FROUND_NINT) 40 | 41 | #define KFR_mm_floor_ss(V) _mm_floor_ss(_mm_setzero_ps(), (V)) 42 | #define KFR_mm_floor_sd(V) _mm_floor_sd(_mm_setzero_pd(), (V)) 43 | #define KFR_mm_ceil_ss(V) _mm_ceil_ss(_mm_setzero_ps(), (V)) 44 | #define KFR_mm_ceil_sd(V) _mm_ceil_sd(_mm_setzero_pd(), (V)) 45 | 46 | #define KFR_mm256_trunc_ps(V) _mm256_round_ps((V), _MM_FROUND_TRUNC) 47 | #define KFR_mm256_roundnearest_ps(V) _mm256_round_ps((V), _MM_FROUND_NINT) 48 | #define KFR_mm256_trunc_pd(V) _mm256_round_pd((V), _MM_FROUND_TRUNC) 49 | #define KFR_mm256_roundnearest_pd(V) _mm256_round_pd((V), _MM_FROUND_NINT) 50 | 51 | namespace internal 52 | { 53 | 54 | template 55 | struct in_round : in_round 56 | { 57 | struct fn_floor : in_round::fn_floor, fn_disabled 58 | { 59 | }; 60 | struct fn_ceil : in_round::fn_ceil, fn_disabled 61 | { 62 | }; 63 | struct fn_round : in_round::fn_round, fn_disabled 64 | { 65 | }; 66 | struct fn_trunc : in_round::fn_trunc, fn_disabled 67 | { 68 | }; 69 | struct fn_fract : in_round::fn_fract, fn_disabled 70 | { 71 | }; 72 | }; 73 | 74 | template <> 75 | struct in_round 76 | { 77 | constexpr static cpu_t cpu = cpu_t::sse2; 78 | 79 | template ::value)> 80 | KFR_SINTRIN vec floor(vec value) 81 | { 82 | return value; 83 
| } 84 | template ::value)> 85 | KFR_SINTRIN vec ceil(vec value) 86 | { 87 | return value; 88 | } 89 | template ::value)> 90 | KFR_SINTRIN vec trunc(vec value) 91 | { 92 | return value; 93 | } 94 | template ::value)> 95 | KFR_SINTRIN vec round(vec value) 96 | { 97 | return value; 98 | } 99 | template ::value)> 100 | KFR_SINTRIN vec fract(vec) 101 | { 102 | return T(); 103 | } 104 | 105 | KFR_SINTRIN f32sse floor(f32sse x) 106 | { 107 | f32sse t = cast(cast(x)); 108 | return t - (bitcast(x < t) & 1.f); 109 | } 110 | KFR_SINTRIN f64sse floor(f64sse x) 111 | { 112 | f64sse t = cast(cast(x)); 113 | return t - (bitcast(x < t) & 1.0); 114 | } 115 | KFR_SINTRIN f32sse ceil(f32sse x) 116 | { 117 | f32sse t = cast(cast(x)); 118 | return t + (bitcast(x > t) & 1.f); 119 | } 120 | KFR_SINTRIN f64sse ceil(f64sse x) 121 | { 122 | f64sse t = cast(cast(x)); 123 | return t + (bitcast(x > t) & 1.0); 124 | } 125 | KFR_SINTRIN f32sse round(f32sse x) { return cast(cast(x + mulsign(f32x4(0.5f), x))); } 126 | KFR_SINTRIN f64sse round(f64sse x) { return cast(cast(x + mulsign(f64x2(0.5), x))); } 127 | KFR_SINTRIN f32sse trunc(f32sse x) { return cast(cast(x)); } 128 | KFR_SINTRIN f64sse trunc(f64sse x) { return cast(cast(x)); } 129 | KFR_SINTRIN f32sse fract(f32sse x) { return x - floor(x); } 130 | KFR_SINTRIN f64sse fract(f64sse x) { return x - floor(x); } 131 | 132 | KFR_HANDLE_ALL(floor) 133 | KFR_HANDLE_ALL(ceil) 134 | KFR_HANDLE_ALL(round) 135 | KFR_HANDLE_ALL(trunc) 136 | KFR_HANDLE_ALL(fract) 137 | KFR_HANDLE_SCALAR(floor) 138 | KFR_HANDLE_SCALAR(ceil) 139 | KFR_HANDLE_SCALAR(round) 140 | KFR_HANDLE_SCALAR(trunc) 141 | KFR_HANDLE_SCALAR(fract) 142 | KFR_SPEC_FN(in_round, floor) 143 | KFR_SPEC_FN(in_round, ceil) 144 | KFR_SPEC_FN(in_round, round) 145 | KFR_SPEC_FN(in_round, trunc) 146 | KFR_SPEC_FN(in_round, fract) 147 | }; 148 | 149 | template <> 150 | struct in_round : in_round 151 | { 152 | constexpr static cpu_t cpu = cpu_t::sse41; 153 | 154 | KFR_SINTRIN f32sse floor(f32sse 
value) { return _mm_floor_ps(*value); } 155 | KFR_SINTRIN f32sse ceil(f32sse value) { return _mm_ceil_ps(*value); } 156 | KFR_SINTRIN f32sse trunc(f32sse value) { return KFR_mm_trunc_ps(*value); } 157 | KFR_SINTRIN f32sse round(f32sse value) { return KFR_mm_roundnearest_ps(*value); } 158 | KFR_SINTRIN f64sse floor(f64sse value) { return _mm_floor_pd(*value); } 159 | KFR_SINTRIN f64sse ceil(f64sse value) { return _mm_ceil_pd(*value); } 160 | KFR_SINTRIN f64sse trunc(f64sse value) { return KFR_mm_trunc_pd(*value); } 161 | KFR_SINTRIN f64sse round(f64sse value) { return KFR_mm_roundnearest_pd(*value); } 162 | KFR_SINTRIN f32sse fract(f32sse x) { return x - floor(x); } 163 | KFR_SINTRIN f64sse fract(f64sse x) { return x - floor(x); } 164 | 165 | KFR_HANDLE_ALL(floor) 166 | KFR_HANDLE_ALL(ceil) 167 | KFR_HANDLE_ALL(round) 168 | KFR_HANDLE_ALL(trunc) 169 | KFR_HANDLE_ALL(fract) 170 | KFR_HANDLE_SCALAR(floor) 171 | KFR_HANDLE_SCALAR(ceil) 172 | KFR_HANDLE_SCALAR(round) 173 | KFR_HANDLE_SCALAR(trunc) 174 | KFR_HANDLE_SCALAR(fract) 175 | KFR_SPEC_FN(in_round, floor) 176 | KFR_SPEC_FN(in_round, ceil) 177 | KFR_SPEC_FN(in_round, round) 178 | KFR_SPEC_FN(in_round, trunc) 179 | KFR_SPEC_FN(in_round, fract) 180 | }; 181 | 182 | template <> 183 | struct in_round : in_round 184 | { 185 | constexpr static cpu_t cpu = cpu_t::avx1; 186 | using in_round::floor; 187 | using in_round::ceil; 188 | using in_round::trunc; 189 | using in_round::round; 190 | using in_round::fract; 191 | 192 | KFR_SINTRIN f32avx floor(f32avx value) { return _mm256_floor_ps(*value); } 193 | KFR_SINTRIN f32avx ceil(f32avx value) { return _mm256_ceil_ps(*value); } 194 | KFR_SINTRIN f32avx trunc(f32avx value) { return KFR_mm256_trunc_ps(*value); } 195 | KFR_SINTRIN f32avx round(f32avx value) { return KFR_mm256_roundnearest_ps(*value); } 196 | KFR_SINTRIN f64avx floor(f64avx value) { return _mm256_floor_pd(*value); } 197 | KFR_SINTRIN f64avx ceil(f64avx value) { return _mm256_ceil_pd(*value); } 198 | KFR_SINTRIN 
f64avx trunc(f64avx value) { return KFR_mm256_trunc_pd(*value); } 199 | KFR_SINTRIN f64avx round(f64avx value) { return KFR_mm256_roundnearest_pd(*value); } 200 | KFR_SINTRIN f32avx fract(f32avx x) { return x - floor(x); } 201 | KFR_SINTRIN f64avx fract(f64avx x) { return x - floor(x); } 202 | 203 | KFR_HANDLE_ALL(floor) 204 | KFR_HANDLE_ALL(ceil) 205 | KFR_HANDLE_ALL(round) 206 | KFR_HANDLE_ALL(trunc) 207 | KFR_HANDLE_ALL(fract) 208 | KFR_HANDLE_SCALAR(floor) 209 | KFR_HANDLE_SCALAR(ceil) 210 | KFR_HANDLE_SCALAR(round) 211 | KFR_HANDLE_SCALAR(trunc) 212 | KFR_HANDLE_SCALAR(fract) 213 | KFR_SPEC_FN(in_round, floor) 214 | KFR_SPEC_FN(in_round, ceil) 215 | KFR_SPEC_FN(in_round, round) 216 | KFR_SPEC_FN(in_round, trunc) 217 | KFR_SPEC_FN(in_round, fract) 218 | }; 219 | 220 | #undef KFR_mm_trunc_ps 221 | #undef KFR_mm_roundnearest_ps 222 | #undef KFR_mm_trunc_pd 223 | #undef KFR_mm_roundnearest_pd 224 | #undef KFR_mm_trunc_ss 225 | #undef KFR_mm_roundnearest_ss 226 | #undef KFR_mm_trunc_sd 227 | #undef KFR_mm_roundnearest_sd 228 | #undef KFR_mm_floor_ss 229 | #undef KFR_mm_floor_sd 230 | #undef KFR_mm_ceil_ss 231 | #undef KFR_mm_ceil_sd 232 | #undef KFR_mm256_trunc_ps 233 | #undef KFR_mm256_roundnearest_ps 234 | #undef KFR_mm256_trunc_pd 235 | #undef KFR_mm256_roundnearest_pd 236 | } 237 | 238 | namespace native 239 | { 240 | using fn_floor = internal::in_round<>::fn_floor; 241 | template ::value)> 242 | KFR_INTRIN ftype floor(const T1& x) 243 | { 244 | return internal::in_round<>::floor(x); 245 | } 246 | 247 | template ::value)> 248 | KFR_INTRIN expr_func floor(E1&& x) 249 | { 250 | return { fn_floor(), std::forward(x) }; 251 | } 252 | 253 | using fn_ceil = internal::in_round<>::fn_ceil; 254 | template ::value)> 255 | KFR_INTRIN ftype ceil(const T1& x) 256 | { 257 | return internal::in_round<>::ceil(x); 258 | } 259 | 260 | template ::value)> 261 | KFR_INTRIN expr_func ceil(E1&& x) 262 | { 263 | return { fn_ceil(), std::forward(x) }; 264 | } 265 | 266 | using fn_round 
= internal::in_round<>::fn_round; 267 | template ::value)> 268 | KFR_INTRIN ftype round(const T1& x) 269 | { 270 | return internal::in_round<>::round(x); 271 | } 272 | 273 | template ::value)> 274 | KFR_INTRIN expr_func round(E1&& x) 275 | { 276 | return { fn_round(), std::forward(x) }; 277 | } 278 | 279 | using fn_trunc = internal::in_round<>::fn_trunc; 280 | template ::value)> 281 | KFR_INTRIN ftype trunc(const T1& x) 282 | { 283 | return internal::in_round<>::trunc(x); 284 | } 285 | 286 | template ::value)> 287 | KFR_INTRIN expr_func trunc(E1&& x) 288 | { 289 | return { fn_trunc(), std::forward(x) }; 290 | } 291 | 292 | using fn_fract = internal::in_round<>::fn_fract; 293 | template ::value)> 294 | KFR_INTRIN ftype fract(const T1& x) 295 | { 296 | return internal::in_round<>::fract(x); 297 | } 298 | 299 | template ::value)> 300 | KFR_INTRIN expr_func fract(E1&& x) 301 | { 302 | return { fn_fract(), std::forward(x) }; 303 | } 304 | } 305 | } 306 | -------------------------------------------------------------------------------- /include/kfr/base/saturation.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 
19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | 25 | #include "function.hpp" 26 | #include "select.hpp" 27 | 28 | #pragma clang diagnostic push 29 | #if CID_HAS_WARNING("-Winaccessible-base") 30 | #pragma clang diagnostic ignored "-Winaccessible-base" 31 | #endif 32 | 33 | namespace kfr 34 | { 35 | 36 | namespace internal 37 | { 38 | /* Fallback: inherits the implementation of its base and tags the function objects with fn_disabled. */ 39 | template 40 | struct in_saturated : in_saturated 41 | { 42 | struct fn_satadd : in_saturated::fn_satadd, fn_disabled 43 | { 44 | }; /* satsub is tagged the same way as satadd; its base is spelled exactly like fn_satadd's base */ struct fn_satsub : in_saturated::fn_satsub, fn_disabled { }; 45 | }; 46 | 47 | template 48 | struct in_saturated : in_select 49 | { 50 | constexpr static cpu_t cpu = cpu_t::sse2; 51 | 52 | private: 53 | using in_select::select; 54 | 55 | public: 56 | KFR_SINTRIN u8sse satadd(u8sse x, u8sse y) { return _mm_adds_epu8(*x, *y); } 57 | KFR_SINTRIN i8sse satadd(i8sse x, i8sse y) { return _mm_adds_epi8(*x, *y); } 58 | KFR_SINTRIN u16sse satadd(u16sse x, u16sse y) { return _mm_adds_epu16(*x, *y); } 59 | KFR_SINTRIN i16sse satadd(i16sse x, i16sse y) { return _mm_adds_epi16(*x, *y); } 60 | 61 | KFR_SINTRIN u8sse satsub(u8sse x, u8sse y) { return _mm_subs_epu8(*x, *y); } 62 | KFR_SINTRIN i8sse satsub(i8sse x, i8sse y) { return _mm_subs_epi8(*x, *y); } 63 | KFR_SINTRIN u16sse satsub(u16sse x, u16sse y) { return _mm_subs_epu16(*x, *y); } 64 | KFR_SINTRIN i16sse satsub(i16sse x, i16sse y) { return _mm_subs_epi16(*x, *y); } 65 | 66 | KFR_SINTRIN i32sse satadd(i32sse a, i32sse b) { return saturated_signed_add(a, b); } 67 | KFR_SINTRIN i64sse satadd(i64sse a, i64sse b) { return saturated_signed_add(a, b); } 68 | KFR_SINTRIN u32sse satadd(u32sse a, u32sse b) { return saturated_unsigned_add(a, b); } 69 | KFR_SINTRIN u64sse satadd(u64sse a, u64sse b) { return saturated_unsigned_add(a, b); } 70 | 71 | KFR_SINTRIN i32sse satsub(i32sse a, i32sse b) { return saturated_signed_sub(a, b); } 72 | 
KFR_SINTRIN i64sse satsub(i64sse a, i64sse b) { return saturated_signed_sub(a, b); } 73 | KFR_SINTRIN u32sse satsub(u32sse a, u32sse b) { return saturated_unsigned_sub(a, b); } 74 | KFR_SINTRIN u64sse satsub(u64sse a, u64sse b) { return saturated_unsigned_sub(a, b); } 75 | 76 | private: /* Signed a + b; lanes that overflow are replaced by a limit chosen from the sign of a. */ 77 | template 78 | KFR_SINTRIN vec saturated_signed_add(vec a, vec b) 79 | { 80 | constexpr size_t shift = typebits::bits - 1; 81 | const vec sum = a + b; 82 | /* NOTE(review): assumes shift is the sign-bit index of the element type, so all-ones << shift has only the sign bit set - confirm */ const auto lowest = allonesvector(a) << shift; /* sign-bit pattern where a < 0, its complement elsewhere; a itself stays untouched for the overflow test below */ const auto limit = select(a < 0, lowest, ~lowest); 83 | /* overflow: a and b share a sign and the sum's sign differs from b's */ 84 | return select(((a ^ b) | ~(b ^ sum)) >= 0, limit, sum); 85 | } /* Signed a - b; lanes that overflow are replaced by a limit chosen from the sign of a. */ 86 | template 87 | KFR_SINTRIN vec saturated_signed_sub(vec a, vec b) 88 | { 89 | constexpr size_t shift = typebits::bits - 1; 90 | const vec diff = a - b; 91 | /* NOTE(review): same sign-bit assumption about shift as in saturated_signed_add - confirm */ const auto lowest = allonesvector(a) << shift; const auto limit = select(a < 0, lowest, ~lowest); 92 | /* overflow: a and b differ in sign and the difference's sign differs from a's */ 93 | return select(((a ^ b) & (a ^ diff)) < 0, limit, diff); 94 | } /* Unsigned a + b, replaced by all-ones in lanes where a > all-ones - b. */ 95 | template 96 | KFR_SINTRIN vec saturated_unsigned_add(vec a, vec b) 97 | { 98 | /* built from the runtime argument a, so const rather than constexpr */ const vec t = allonesvector(a); 99 | return select(a > t - b, t, a + b); 100 | } /* Unsigned a - b, replaced by zero in lanes where a < b. */ 101 | template 102 | KFR_SINTRIN vec saturated_unsigned_sub(vec a, vec b) 103 | { 104 | return select(a < b, zerovector(a), a - b); 105 | } 106 | 107 | public: 108 | KFR_HANDLE_ALL(satadd) 109 | KFR_HANDLE_ALL(satsub) 110 | KFR_SPEC_FN(in_saturated, satadd) 111 | KFR_SPEC_FN(in_saturated, satsub) 112 | }; 113 | 114 | template 115 | struct in_saturated : in_saturated 116 | { 117 | constexpr static cpu_t cpu = cpu_t::avx2; 118 | using in_saturated::satadd; 119 | using in_saturated::satsub; 120 | 121 | KFR_SINTRIN u8avx satadd(u8avx x, u8avx y) { return _mm256_adds_epu8(*x, *y); } 122 | KFR_SINTRIN i8avx satadd(i8avx x, i8avx y) { return _mm256_adds_epi8(*x, *y); } 123 | KFR_SINTRIN u16avx satadd(u16avx x, u16avx y) { return _mm256_adds_epu16(*x, *y); } 124 | KFR_SINTRIN i16avx satadd(i16avx x, i16avx y) { return _mm256_adds_epi16(*x, *y); } 125 | 126 | KFR_SINTRIN u8avx satsub(u8avx x, u8avx y) { return _mm256_subs_epu8(*x, *y); } 127 | KFR_SINTRIN i8avx satsub(i8avx x, i8avx y) { return 
_mm256_subs_epi8(*x, *y); } 128 | KFR_SINTRIN u16avx satsub(u16avx x, u16avx y) { return _mm256_subs_epu16(*x, *y); } 129 | KFR_SINTRIN i16avx satsub(i16avx x, i16avx y) { return _mm256_subs_epi16(*x, *y); } 130 | 131 | KFR_HANDLE_ALL(satadd) 132 | KFR_HANDLE_ALL(satsub) 133 | KFR_SPEC_FN(in_saturated, satadd) 134 | KFR_SPEC_FN(in_saturated, satsub) 135 | }; 136 | } 137 | namespace native 138 | { 139 | using fn_satadd = internal::in_saturated<>::fn_satadd; 140 | template ::value)> 141 | KFR_INLINE ftype> satadd(const T1& x, const T2& y) 142 | { 143 | return internal::in_saturated<>::satadd(x, y); 144 | } 145 | 146 | template ::value)> 147 | KFR_INLINE expr_func satadd(E1&& x, E2&& y) 148 | { 149 | return { fn_satadd(), std::forward(x), std::forward(y) }; 150 | } 151 | using fn_satsub = internal::in_saturated<>::fn_satsub; 152 | template ::value)> 153 | KFR_INLINE ftype> satsub(const T1& x, const T2& y) 154 | { 155 | return internal::in_saturated<>::satsub(x, y); 156 | } 157 | 158 | template ::value)> 159 | KFR_INLINE expr_func satsub(E1&& x, E2&& y) 160 | { 161 | return { fn_satsub(), std::forward(x), std::forward(y) }; 162 | } 163 | } 164 | } 165 | 166 | #pragma clang diagnostic pop 167 | -------------------------------------------------------------------------------- /include/kfr/base/select.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | 25 | #include "function.hpp" 26 | 27 | namespace kfr 28 | { 29 | namespace internal 30 | { 31 | 32 | template 33 | struct in_select_impl : in_select_impl 34 | { 35 | struct fn_select : fn_disabled 36 | { 37 | }; 38 | }; 39 | 40 | template <> 41 | struct in_select_impl 42 | { 43 | constexpr static cpu_t cur = cpu_t::sse2; 44 | 45 | template 46 | KFR_SINTRIN vec select(vec m, vec x, vec y) 47 | { 48 | return y ^ ((x ^ y) & m); 49 | } 50 | KFR_SPEC_FN(in_select_impl, select) 51 | }; 52 | 53 | template <> 54 | struct in_select_impl : in_select_impl 55 | { 56 | constexpr static cpu_t cpu = cpu_t::sse41; 57 | 58 | KFR_CPU_INTRIN(sse41) u8sse select(u8sse m, u8sse x, u8sse y) { return _mm_blendv_epi8(*y, *x, *m); } 59 | KFR_CPU_INTRIN(sse41) u16sse select(u16sse m, u16sse x, u16sse y) { return _mm_blendv_epi8(*y, *x, *m); } 60 | KFR_CPU_INTRIN(sse41) u32sse select(u32sse m, u32sse x, u32sse y) { return _mm_blendv_epi8(*y, *x, *m); } 61 | KFR_CPU_INTRIN(sse41) u64sse select(u64sse m, u64sse x, u64sse y) { return _mm_blendv_epi8(*y, *x, *m); } 62 | KFR_CPU_INTRIN(sse41) i8sse select(i8sse m, i8sse x, i8sse y) { return _mm_blendv_epi8(*y, *x, *m); } 63 | KFR_CPU_INTRIN(sse41) i16sse select(i16sse m, i16sse x, i16sse y) { return _mm_blendv_epi8(*y, *x, *m); } 64 | KFR_CPU_INTRIN(sse41) i32sse select(i32sse m, i32sse x, i32sse y) { return _mm_blendv_epi8(*y, *x, *m); } 65 | KFR_CPU_INTRIN(sse41) i64sse select(i64sse m, i64sse x, i64sse y) { return _mm_blendv_epi8(*y, *x, *m); } 
66 | KFR_CPU_INTRIN(sse41) f32sse select(f32sse m, f32sse x, f32sse y) { return _mm_blendv_ps(*y, *x, *m); } 67 | KFR_CPU_INTRIN(sse41) f64sse select(f64sse m, f64sse x, f64sse y) { return _mm_blendv_pd(*y, *x, *m); } 68 | 69 | KFR_HANDLE_ALL(select) 70 | KFR_SPEC_FN(in_select_impl, select) 71 | }; 72 | 73 | template <> 74 | struct in_select_impl : in_select_impl 75 | { 76 | constexpr static cpu_t cpu = cpu_t::avx1; 77 | using in_select_impl::select; 78 | 79 | KFR_CPU_INTRIN(avx) f64avx select(f64avx m, f64avx x, f64avx y) { return _mm256_blendv_pd(*y, *x, *m); } 80 | KFR_CPU_INTRIN(avx) f32avx select(f32avx m, f32avx x, f32avx y) { return _mm256_blendv_ps(*y, *x, *m); } 81 | 82 | KFR_HANDLE_ALL(select) 83 | KFR_SPEC_FN(in_select_impl, select) 84 | }; 85 | 86 | template <> 87 | struct in_select_impl : in_select_impl 88 | { 89 | constexpr static cpu_t cpu = cpu_t::avx2; 90 | using in_select_impl::select; 91 | 92 | KFR_CPU_INTRIN(avx2) u8avx select(u8avx m, u8avx x, u8avx y) { return _mm256_blendv_epi8(*y, *x, *m); } 93 | KFR_CPU_INTRIN(avx2) u16avx select(u16avx m, u16avx x, u16avx y) 94 | { 95 | return _mm256_blendv_epi8(*y, *x, *m); 96 | } 97 | KFR_CPU_INTRIN(avx2) u32avx select(u32avx m, u32avx x, u32avx y) 98 | { 99 | return _mm256_blendv_epi8(*y, *x, *m); 100 | } 101 | KFR_CPU_INTRIN(avx2) u64avx select(u64avx m, u64avx x, u64avx y) 102 | { 103 | return _mm256_blendv_epi8(*y, *x, *m); 104 | } 105 | KFR_CPU_INTRIN(avx2) i8avx select(i8avx m, i8avx x, i8avx y) { return _mm256_blendv_epi8(*y, *x, *m); } 106 | KFR_CPU_INTRIN(avx2) i16avx select(i16avx m, i16avx x, i16avx y) 107 | { 108 | return _mm256_blendv_epi8(*y, *x, *m); 109 | } 110 | KFR_CPU_INTRIN(avx2) i32avx select(i32avx m, i32avx x, i32avx y) 111 | { 112 | return _mm256_blendv_epi8(*y, *x, *m); 113 | } 114 | KFR_CPU_INTRIN(avx2) i64avx select(i64avx m, i64avx x, i64avx y) 115 | { 116 | return _mm256_blendv_epi8(*y, *x, *m); 117 | } 118 | 119 | KFR_HANDLE_ALL(select) 120 | KFR_SPEC_FN(in_select_impl, 
select) 121 | }; 122 | /* Front end over in_select_impl: every overload checks sizeof(M) == sizeof(T), converts the mask with bitcast and turns scalar or mask operands into vectors before forwarding. */ 123 | template 124 | struct in_select : in_select_impl 125 | { 126 | using in_select_impl::select; 127 | 128 | template 129 | KFR_SINTRIN vec select(mask m, vec x, vec y) 130 | { 131 | static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); 132 | return in_select_impl::select(bitcast(m), x, y); 133 | } 134 | template 135 | KFR_SINTRIN vec select(mask m, mask x, mask y) 136 | { 137 | static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); 138 | return in_select_impl::select(bitcast(m), ref_cast>(x), ref_cast>(y)); 139 | } 140 | 141 | template 142 | KFR_SINTRIN vec select(mask m, T x, T y) 143 | { 144 | static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); 145 | return in_select_impl::select(bitcast(m), broadcast(x), broadcast(y)); 146 | } 147 | 148 | template 149 | KFR_SINTRIN vec select(mask m, vec x, T y) 150 | { 151 | static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); 152 | return in_select_impl::select(bitcast(m), x, broadcast(y)); 153 | } 154 | 155 | template 156 | KFR_SINTRIN vec select(mask m, T x, vec y) 157 | { 158 | static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); 159 | return in_select_impl::select(bitcast(m), broadcast(x), y); 160 | } 161 | template 162 | KFR_SINTRIN vec select(mask m, mask x, T y) 163 | { 164 | static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); 165 | return in_select_impl::select(bitcast(m), ref_cast>(x), broadcast(y)); 166 | } 167 | 168 | template 169 | KFR_SINTRIN vec select(mask m, T x, mask y) 170 | { 171 | static_assert(sizeof(M) == sizeof(T), "select: Incompatible types"); 172 | /* the mask goes through bitcast here as in every other overload of this struct */ return in_select_impl::select(bitcast(m), broadcast(x), ref_cast>(y)); 173 | } 174 | KFR_SPEC_FN(in_select, select) 175 | /* Per lane: 1 where x > T(), -1 where x < T(), otherwise 0. */ 176 | template 177 | KFR_SINTRIN vec sign(vec x) 178 | { 179 | return select(x > T(), T(1), select(x < T(), T(-1), T(0))); 180 | } 181 | }; 182 | } 183 | 184 | namespace native 185 | { 186 | using fn_select = 
internal::in_select<>::fn_select; 187 | template ::value)> 188 | KFR_INLINE ftype> select(const T1& arg1, const T2& arg2, const T3& arg3) 189 | { 190 | return internal::in_select<>::select(arg1, arg2, arg3); 191 | } 192 | template ::value)> 193 | KFR_INLINE expr_func select(E1&& arg1, E2&& arg2, E3&& arg3) 194 | { 195 | return { fn_select(), std::forward(arg1), std::forward(arg2), std::forward(arg3) }; 196 | } 197 | } 198 | } 199 | -------------------------------------------------------------------------------- /include/kfr/base/sinh_cosh.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
22 | */ 23 | #pragma once 24 | #include "abs.hpp" 25 | #include "constants.hpp" 26 | #include "function.hpp" 27 | #include "log_exp.hpp" 28 | #include "min_max.hpp" 29 | #include "operators.hpp" 30 | #include "select.hpp" 31 | 32 | namespace kfr 33 | { 34 | 35 | namespace internal 36 | { 37 | 38 | template 39 | struct in_sinh_cosh : in_log_exp 40 | { 41 | constexpr static cpu_t cur = c; 42 | 43 | private: 44 | using in_log_exp::exp; 45 | 46 | public: 47 | template 48 | KFR_SINTRIN vec sinh(vec x) 49 | { 50 | return (exp(x) - exp(-x)) * T(0.5); 51 | } 52 | 53 | template 54 | KFR_SINTRIN vec cosh(vec x) 55 | { 56 | return (exp(x) + exp(-x)) * T(0.5); 57 | } 58 | 59 | template 1)> 60 | KFR_SINTRIN vec sinhcosh(vec x) 61 | { 62 | const vec a = exp(x); 63 | const vec b = exp(-x); 64 | return subadd(a, b) * T(0.5); 65 | } 66 | 67 | template 1)> 68 | KFR_SINTRIN vec coshsinh(vec x) 69 | { 70 | const vec a = exp(x); 71 | const vec b = exp(-x); 72 | return addsub(a, b) * T(0.5); 73 | } 74 | KFR_SPEC_FN(in_sinh_cosh, sinh) 75 | KFR_SPEC_FN(in_sinh_cosh, cosh) 76 | KFR_SPEC_FN(in_sinh_cosh, sinhcosh) 77 | KFR_SPEC_FN(in_sinh_cosh, coshsinh) 78 | }; 79 | } 80 | 81 | namespace native 82 | { 83 | using fn_sinh = internal::in_sinh_cosh<>::fn_sinh; 84 | template ::value)> 85 | 86 | KFR_INTRIN ftype sinh(const T1& x) 87 | { 88 | return internal::in_sinh_cosh<>::sinh(x); 89 | } 90 | 91 | template ::value)> 92 | 93 | KFR_INTRIN expr_func sinh(E1&& x) 94 | { 95 | return { fn_sinh(), std::forward(x) }; 96 | } 97 | 98 | using fn_cosh = internal::in_sinh_cosh<>::fn_cosh; 99 | template ::value)> 100 | 101 | KFR_INTRIN ftype cosh(const T1& x) 102 | { 103 | return internal::in_sinh_cosh<>::cosh(x); 104 | } 105 | 106 | template ::value)> 107 | 108 | KFR_INTRIN expr_func cosh(E1&& x) 109 | { 110 | return { fn_cosh(), std::forward(x) }; 111 | } 112 | 113 | using fn_sinhcosh = internal::in_sinh_cosh<>::fn_sinhcosh; 114 | template ::value)> 115 | 116 | KFR_INTRIN ftype sinhcosh(const T1& x) 117 
| { 118 | return internal::in_sinh_cosh<>::sinhcosh(x); 119 | } 120 | 121 | template ::value)> 122 | 123 | KFR_INTRIN expr_func sinhcosh(E1&& x) 124 | { 125 | return { fn_sinhcosh(), std::forward(x) }; 126 | } 127 | 128 | using fn_coshsinh = internal::in_sinh_cosh<>::fn_coshsinh; 129 | template ::value)> 130 | 131 | KFR_INTRIN ftype coshsinh(const T1& x) 132 | { 133 | return internal::in_sinh_cosh<>::coshsinh(x); 134 | } 135 | 136 | template ::value)> 137 | 138 | KFR_INTRIN expr_func coshsinh(E1&& x) 139 | { 140 | return { fn_coshsinh(), std::forward(x) }; 141 | } 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /include/kfr/base/specializations.i: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 
 */
#pragma once

#include "vec.hpp"
#ifndef KFR_SHUFFLE_SPECIALIZATIONS
#include "shuffle.hpp"
#endif

// Hand-written specializations of shufflevector for a handful of fixed index patterns.
// Each one ignores its second vector argument (it is unnamed) and builds the result from
// permute / shuffle / permutegroups / concat calls on the first argument only.
//
// NOTE(review): in this copy the angle-bracket argument lists after `vec` and
// `shufflevector` are missing, so the element type and width of each specialization are
// not visible here; they are left exactly as found. The first two specializations copy
// their argument into an f32x32.

namespace kfr
{
namespace internal
{
// Index pattern 0,1, 8,9, 16,17, 24,25, 2,3, 10,11, ...: element pairs gathered at a
// stride of 8. Implemented as the same 16-element permute applied to the low and high
// halves, followed by a reordering of groups of 4.
template <>
inline vec shufflevector(
    csizes_t<0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27, 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14,
             15, 22, 23, 30, 31>,
    vec x, vec)
{
    f32x32 w = x;

    w = concat(permute<0, 1, 8, 9, 4, 5, 12, 13, 2, 3, 10, 11, 6, 7, 14, 15>(low(w)),
               permute<0, 1, 8, 9, 4, 5, 12, 13, 2, 3, 10, 11, 6, 7, 14, 15>(high(w)));

    w = permutegroups<(4), 0, 4, 2, 6, 1, 5, 3, 7>(w); // avx: vperm2f128 & vinsertf128, sse: no-op
    return w;
}

// Index pattern 0,1, 16,17, 8,9, 24,25, 4,5, 20,21, ...: the same 16-element permute is
// applied to even<8>(w) and odd<8>(w), followed by a reordering of groups of 4.
template <>
inline vec shufflevector(
    csizes_t<0, 1, 16, 17, 8, 9, 24, 25, 4, 5, 20, 21, 12, 13, 28, 29, 2, 3, 18, 19, 10, 11, 26, 27, 6, 7, 22,
             23, 14, 15, 30, 31>,
    vec x, vec)
{
    f32x32 w = x;

    w = concat(permute<0, 1, 8, 9, 4, 5, 12, 13, /**/ 2, 3, 10, 11, 6, 7, 14, 15>(even<8>(w)),
               permute<0, 1, 8, 9, 4, 5, 12, 13, /**/ 2, 3, 10, 11, 6, 7, 14, 15>(odd<8>(w)));

    w = permutegroups<(4), 0, 4, 1, 5, 2, 6, 3, 7>(w); // avx: vperm2f128 & vinsertf128, sse: no-op
    return w;
}

// Convenience wrapper: calls shufflevector with the same index list as the
// specialization directly above, passing x as both vector arguments.
inline vec bitreverse_2(vec x)
{
    return shufflevector(csizes<0, 1, 16, 17, 8, 9, 24, 25, 4, 5, 20, 21, 12, 13, 28, 29, 2, 3, 18,
                                19, 10, 11, 26, 27, 6, 7, 22, 23, 14, 15, 30, 31>,
                         x, x);
}

// 64-index pattern 0,1, 32,33, 16,17, 48,49, ...: applies bitreverse_2 to even<8>(x) and
// odd<8>(x), concatenates the two results, then reorders groups of 8.
template <>
inline vec shufflevector(
    csizes_t<0, 1, 32, 33, 16, 17, 48, 49, 8, 9, 40, 41, 24, 25, 56, 57, 4, 5, 36, 37, 20, 21, 52, 53, 12, 13,
             44, 45, 28, 29, 60, 61, 2, 3, 34, 35, 18, 19, 50, 51, 10, 11, 42, 43, 26, 27, 58, 59, 6, 7, 38,
             39, 22, 23, 54, 55, 14, 15, 46, 47, 30, 31, 62, 63>,
    vec x, vec)
{
    x = concat(bitreverse_2(even<8>(x)), bitreverse_2(odd<8>(x)));
    return permutegroups<(8), 0, 4, 1, 5, 2, 6, 3, 7>(x);
}

// 16-index pattern 0,2,4,...,14, 1,3,5,...,15: even-indexed elements first, then the
// odd-indexed ones. Groups of 4 are reordered first, then two 4-element shuffles of the
// low/high halves are concatenated.
template <>
inline vec shufflevector(csizes_t<0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15>,
                         vec x, vec)
{
    // asm volatile("int $3");
    x = permutegroups<(4), 0, 2, 1, 3>(x);

    x = concat(shuffle<0, 2, 8 + 0, 8 + 2>(low(x), high(x)), shuffle<1, 3, 8 + 1, 8 + 3>(low(x), high(x)));

    return x;
}

// 16-index pattern 0,8, 1,9, 2,10, ...: elements of the first and second half alternate.
// Same building blocks as the specialization above, applied in the opposite order
// (shuffles first, group reordering last).
template <>
inline vec shufflevector(csizes_t<0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15>,
                         vec x, vec)
{
    x = concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x)));

    x = permutegroups<(4), 0, 2, 1, 3>(x);

    return x;
}

// 32-index pattern 0,16, 1,17, 2,18, ...: elements of the first and second half
// alternate. Groups of 8 are reordered, then interleavehalfs is applied to each half.
template <>
inline vec shufflevector(
    csizes_t<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13,
             29, 14, 30, 15, 31>,
    vec x, vec)
{
    x = permutegroups<(8), 0, 2, 1, 3>(x);

    x = concat(interleavehalfs(low(x)), interleavehalfs(high(x)));

    return x;
}
}
}
--------------------------------------------------------------------------------
/include/kfr/base/sqrt.hpp:
--------------------------------------------------------------------------------
/**
 * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 * This file is part of KFR
 *
 * KFR is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * KFR is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with KFR.
 *
 * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 * Buying a commercial license is mandatory as soon as you develop commercial activities without
 * disclosing the source code of your own applications.
 * See http://www.kfrlib.com for details.
 */
#pragma once

#include "function.hpp"

namespace kfr
{

namespace internal
{

// NOTE(review): in this copy the angle-bracket argument lists (template parameters and the
// arguments that distinguish the in_sqrt specializations and their bases) are missing;
// they are left exactly as found.

// Primary template: derives from another in_sqrt instantiation and declares fn_sqrt as a
// type derived from fn_disabled.
template
struct in_sqrt : in_sqrt
{
    struct fn_sqrt : fn_disabled
    {
    };
};

// Specialization whose `cpu` constant is cpu_t::sse2: square root of f32sse / f64sse
// through _mm_sqrt_ps / _mm_sqrt_pd.
template <>
struct in_sqrt
{
    constexpr static cpu_t cpu = cpu_t::sse2;

    KFR_SINTRIN f32sse sqrt(f32sse x) { return _mm_sqrt_ps(*x); }
    KFR_SINTRIN f64sse sqrt(f64sse x) { return _mm_sqrt_pd(*x); }

    KFR_HANDLE_ALL(sqrt)
    KFR_HANDLE_SCALAR(sqrt)
    KFR_SPEC_FN(in_sqrt, sqrt)
};

// Specialization whose `cpu` constant is cpu_t::avx1: re-exports the base class's sqrt
// overloads and adds f32avx / f64avx overloads through _mm256_sqrt_ps / _mm256_sqrt_pd.
template <>
struct in_sqrt : in_sqrt
{
    constexpr static cpu_t cpu = cpu_t::avx1;
    using in_sqrt::sqrt;

    KFR_SINTRIN f32avx KFR_USE_CPU(avx) sqrt(f32avx x) { return _mm256_sqrt_ps(*x); }
    KFR_SINTRIN f64avx KFR_USE_CPU(avx) sqrt(f64avx x) { return _mm256_sqrt_pd(*x); }

    KFR_HANDLE_ALL(sqrt)
    KFR_HANDLE_SCALAR(sqrt)
    KFR_SPEC_FN(in_sqrt, sqrt)
};
}
namespace native
{
// Function-object type for sqrt, taken from the default in_sqrt<> instantiation.
using fn_sqrt = internal::in_sqrt<>::fn_sqrt;

// Value overload: forwards x to internal::in_sqrt<>::sqrt.
template ::value)>
KFR_INTRIN ftype sqrt(const T1& x)
{
    return internal::in_sqrt<>::sqrt(x);
}

// Expression overload: returns an expr_func brace-initialised from a default-constructed
// fn_sqrt and the forwarded argument.
template ::value)>
KFR_INTRIN expr_func sqrt(E1&& x)
{
    return { fn_sqrt(), std::forward(x) };
}
}
}
--------------------------------------------------------------------------------
/include/kfr/base/tan.hpp:
--------------------------------------------------------------------------------
/**
 * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 * This file is part of KFR
 *
 * KFR is free software: you can redistribute it
and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | #include "abs.hpp" 25 | #include "constants.hpp" 26 | #include "function.hpp" 27 | #include "operators.hpp" 28 | #include "select.hpp" 29 | #include "sin_cos.hpp" 30 | 31 | #pragma clang diagnostic push 32 | #if CID_HAS_WARNING("-Winaccessible-base") 33 | #pragma clang diagnostic ignored "-Winaccessible-base" 34 | #endif 35 | #if CID_HAS_WARNING("-Wc99-extensions") 36 | #pragma clang diagnostic ignored "-Wc99-extensions" 37 | #endif 38 | 39 | namespace kfr 40 | { 41 | 42 | namespace internal 43 | { 44 | 45 | template 46 | struct in_tan : in_trig, in_select, in_round, in_abs 47 | { 48 | private: 49 | using in_abs::abs; 50 | using in_round::floor; 51 | using in_select::select; 52 | using in_trig::mask_horner; 53 | 54 | template > 55 | KFR_SINTRIN vec trig_fold(vec x_full, mask& inverse) 56 | { 57 | constexpr T pi_14 = c_pi; 58 | 59 | vec y = abs(x_full); 60 | vec scaled = y / pi_14; 61 | 62 | vec k_real = floor(scaled); 63 | vec k = cast(k_real); 64 | 65 | vec x = y - k_real * pi_14; 66 | 67 | mask need_offset = (k & 1) != 0; 68 | x = select(need_offset, x - pi_14, x); 69 | 70 
| vec k_mod4 = k & 3; 71 | inverse = (k_mod4 == 1) || (k_mod4 == 2); 72 | return x; 73 | } 74 | 75 | public: 76 | template 77 | KFR_SINTRIN vec tan(vec x_full) 78 | { 79 | mask inverse; 80 | const vec x = trig_fold(x_full, inverse); 81 | 82 | constexpr f32 tan_c2 = 0x5.555378p-4; 83 | constexpr f32 tan_c4 = 0x2.225bb8p-4; 84 | constexpr f32 tan_c6 = 0xd.ac3fep-8; 85 | constexpr f32 tan_c8 = 0x6.41644p-8; 86 | constexpr f32 tan_c10 = 0xc.bfe7ep-12; 87 | constexpr f32 tan_c12 = 0x2.6754dp-8; 88 | 89 | constexpr f32 cot_c2 = -0x5.555558p-4; 90 | constexpr f32 cot_c4 = -0x5.b0581p-8; 91 | constexpr f32 cot_c6 = -0x8.ac5ccp-12; 92 | constexpr f32 cot_c8 = -0xd.aaa01p-16; 93 | constexpr f32 cot_c10 = -0x1.a9a9b4p-16; 94 | constexpr f32 cot_c12 = -0x6.f7d4dp-24; 95 | 96 | const vec x2 = x * x; 97 | const vec val = mask_horner(x2, inverse, 1.0f, 1.0f, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6, 98 | tan_c6, cot_c8, tan_c8, cot_c10, tan_c10, cot_c12, tan_c12); 99 | 100 | const vec z = select(inverse, val / -x, val * x); 101 | return mulsign(z, x_full); 102 | } 103 | 104 | template 105 | KFR_SINTRIN vec tan(vec x_full) 106 | { 107 | mask inverse; 108 | const vec x = trig_fold(x_full, inverse); 109 | 110 | constexpr f64 tan_c2 = 0x5.5555554d8e5b8p-4; 111 | constexpr f64 tan_c4 = 0x2.222224820264p-4; 112 | constexpr f64 tan_c6 = 0xd.d0d90de32b3e8p-8; 113 | constexpr f64 tan_c8 = 0x5.99723bdcf5cacp-8; 114 | constexpr f64 tan_c10 = 0x2.434a142e413ap-8; 115 | constexpr f64 tan_c12 = 0xf.2b59061305efp-12; 116 | constexpr f64 tan_c14 = 0x4.a12565071a664p-12; 117 | constexpr f64 tan_c16 = 0x4.dada3797ac1bcp-12; 118 | constexpr f64 tan_c18 = -0x1.a74976b6ea3f3p-12; 119 | constexpr f64 tan_c20 = 0x1.d06a5ae5e4a74p-12; 120 | 121 | constexpr f64 cot_c2 = -0x5.5555555555554p-4; 122 | constexpr f64 cot_c4 = -0x5.b05b05b05b758p-8; 123 | constexpr f64 cot_c6 = -0x8.ab355dffc79a8p-12; 124 | constexpr f64 cot_c8 = -0xd.debbca405c9f8p-16; 125 | constexpr f64 cot_c10 = -0x1.66a8edb99b15p-16; 126 | 
constexpr f64 cot_c12 = -0x2.450239be0ee92p-20; 127 | constexpr f64 cot_c14 = -0x3.ad6ddb4719438p-24; 128 | constexpr f64 cot_c16 = -0x5.ff4c42741356p-28; 129 | constexpr f64 cot_c18 = -0x9.06881bcdf3108p-32; 130 | constexpr f64 cot_c20 = -0x1.644abedc113cap-32; 131 | 132 | const vec x2 = x * x; 133 | const vec val = 134 | mask_horner(x2, inverse, 1.0, 1.0, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6, tan_c6, cot_c8, tan_c8, 135 | cot_c10, tan_c10, cot_c12, tan_c12, cot_c14, tan_c14, cot_c16, tan_c16, cot_c18, 136 | tan_c18, cot_c20, tan_c20); 137 | 138 | const vec z = select(inverse, val / -x, val * x); 139 | return mulsign(z, x_full); 140 | } 141 | template 142 | KFR_SINTRIN T tandeg(const T& x) 143 | { 144 | return tan(x * c_degtorad); 145 | } 146 | 147 | KFR_HANDLE_SCALAR(tan) 148 | KFR_SPEC_FN(in_tan, tan) 149 | KFR_SPEC_FN(in_tan, tandeg) 150 | }; 151 | } 152 | 153 | namespace native 154 | { 155 | using fn_tan = internal::in_tan<>::fn_tan; 156 | template ::value)> 157 | KFR_INTRIN ftype tan(const T1& x) 158 | { 159 | return internal::in_tan<>::tan(x); 160 | } 161 | 162 | template ::value)> 163 | KFR_INTRIN expr_func tan(E1&& x) 164 | { 165 | return { fn_tan(), std::forward(x) }; 166 | } 167 | 168 | using fn_tandeg = internal::in_tan<>::fn_tandeg; 169 | template ::value)> 170 | KFR_INTRIN ftype tandeg(const T1& x) 171 | { 172 | return internal::in_tan<>::tandeg(x); 173 | } 174 | 175 | template ::value)> 176 | KFR_INTRIN expr_func tandeg(E1&& x) 177 | { 178 | return { fn_tandeg(), std::forward(x) }; 179 | } 180 | } 181 | } 182 | 183 | #pragma clang diagnostic pop 184 | -------------------------------------------------------------------------------- /include/kfr/cident.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #if defined(_M_IX86) || defined(__i386__) || defined(_M_X64) || defined(__x86_64__) 4 | #define CID_ARCH_X86 1 5 | #endif 6 | 7 | #ifdef CID_ARCH_X86 8 | #if defined(_M_X64) || 
defined(__x86_64__) 9 | #define CID_ARCH_X64 1 10 | #else 11 | #define CID_ARCH_X32 1 12 | #endif 13 | 14 | #if defined __AVX512F__ && !defined CID_ARCH_AVX512 15 | #define CID_ARCH_AVX512 1 16 | #define CID_ARCH_AVX2 1 17 | #define CID_ARCH_AVX 1 18 | #define CID_ARCH_SSE42 1 19 | #define CID_ARCH_SSE41 1 20 | #define CID_ARCH_SSSE3 1 21 | #define CID_ARCH_SSE3 1 22 | #define CID_ARCH_SSE2 1 23 | #define CID_ARCH_SSE 1 24 | #endif 25 | #if defined __AVX2__ && !defined CID_ARCH_AVX2 26 | #define CID_ARCH_AVX2 1 27 | #define CID_ARCH_AVX 1 28 | #define CID_ARCH_SSE42 1 29 | #define CID_ARCH_SSE41 1 30 | #define CID_ARCH_SSSE3 1 31 | #define CID_ARCH_SSE3 1 32 | #define CID_ARCH_SSE2 1 33 | #define CID_ARCH_SSE 1 34 | #endif 35 | #if defined __AVX__ && !defined CID_ARCH_AVX 36 | #define CID_ARCH_AVX 1 37 | #define CID_ARCH_SSE42 1 38 | #define CID_ARCH_SSE41 1 39 | #define CID_ARCH_SSSE3 1 40 | #define CID_ARCH_SSE3 1 41 | #define CID_ARCH_SSE2 1 42 | #define CID_ARCH_SSE 1 43 | #endif 44 | #if defined __SSE4_2__ && !defined CID_ARCH_SSE4_2 45 | #define CID_ARCH_SSE4_2 1 46 | #define CID_ARCH_SSE41 1 47 | #define CID_ARCH_SSSE3 1 48 | #define CID_ARCH_SSE3 1 49 | #define CID_ARCH_SSE2 1 50 | #define CID_ARCH_SSE 1 51 | #endif 52 | #if defined __SSE4_1__ && !defined CID_ARCH_SSE4_1 53 | #define CID_ARCH_SSE4_1 1 54 | #define CID_ARCH_SSSE3 1 55 | #define CID_ARCH_SSE3 1 56 | #define CID_ARCH_SSE2 1 57 | #define CID_ARCH_SSE 1 58 | #endif 59 | #if defined __SSSE3__ && !defined CID_ARCH_SSSE3 60 | #define CID_ARCH_SSSE3 1 61 | #define CID_ARCH_SSE3 1 62 | #define CID_ARCH_SSE2 1 63 | #define CID_ARCH_SSE 1 64 | #endif 65 | #if defined __SSE3__ && !defined CID_ARCH_SSE3 66 | #define CID_ARCH_SSE3 1 67 | #define CID_ARCH_SSE2 1 68 | #define CID_ARCH_SSE 1 69 | #endif 70 | #if (defined CID_ARCH_X64 || defined __SSE2__) && !defined CID_ARCH_SSE2 71 | #define CID_ARCH_SSE2 1 72 | #define CID_ARCH_SSE 1 73 | #endif 74 | 75 | #if (defined CID_ARCH_X64 || defined __SSE__) && 
!defined CID_ARCH_SSE1 76 | #define CID_ARCH_SSE 1 77 | #endif 78 | 79 | #if defined __FMA__ && !defined CID_ARCH_FMA 80 | #define CID_ARCH_FMA 1 81 | #endif 82 | 83 | #if defined __AES__ && !defined CID_ARCH_AES 84 | #define CID_ARCH_AES 1 85 | #endif 86 | 87 | #if defined __BMI__ && !defined CID_ARCH_BMI 88 | #define CID_ARCH_BMI 1 89 | #endif 90 | 91 | #if defined __BMI2__ && !defined CID_ARCH_BMI2 92 | #define CID_ARCH_BMI2 1 93 | #endif 94 | 95 | #if defined __LZCNT__ && !defined CID_ARCH_LZCNT 96 | #define CID_ARCH_LZCNT 1 97 | #endif 98 | 99 | #if defined CID_ARCH_AVX512 100 | #define CID_ARCH_NAME avx512 101 | #elif defined CID_ARCH_AVX2 102 | #define CID_ARCH_NAME avx2 103 | #elif defined CID_ARCH_AVX 104 | #define CID_ARCH_NAME avx 105 | #elif defined CID_ARCH_SSE4_1 106 | #define CID_ARCH_NAME sse41 107 | #elif defined CID_ARCH_SSSE3 108 | #define CID_ARCH_NAME ssse3 109 | #elif defined CID_ARCH_SSE3 110 | #define CID_ARCH_NAME sse3 111 | #elif defined CID_ARCH_SSE2 112 | #define CID_ARCH_NAME sse2 113 | #elif defined CID_ARCH_SSE 114 | #define CID_ARCH_NAME sse 115 | #else 116 | #define CID_ARCH_NAME legacy 117 | #endif 118 | 119 | #endif 120 | 121 | #define CID_STRINGIFY2(x) #x 122 | #define CID_STRINGIFY(x) CID_STRINGIFY2(x) 123 | 124 | #if defined(_WIN32) // Windows 125 | #define CID_OS_WIN 1 126 | #endif 127 | 128 | #if defined(__APPLE__) 129 | #include "TargetConditionals.h" 130 | #ifdef TARGET_OS_IPHONE 131 | #define CID_OS_IOS 1 132 | #define CID_OS_MOBILE 1 133 | #elif TARGET_IPHONE_SIMULATOR 134 | #define CID_OS_IOS 1 135 | #define CID_OS_IOS_SIMULATOR 1 136 | #define CID_OS_MOBILE 1 137 | #elif TARGET_OS_MAC 138 | #define CID_OS_MAC 1 139 | #define CID_OS_MACOS 1 140 | #define CID_OS_OSX 1 141 | #endif 142 | #define CID_OS_POSIX 1 143 | #endif 144 | 145 | #if defined(__ANDROID__) 146 | #define CID_OS_ANDROID 1 147 | #define CID_OS_MOBILE 1 148 | #define CID_OS_POSIX 1 149 | #endif 150 | 151 | #if defined(__linux__) 152 | #define CID_OS_LINUX 1 
153 | #define CID_OS_POSIX 1 154 | #endif 155 | 156 | #if defined(_MSC_VER) // Visual C/C++ 157 | #define CID_COMPILER_MSVC 1 158 | #define CID_MSVC_ATTRIBUTES 1 159 | #define CID_MSC_VER _MSC_VER 160 | #else 161 | #define CID_MSC_VER 0 162 | #endif 163 | 164 | #if defined(__GNUC__) || defined(__clang__) // GCC, Clang 165 | #define CID_COMPILER_GNU 1 166 | #define CID_GNU_ATTRIBUTES 1 167 | #define CID_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 168 | #if __cplusplus >= 201103L || defined __GXX_EXPERIMENTAL_CXX0X__ 169 | #define CID_HAS_GXX_CXX11 1 170 | #endif 171 | #else 172 | #define CID_GCC_VERSION 0 173 | #endif 174 | 175 | #if defined(__INTEL_COMPILER) // Intel Compiler 176 | #define CID_COMPILER_INTEL 1 177 | #define CID_ICC_VERSION __INTEL_COMPILER 178 | #elif defined(__ICL) 179 | #define CID_COMPILER_INTEL 1 180 | #define CID_ICC_VERSION __ICL 181 | #else 182 | #define CID_ICC_VERSION 0 183 | #endif 184 | 185 | #if defined(__clang__) // Clang 186 | #define CID_COMPILER_CLANG 1 187 | #ifndef CID_GNU_ATTRIBUTES 188 | #define CID_GNU_ATTRIBUTES 1 189 | #endif 190 | #endif 191 | 192 | #if defined(CID_GNU_ATTRIBUTES) 193 | 194 | #define CID_NODEBUG 195 | // __attribute__((__nodebug__)) 196 | #define CID_INLINE __inline__ __attribute__((__always_inline__)) 197 | #define CID_INTRIN CID_INLINE CID_NODEBUG 198 | #define CID_INLINE_MEMBER __attribute__((__always_inline__)) 199 | #define CID_INLINE_LAMBDA CID_INLINE_MEMBER 200 | #define CID_NOINLINE __attribute__((__noinline__)) 201 | #define CID_FLATTEN __attribute__((__flatten__)) 202 | #define CID_RESTRICT __restrict__ 203 | 204 | #elif defined(CID_MSVC_ATTRIBUTES) 205 | 206 | #define CID_NODEBUG 207 | #define CID_INLINE inline __forceinline 208 | #define CID_INTRIN CID_INLINE CID_NODEBUG 209 | #define CID_INLINE_MEMBER __forceinline 210 | #define CID_INLINE_LAMBDA 211 | #define CID_NOINLINE __declspec(noinline) 212 | #define CID_FLATTEN 213 | #define CID_RESTRICT __restrict 214 | 215 | #endif 216 | 217 | 
#define CID_INLINE_STATIC CID_INLINE static 218 | 219 | #define CID_EXTERN_C extern "C" 220 | 221 | #define CID_PUBLIC_C CID_EXTERN_C CID_NOINLINE 222 | 223 | #define CID_ALWAYS_INLINE_STATIC CID_ALWAYS_INLINE static 224 | 225 | #ifdef CID_OS_WIN 226 | #define CID_CDECL __cdecl 227 | #else 228 | #define CID_CDECL __attribute__((cdecl)) 229 | #endif 230 | 231 | #ifdef CID_OS_WIN 232 | #if defined(CID_MSVC_ATTRIBUTES) 233 | #define CID_DLL_EXPORT __declspec(dllexport) 234 | #define CID_DLL_IMPORT __declspec(dllimport) 235 | #else 236 | #define CID_DLL_EXPORT __attribute__((dllexport)) 237 | #define CID_DLL_IMPORT __attribute__((dllimport)) 238 | #endif 239 | #else 240 | #define CID_DLL_EXPORT 241 | #define CID_DLL_IMPORT 242 | #endif 243 | 244 | #ifdef __has_builtin 245 | #define CID_HAS_BUILTIN(builtin) __has_builtin(builtin) 246 | #else 247 | #define CID_HAS_BUILTIN(builtin) 0 248 | #endif 249 | 250 | #ifdef __has_feature 251 | #define CID_HAS_FEATURE(feature) __has_feature(feature) 252 | #else 253 | #define CID_HAS_FEATURE(feature) 0 254 | #endif 255 | 256 | #ifdef __has_extension 257 | #define CID_HAS_EXTENSION(extension) __has_extension(extension) 258 | #else 259 | #define CID_HAS_EXTENSION(extension) 0 260 | #endif 261 | 262 | #ifdef __has_attribute 263 | #define CID_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) 264 | #else 265 | #define CID_HAS_ATTRIBUTE(attribute) 0 266 | #endif 267 | 268 | #ifdef __has_warning 269 | #define CID_HAS_WARNING(warning) __has_warning(warning) 270 | #else 271 | #define CID_HAS_WARNING(warning) 0 272 | #endif 273 | 274 | #define CID_HAS_VARIADIC_TEMPLATES \ 275 | (CID_HAS_FEATURE(cxx_variadic_templates) || (CID_GCC_VERSION >= 404 && CID_HAS_GXX_CXX11) || \ 276 | CID_MSC_VER >= 1800) 277 | 278 | #ifdef CID_BUILDING_DLL 279 | #define CID_C_API CID_DLL_EXPORT 280 | #else 281 | #define CID_C_API CID_DLL_IMPORT 282 | #endif 283 | 284 | #if __cplusplus >= 201103L || CID_MSC_VER >= 1900 || CID_HAS_FEATURE(cxx_constexpr) 285 | 
#define CID_HAS_CONSTEXPR 1 286 | #endif 287 | 288 | #if __cpp_constexpr >= 201304 || CID_HAS_FEATURE(cxx_constexpr) 289 | #define CID_HAS_FULL_CONSTEXPR 1 290 | #endif 291 | 292 | #if CID_HAS_CONSTEXPR 293 | #define CID_CONSTEXPR constexpr 294 | #else 295 | #define CID_CONSTEXPR 296 | #endif 297 | 298 | #if CID_HAS_FEATURE(cxx_noexcept) || (CID_GCC_VERSION >= 408 && CID_HAS_GXX_CXX11) || CID_MSC_VER >= 1900 299 | #define CID_HAS_NOEXCEPT 1 300 | #endif 301 | 302 | #if CID_HAS_NOEXCEPT 303 | #define CID_NOEXCEPT noexcept 304 | #else 305 | #define CID_NOEXCEPT 306 | #endif 307 | 308 | #if CID_COMPILER_GNU && !defined(__EXCEPTIONS) 309 | #define CID_HAS_EXCEPTIONS 0 310 | #endif 311 | #if CID_COMPILER_MSVC && !_HAS_EXCEPTIONS 312 | #define CID_HAS_EXCEPTIONS 0 313 | #endif 314 | 315 | #ifndef CID_HAS_EXCEPTIONS 316 | #define CID_HAS_EXCEPTIONS 1 317 | #endif 318 | 319 | #include 320 | 321 | #ifndef CID_THROW 322 | #if CID_HAS_EXCEPTIONS 323 | #define CID_THROW(x) throw x 324 | #else 325 | #define CID_THROW(x) assert(false) 326 | #endif 327 | #endif 328 | 329 | #if __cplusplus >= 201103L || CID_MSC_VER >= 1900 || CID_HAS_FEATURE(cxx_constexpr) 330 | 331 | #include 332 | namespace cid 333 | { 334 | template 335 | constexpr inline static size_t arraysize(const T (&)[N]) noexcept 336 | { 337 | return N; 338 | } 339 | } 340 | 341 | #define CID_ARRAYSIZE(arr) ::cid::arraysize(arr) 342 | #elif CID_COMPILER_MSVC 343 | #define CID_ARRAYSIZE(arr) _countof(arr) 344 | #elif __cplusplus >= 199711L && \ 345 | (defined(__INTEL_COMPILER) || defined(__clang__) || \ 346 | (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)))) 347 | template 348 | char (&COUNTOF_REQUIRES_ARRAY_ARGUMENT(T (&)[N]))[N]; 349 | #define CID_ARRAYSIZE(x) sizeof(COUNTOF_REQUIRES_ARRAY_ARGUMENT(x)) 350 | #else 351 | #define CID_ARRAYSIZE(arr) sizeof(arr) / sizeof(arr[0]) 352 | #endif 353 | 354 | #ifdef CID_COMPILER_MSVC 355 | #define CID_FUNC_SIGNATURE __FUNCSIG__ 356 | #else 357 | 
#define CID_FUNC_SIGNATURE __PRETTY_FUNCTION__ 358 | #endif 359 | -------------------------------------------------------------------------------- /include/kfr/dft/conv.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
 */
#pragma once

#include "../base/complex.hpp"
#include "../base/constants.hpp"
#include "../base/memory.hpp"
#include "../base/read_write.hpp"
#include "../base/vec.hpp"
#include "../expressions/operators.hpp"

#include "fft.hpp"

#pragma clang diagnostic push
#if CID_HAS_WARNING("-Wshadow")
#pragma clang diagnostic ignored "-Wshadow"
#endif

namespace kfr
{

// Convolution of src1 and src2 computed through the DFT.
//
// Both inputs are copied and resized to `size` (the next power of two of
// src1.size() + src2.size() - 1, padding value 0), transformed with the same plan,
// multiplied, and transformed again with the plan's last argument set to true. The result
// is the real part of the first src1.size() + src2.size() - 1 values divided by `size`.
//
// NOTE(review): in this copy the angle-bracket argument lists (template parameters,
// arguments of univector / dft_plan / typed) are missing; they are left exactly as found.
// NOTE(review): when both inputs are empty, src1.size() + src2.size() - 1 wraps around if
// size() is unsigned - confirm whether empty inputs can reach this function.
template
KFR_INTRIN univector convolve(const univector& src1, const univector& src2)
{
    const size_t size = next_poweroftwo(src1.size() + src2.size() - 1);
    univector> src1padded = src1;
    univector> src2padded = src2;
    src1padded.resize(size, 0);
    src2padded.resize(size, 0);
    dft_plan plan(size);
    // Scratch buffer sized by the plan and shared by all three transforms.
    univector temp(plan.temp_size);
    plan.execute(src1padded, src1padded, temp);
    plan.execute(src2padded, src2padded, temp);
    src1padded = src1padded * src2padded;
    plan.execute(src1padded, src1padded, temp, true);
    return typed(real(src1padded), src1.size() + src2.size() - 1) / T(size);
}
}
#pragma clang diagnostic pop
--------------------------------------------------------------------------------
/include/kfr/dft/reference_dft.hpp:
--------------------------------------------------------------------------------
/**
 * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 * This file is part of KFR
 *
 * KFR is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * KFR is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | 25 | #include "../base/complex.hpp" 26 | #include "../base/constants.hpp" 27 | #include "../base/memory.hpp" 28 | #include "../base/read_write.hpp" 29 | #include "../base/vec.hpp" 30 | #include "../misc/small_buffer.hpp" 31 | #include 32 | 33 | namespace kfr 34 | { 35 | 36 | template 37 | void reference_fft_pass(Tnumber pi2, size_t N, size_t offset, size_t delta, int flag, Tnumber (*x)[2], 38 | Tnumber (*X)[2], Tnumber (*XX)[2]) 39 | { 40 | const size_t N2 = N / 2; 41 | using std::sin; 42 | using std::cos; 43 | 44 | if (N != 2) 45 | { 46 | reference_fft_pass(pi2, N2, offset, 2 * delta, flag, x, XX, X); 47 | reference_fft_pass(pi2, N2, offset + delta, 2 * delta, flag, x, XX, X); 48 | 49 | for (size_t k = 0; k < N2; k++) 50 | { 51 | const size_t k00 = offset + k * delta; 52 | const size_t k01 = k00 + N2 * delta; 53 | const size_t k10 = offset + 2 * k * delta; 54 | const size_t k11 = k10 + delta; 55 | const Tnumber m = static_cast(k) / N; 56 | const Tnumber cs = cos(pi2 * m); 57 | const Tnumber sn = flag * sin(pi2 * m); 58 | const Tnumber tmp0 = cs * XX[k11][0] + sn * XX[k11][1]; 59 | const Tnumber tmp1 = cs * XX[k11][1] - sn * XX[k11][0]; 60 | X[k01][0] = XX[k10][0] - tmp0; 61 | X[k01][1] = XX[k10][1] - tmp1; 62 | X[k00][0] = XX[k10][0] + tmp0; 63 | X[k00][1] = XX[k10][1] + tmp1; 64 | } 65 | } 66 | else 67 | { 68 | const size_t k00 = offset; 69 | const size_t k01 = k00 + delta; 70 | X[k01][0] = x[k00][0] - x[k01][0]; 71 | X[k01][1] = x[k00][1] - x[k01][1]; 72 | X[k00][0] = x[k00][0] + x[k01][0]; 73 | X[k00][1] = x[k00][1] + 
x[k01][1]; 74 | } 75 | } 76 | 77 | template 78 | void reference_fft(complex* out, const complex* in, size_t size, bool inversion = false) 79 | { 80 | using Tcmplx = Tnumber(*)[2]; 81 | if (size < 2) 82 | return; 83 | std::vector> datain(size); 84 | std::vector> dataout(size); 85 | std::vector> temp(size); 86 | std::copy(in, in + size, datain.begin()); 87 | const Tnumber pi2 = c_pi; 88 | reference_fft_pass(pi2, size, 0, 1, inversion ? -1 : +1, Tcmplx(datain.data()), 89 | Tcmplx(dataout.data()), Tcmplx(temp.data())); 90 | std::copy(dataout.begin(), dataout.end(), out); 91 | } 92 | 93 | template 94 | void reference_dft(complex* out, const complex* in, size_t size, bool inversion = false) 95 | { 96 | using std::sin; 97 | using std::cos; 98 | if (is_poweroftwo(size)) 99 | { 100 | return reference_fft(out, in, size, inversion); 101 | } 102 | constexpr Tnumber pi2 = c_pi; 103 | if (size < 2) 104 | return; 105 | std::vector> datain; 106 | if (out == in) 107 | { 108 | datain.resize(size); 109 | std::copy_n(in, size, datain.begin()); 110 | in = datain.data(); 111 | } 112 | { 113 | Tnumber sumr = 0; 114 | Tnumber sumi = 0; 115 | for (size_t j = 0; j < size; j++) 116 | { 117 | sumr += static_cast(in[j].real()); 118 | sumi += static_cast(in[j].imag()); 119 | } 120 | out[0] = { static_cast(sumr), static_cast(sumi) }; 121 | } 122 | for (size_t i = 1; i < size; i++) 123 | { 124 | Tnumber sumr = static_cast(in[0].real()); 125 | Tnumber sumi = static_cast(in[0].imag()); 126 | 127 | for (size_t j = 1; j < size; j++) 128 | { 129 | const Tnumber x = pi2 * ((i * j) % size) / size; 130 | Tnumber twr = cos(x); 131 | Tnumber twi = sin(x); 132 | if (inversion) 133 | twi = -twi; 134 | 135 | sumr += twr * static_cast(in[j].real()) + twi * static_cast(in[j].imag()); 136 | sumi += twr * static_cast(in[j].imag()) - twi * static_cast(in[j].real()); 137 | out[i] = { static_cast(sumr), static_cast(sumi) }; 138 | } 139 | } 140 | } 141 | } 142 | 
-------------------------------------------------------------------------------- /include/kfr/dispatch/cpuid.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
22 | */ 23 | #pragma once 24 | 25 | #include "../base/types.hpp" 26 | #include 27 | 28 | namespace kfr 29 | { 30 | 31 | struct cpu_features 32 | { 33 | u32 max; 34 | u32 exmax; 35 | u32 isIntel : 1; 36 | u32 isAMD : 1; 37 | u32 has3DNOW : 1; 38 | u32 has3DNOWEXT : 1; 39 | u32 hasABM : 1; 40 | u32 hasADX : 1; 41 | u32 hasAES : 1; 42 | u32 hasAVX : 1; 43 | u32 hasAVX2 : 1; 44 | u32 hasAVXOSSUPPORT : 1; 45 | u32 hasAVX512OSSUPPORT : 1; 46 | u32 hasAVX512CD : 1; 47 | u32 hasAVX512ER : 1; 48 | u32 hasAVX512F : 1; 49 | u32 hasAVX512DQ : 1; 50 | u32 hasAVX512PF : 1; 51 | u32 hasAVX512BW : 1; 52 | u32 hasBMI1 : 1; 53 | u32 hasBMI2 : 1; 54 | u32 hasCLFSH : 1; 55 | u32 hasCMOV : 1; 56 | u32 hasCMPXCHG16B : 1; 57 | u32 hasCX8 : 1; 58 | u32 hasERMS : 1; 59 | u32 hasF16C : 1; 60 | u32 hasFMA : 1; 61 | u32 hasFSGSBASE : 1; 62 | u32 hasFXSR : 1; 63 | u32 hasHLE : 1; 64 | u32 hasINVPCID : 1; 65 | u32 hasLAHF : 1; 66 | u32 hasLZCNT : 1; 67 | u32 hasMMX : 1; 68 | u32 hasMMXEXT : 1; 69 | u32 hasMONITOR : 1; 70 | u32 hasMOVBE : 1; 71 | u32 hasMSR : 1; 72 | u32 hasOSXSAVE : 1; 73 | u32 hasPCLMULQDQ : 1; 74 | u32 hasPOPCNT : 1; 75 | u32 hasPREFETCHWT1 : 1; 76 | u32 hasRDRAND : 1; 77 | u32 hasRDSEED : 1; 78 | u32 hasRDTSCP : 1; 79 | u32 hasRTM : 1; 80 | u32 hasSEP : 1; 81 | u32 hasSHA : 1; 82 | u32 hasSSE : 1; 83 | u32 hasSSE2 : 1; 84 | u32 hasSSE3 : 1; 85 | u32 hasSSE41 : 1; 86 | u32 hasSSE42 : 1; 87 | u32 hasSSE4a : 1; 88 | u32 hasSSSE3 : 1; 89 | u32 hasSYSCALL : 1; 90 | u32 hasTBM : 1; 91 | u32 hasXOP : 1; 92 | u32 hasXSAVE : 1; 93 | u32 padding1 : 6; 94 | char vendor[17]; 95 | char model[49]; 96 | char padding2[2]; 97 | }; 98 | 99 | namespace internal 100 | { 101 | 102 | struct cpu_data 103 | { 104 | u32 data[4]; 105 | }; 106 | 107 | #if defined KFR_COMPILER_GNU || defined KFR_COMPILER_CLANG 108 | KFR_INLINE u32 get_cpuid(u32 func, u32 subfunc, u32* eax, u32* ebx, u32* ecx, u32* edx) 109 | { 110 | __asm__("cpuid" : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) : "0"(func), 
"2"(subfunc)); 111 | return 1; 112 | } 113 | KFR_INLINE void cpuid(u32* ptr, u32 func, u32 subfunc = 0) 114 | { 115 | get_cpuid(func, subfunc, &ptr[0], &ptr[1], &ptr[2], &ptr[3]); 116 | } 117 | KFR_INLINE u32 get_xcr0() 118 | { 119 | u32 xcr0; 120 | __asm__("xgetbv" : "=a"(xcr0) : "c"(0) : "%edx"); 121 | return xcr0; 122 | } 123 | #endif 124 | 125 | template 126 | cpu_t detect_cpu() 127 | { 128 | cpu_features c; 129 | memset(&c, 0, sizeof(c)); 130 | cpu_data data0; 131 | cpu_data exdata0; 132 | 133 | u32 f_1_ECX(0); 134 | u32 f_1_EDX(0); 135 | u32 f_7_EBX(0); 136 | u32 f_7_ECX(0); 137 | u32 f_81_ECX(0); 138 | u32 f_81_EDX(0); 139 | 140 | cpuid(data0.data, 0); 141 | c.max = static_cast(data0.data[0]); 142 | cpuid(exdata0.data, 0x80000000); 143 | c.exmax = static_cast(exdata0.data[0]); 144 | 145 | *ptr_cast(c.vendor) = static_cast(data0.data[1]); 146 | *ptr_cast(c.vendor + 4) = static_cast(data0.data[3]); 147 | *ptr_cast(c.vendor + 8) = static_cast(data0.data[2]); 148 | 149 | c.isIntel = strncmp(c.vendor, "GenuineIntel", sizeof(c.vendor)) == 0 ? 1 : 0; 150 | c.isAMD = strncmp(c.vendor, "AuthenticAMD", sizeof(c.vendor)) == 0 ? 
1 : 0; 151 | 152 | if (c.max >= 1) 153 | { 154 | cpu_data data1; 155 | cpuid(data1.data, 1); 156 | f_1_ECX = static_cast(data1.data[2]); 157 | f_1_EDX = static_cast(data1.data[3]); 158 | } 159 | 160 | if (c.max >= 7) 161 | { 162 | cpu_data data7; 163 | cpuid(data7.data, 7); 164 | f_7_EBX = static_cast(data7.data[1]); 165 | f_7_ECX = static_cast(data7.data[2]); 166 | } 167 | 168 | if (c.exmax >= 0x80000001) 169 | { 170 | cpu_data data81; 171 | cpuid(data81.data, 0x80000001); 172 | f_81_ECX = static_cast(data81.data[2]); 173 | f_81_EDX = static_cast(data81.data[3]); 174 | } 175 | 176 | if (c.exmax >= 0x80000004) 177 | { 178 | cpu_data data82; 179 | cpu_data data83; 180 | cpu_data data84; 181 | cpuid(data82.data, 0x80000002); 182 | cpuid(data83.data, 0x80000003); 183 | cpuid(data84.data, 0x80000004); 184 | memcpy(c.model, data82.data, sizeof(cpu_data)); 185 | memcpy(c.model + 16, data83.data, sizeof(cpu_data)); 186 | memcpy(c.model + 32, data84.data, sizeof(cpu_data)); 187 | } 188 | 189 | c.hasSSE3 = f_1_ECX >> 0 & 1; 190 | c.hasPCLMULQDQ = f_1_ECX >> 1 & 1; 191 | c.hasMONITOR = f_1_ECX >> 3 & 1; 192 | c.hasSSSE3 = f_1_ECX >> 9 & 1; 193 | c.hasFMA = f_1_ECX >> 12 & 1; 194 | c.hasCMPXCHG16B = f_1_ECX >> 13 & 1; 195 | c.hasSSE41 = f_1_ECX >> 19 & 1; 196 | c.hasSSE42 = f_1_ECX >> 20 & 1; 197 | c.hasMOVBE = f_1_ECX >> 22 & 1; 198 | c.hasPOPCNT = f_1_ECX >> 23 & 1; 199 | c.hasAES = f_1_ECX >> 25 & 1; 200 | c.hasXSAVE = f_1_ECX >> 26 & 1; 201 | c.hasOSXSAVE = f_1_ECX >> 27 & 1; 202 | c.hasAVX = f_1_ECX >> 28 & 1; 203 | c.hasF16C = f_1_ECX >> 29 & 1; 204 | c.hasRDRAND = f_1_ECX >> 30 & 1; 205 | c.hasMSR = f_1_EDX >> 5 & 1; 206 | c.hasCX8 = f_1_EDX >> 8 & 1; 207 | c.hasSEP = f_1_EDX >> 11 & 1; 208 | c.hasCMOV = f_1_EDX >> 15 & 1; 209 | c.hasCLFSH = f_1_EDX >> 19 & 1; 210 | c.hasMMX = f_1_EDX >> 23 & 1; 211 | c.hasFXSR = f_1_EDX >> 24 & 1; 212 | c.hasSSE = f_1_EDX >> 25 & 1; 213 | c.hasSSE2 = f_1_EDX >> 26 & 1; 214 | c.hasFSGSBASE = f_7_EBX >> 0 & 1; 215 | c.hasBMI1 = f_7_EBX 
>> 3 & 1; 216 | c.hasHLE = c.isIntel && f_7_EBX >> 4 & 1; 217 | c.hasAVX2 = f_7_EBX >> 5 & 1; 218 | c.hasBMI2 = f_7_EBX >> 8 & 1; 219 | c.hasERMS = f_7_EBX >> 9 & 1; 220 | c.hasINVPCID = f_7_EBX >> 10 & 1; 221 | c.hasRTM = c.isIntel && f_7_EBX >> 11 & 1; 222 | c.hasAVX512F = f_7_EBX >> 16 & 1; 223 | c.hasAVX512DQ = f_7_EBX >> 17 & 1; 224 | c.hasRDSEED = f_7_EBX >> 18 & 1; 225 | c.hasADX = f_7_EBX >> 19 & 1; 226 | c.hasAVX512PF = f_7_EBX >> 26 & 1; 227 | c.hasAVX512ER = f_7_EBX >> 27 & 1; 228 | c.hasAVX512CD = f_7_EBX >> 28 & 1; 229 | c.hasSHA = f_7_EBX >> 29 & 1; 230 | c.hasAVX512BW = f_7_EBX >> 30 & 1; 231 | c.hasPREFETCHWT1 = f_7_ECX >> 0 & 1; 232 | c.hasLAHF = f_81_ECX >> 0 & 1; 233 | c.hasLZCNT = c.isIntel && f_81_ECX >> 5 & 1; 234 | c.hasABM = c.isAMD && f_81_ECX >> 5 & 1; 235 | c.hasSSE4a = c.isAMD && f_81_ECX >> 6 & 1; 236 | c.hasXOP = c.isAMD && f_81_ECX >> 11 & 1; 237 | c.hasTBM = c.isAMD && f_81_ECX >> 21 & 1; 238 | c.hasSYSCALL = c.isIntel && f_81_EDX >> 11 & 1; 239 | c.hasMMXEXT = c.isAMD && f_81_EDX >> 22 & 1; 240 | c.hasRDTSCP = c.isIntel && f_81_EDX >> 27 & 1; 241 | c.has3DNOWEXT = c.isAMD && f_81_EDX >> 30 & 1; 242 | c.has3DNOW = c.isAMD && f_81_EDX >> 31 & 1; 243 | 244 | const u32 xcr0 = get_xcr0(); 245 | 246 | c.hasAVXOSSUPPORT = c.hasAVX && c.hasOSXSAVE && (xcr0 & 0x06) == 0x06; 247 | c.hasAVX512OSSUPPORT = c.hasAVX512F && c.hasOSXSAVE && (xcr0 & 0xE0) == 0xE0; 248 | 249 | #ifdef KFR_AVAIL_AVX512 250 | if (c.hasAVX512F && c.hasAVX512BW && c.hasAVX512DQ && c.hasAVX512OSSUPPORT) 251 | return cpu_t::avx3; 252 | #endif 253 | #ifdef KFR_AVAIL_AVX2 254 | if (c.hasAVX2 && c.hasAVXOSSUPPORT) 255 | return cpu_t::avx2; 256 | #endif 257 | #ifdef KFR_AVAIL_AVX 258 | if (c.hasAVX && c.hasAVXOSSUPPORT) 259 | return cpu_t::avx1; 260 | #endif 261 | #ifdef KFR_AVAIL_SSE41 262 | if (c.hasSSE41) 263 | return cpu_t::sse41; 264 | #endif 265 | #ifdef KFR_AVAIL_SSSE3 266 | if (c.hasSSSE3) 267 | return cpu_t::ssse3; 268 | #endif 269 | #ifdef KFR_AVAIL_SSE3 270 | if 
(c.hasSSE3) 271 | return cpu_t::sse3; 272 | #endif 273 | #ifdef KFR_AVAIL_SSE2 274 | if (c.hasSSE2) 275 | return cpu_t::sse2; 276 | #endif 277 | return cpu_t::lowest; 278 | } 279 | } 280 | } 281 | -------------------------------------------------------------------------------- /include/kfr/dispatch/cpuid_auto.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
22 | */ 23 | #pragma once 24 | 25 | #include "cpuid.hpp" 26 | 27 | namespace kfr 28 | { 29 | namespace internal 30 | { 31 | /* Storage for the selected instruction set: a function-local static that starts as cpu_t::native and is handed out by reference so it can be overwritten. */ 32 | KFR_INLINE cpu_t& cpu_v() 33 | { 34 | static cpu_t v1 = cpu_t::native; 35 | return v1; 36 | } 37 | /* Stores the result of detect_cpu<0>() into cpu_v(). The char return value is always 0; it exists only so the call can be used as an initializer. */ 38 | KFR_INLINE char init_cpu_v() 39 | { 40 | cpu_v() = detect_cpu<0>(); 41 | return 0; 42 | } 43 | /* Runs init_cpu_v() through a function-local static, so repeated calls to this function reuse the stored result instead of detecting again. */ 44 | KFR_INLINE char init_dummyvar() 45 | { 46 | static char dummy = init_cpu_v(); 47 | return dummy; 48 | } 49 | /* Namespace-scope static whose initializer triggers the detection during static initialization. Until it has run, cpu_v() still holds cpu_t::native. */ 50 | static char dummyvar = init_dummyvar(); 51 | } /* Returns the value currently stored in internal::cpu_v(). */ 52 | KFR_INLINE cpu_t get_cpu() { return internal::cpu_v(); } 53 | } 54 | -------------------------------------------------------------------------------- /include/kfr/dispatch/runtimedispatch.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
22 | */ 23 | #pragma once 24 | 25 | #include "../base/dispatch.hpp" 26 | #include "../base/types.hpp" 27 | #include "cpuid_auto.hpp" 28 | 29 | namespace kfr 30 | { 31 | 32 | namespace internal 33 | { 34 | 35 | template 36 | KFR_CPU_INTRIN(sse2) 37 | auto with_cpu_impl(ccpu_t, Fn&& fn, Args&&... args) 38 | { 39 | return fn(std::forward(args)...); 40 | } 41 | 42 | template 43 | KFR_CPU_INTRIN(sse3) 44 | auto with_cpu_impl(ccpu_t, Fn&& fn, Args&&... args) 45 | { 46 | return fn(std::forward(args)...); 47 | } 48 | 49 | template 50 | KFR_CPU_INTRIN(ssse3) 51 | auto with_cpu_impl(ccpu_t, Fn&& fn, Args&&... args) 52 | { 53 | return fn(std::forward(args)...); 54 | } 55 | 56 | template 57 | KFR_CPU_INTRIN(sse41) 58 | auto with_cpu_impl(ccpu_t, Fn&& fn, Args&&... args) 59 | { 60 | return fn(std::forward(args)...); 61 | } 62 | 63 | template 64 | KFR_CPU_INTRIN(sse42) 65 | auto with_cpu_impl(ccpu_t, Fn&& fn, Args&&... args) 66 | { 67 | return fn(std::forward(args)...); 68 | } 69 | 70 | template 71 | KFR_CPU_INTRIN(avx) 72 | auto with_cpu_impl(ccpu_t, Fn&& fn, Args&&... args) 73 | { 74 | return fn(std::forward(args)...); 75 | } 76 | 77 | template 78 | KFR_CPU_INTRIN(avx2) 79 | auto with_cpu_impl(ccpu_t, Fn&& fn, Args&&... args) 80 | { 81 | return fn(std::forward(args)...); 82 | } 83 | } 84 | 85 | template 86 | KFR_INTRIN auto with_cpu(ccpu_t, Fn&& fn, Args&&... args) 87 | { 88 | return internal::with_cpu_impl(ccpu, std::forward(fn), std::forward(args)...); 89 | } 90 | 91 | template 92 | struct fn_with_cpu 93 | { 94 | template 95 | KFR_INTRIN auto operator()(Args&&... 
args) -> decltype(std::declval()(std::forward(args)...)) 96 | { 97 | return internal::with_cpu_impl(ccpu, std::forward(fn), std::forward(args)...); 98 | } 99 | Fn fn; 100 | }; 101 | 102 | template 103 | KFR_INTRIN fn_with_cpu make_with_cpu(ccpu_t, Fn&& fn) 104 | { 105 | return { std::forward(fn) }; 106 | } 107 | 108 | namespace internal 109 | { 110 | 111 | template 112 | struct runtime_dispatcher; 113 | 114 | template 115 | struct runtime_dispatcher 116 | { 117 | using targetFn = retarget; 118 | 119 | template 120 | KFR_INLINE static result_of call(Fn&& fn, cpu_t, Args&&... args) 121 | { 122 | return cpu_caller::retarget_call(std::forward(fn), std::forward(args)...); 123 | } 124 | }; 125 | 126 | template 127 | struct runtime_dispatcher 128 | { 129 | using nextdispatcher = runtime_dispatcher; 130 | 131 | using targetFn = retarget; 132 | 133 | template ::value&& is_enabled::value)> 135 | KFR_SINTRIN auto call(Fn&& fn, cpu_t set, Args&&... args) 136 | -> decltype(nextdispatcher::call(std::forward(fn), set, std::forward(args)...)) 137 | { 138 | return set >= newest 139 | ? cpu_caller::retarget_call(std::forward(fn), std::forward(args)...) 140 | : nextdispatcher::call(std::forward(fn), set, std::forward(args)...); 141 | } 142 | template ::value && is_enabled::value))> 144 | KFR_SINTRIN auto call(Fn&& fn, cpu_t set, Args&&... args) 145 | -> decltype(nextdispatcher::call(std::forward(fn), set, std::forward(args)...)) 146 | { 147 | return nextdispatcher::call(std::forward(fn), set, std::forward(args)...); 148 | } 149 | }; 150 | 151 | template 152 | KFR_INLINE auto runtimedispatch(cvals_t, Fn&& fn, Args&&... args) 153 | -> decltype(internal::runtime_dispatcher::call(std::forward(fn), get_cpu(), 154 | std::forward(args)...)) 155 | { 156 | return internal::runtime_dispatcher::call(std::forward(fn), get_cpu(), 157 | std::forward(args)...); 158 | } 159 | 160 | template 161 | KFR_INLINE auto dispatch(Fn&& fn, Args&&... 
args) -> decltype(fn(std::forward(args)...)) 162 | { 163 | return runtimedispatch(std::forward(fn), std::forward(args)...); 164 | } 165 | } 166 | 167 | template 168 | KFR_INLINE auto runtimedispatch(Fn&& fn, Args&&... args) 169 | -> decltype(internal::runtimedispatch(cpulist(), std::forward(fn), std::forward(args)...)) 170 | { 171 | return internal::runtimedispatch(cpulist(), std::forward(fn), std::forward(args)...); 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /include/kfr/expressions/conversion.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
22 | */ 23 | 24 | #pragma once 25 | 26 | #include "../base/function.hpp" 27 | #include "../base/operators.hpp" 28 | #include "../base/vec.hpp" 29 | #include "../expressions/basic.hpp" 30 | 31 | namespace kfr 32 | { 33 | namespace internal 34 | { 35 | template 36 | struct expression_convert : expression 37 | { 38 | template 39 | using retarget_this = expression_convert>; 40 | 41 | KFR_INLINE expression_convert(E&& expr) noexcept : expression(std::forward(expr)) {} 42 | 43 | template 44 | KFR_INLINE vec operator()(cinput_t, size_t index, vec_t) 45 | { 46 | return this->argument_first(index, vec_t()); 47 | } 48 | }; 49 | } 50 | 51 | template 52 | KFR_INLINE internal::expression_convert> convert(E&& expr) 53 | { 54 | return internal::expression_convert>(std::forward(expr)); 55 | } 56 | KFR_FN(convert) 57 | } 58 | -------------------------------------------------------------------------------- /include/kfr/expressions/generators.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 
19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | 25 | #include "../base/function.hpp" 26 | #include "../base/log_exp.hpp" 27 | #include "../base/select.hpp" 28 | #include "../base/sin_cos.hpp" 29 | #include "../base/vec.hpp" 30 | 31 | #pragma clang diagnostic push 32 | #if CID_HAS_WARNING("-Winaccessible-base") 33 | #pragma clang diagnostic ignored "-Winaccessible-base" 34 | #endif 35 | 36 | namespace kfr 37 | { 38 | 39 | namespace internal 40 | { 41 | 42 | template 43 | struct in_generators : in_log_exp, in_select, in_sin_cos 44 | { 45 | private: 46 | using in_log_exp::exp; 47 | using in_log_exp::exp2; 48 | using in_select::select; 49 | using in_sin_cos::cossin; 50 | 51 | public: 52 | template 53 | struct generator 54 | { 55 | constexpr static size_t width = width_; 56 | using type = T; 57 | 58 | template 59 | KFR_INLINE vec operator()(cinput_t, size_t, vec_t t) const 60 | { 61 | return cast(generate(t)); 62 | } 63 | 64 | void resync(T start) const { ptr_cast(this)->sync(start); } 65 | 66 | protected: 67 | void call_next() const { ptr_cast(this)->next(); } 68 | template 69 | void call_shift(csize_t) const 70 | { 71 | ptr_cast(this)->shift(csize); 72 | } 73 | 74 | template 75 | void shift(csize_t) const 76 | { 77 | const vec oldvalue = value; 78 | call_next(); 79 | value = slice(oldvalue, value); 80 | } 81 | 82 | template 83 | KFR_INLINE vec generate(vec_t) const 84 | { 85 | const vec result = value; 86 | call_next(); 87 | return result; 88 | } 89 | 90 | template 91 | KFR_INLINE vec generate(vec_t) const 92 | { 93 | const vec result = narrow(value); 94 | shift(csize); 95 | return result; 96 | } 97 | 98 | template width)> 99 | KFR_INLINE vec generate(vec_t x) const 100 | { 101 | const auto lo = generate(low(x)); 102 | const auto hi = generate(high(x)); 103 | return concat(lo, hi); 
104 | } 105 | 106 | mutable vec value; 107 | }; 108 | 109 | template (1, 2)> 110 | struct generator_linear : generator> 111 | { 112 | template 113 | using retarget_this = typename in_generators::template generator_linear; 114 | 115 | constexpr generator_linear(T start, T step) noexcept : step(step), vstep(step* width) 116 | { 117 | this->resync(start); 118 | } 119 | 120 | KFR_INLINE void sync(T start) const noexcept { this->value = start + enumerate() * step; } 121 | 122 | KFR_INLINE void next() const noexcept { this->value += vstep; } 123 | 124 | protected: 125 | T step; 126 | T vstep; 127 | }; 128 | 129 | template (1, 2)> 130 | struct generator_exp : generator> 131 | { 132 | template 133 | using retarget_this = typename in_generators::template generator_exp; 134 | 135 | generator_exp(T start, T step) noexcept : step(step), vstep(exp(make_vector(step* width))[0] - 1) 136 | { 137 | this->resync(start); 138 | } 139 | 140 | KFR_INLINE void sync(T start) const noexcept 141 | { 142 | this->value = exp(start + enumerate() * step); 143 | } 144 | 145 | KFR_INLINE void next() const noexcept { this->value += this->value * vstep; } 146 | 147 | protected: 148 | T step; 149 | T vstep; 150 | }; 151 | 152 | template (1, 2)> 153 | struct generator_exp2 : generator> 154 | { 155 | template 156 | using retarget_this = typename in_generators::template generator_exp2; 157 | 158 | generator_exp2(T start, T step) noexcept : step(step), vstep(exp2(make_vector(step* width))[0] - 1) 159 | { 160 | this->resync(start); 161 | } 162 | 163 | KFR_INLINE void sync(T start) const noexcept 164 | { 165 | this->value = exp2(start + enumerate() * step); 166 | } 167 | 168 | KFR_INLINE void next() const noexcept { this->value += this->value * vstep; } 169 | 170 | protected: 171 | T step; 172 | T vstep; 173 | }; 174 | 175 | template (1, 2)> 176 | struct generator_cossin : generator> 177 | { 178 | template 179 | using retarget_this = typename in_generators::template generator_cossin; 180 | 181 | 
generator_cossin(T start, T step) 182 | : step(step), alpha(2 * sqr(sin(width / 2 * step / 2))), beta(-sin(width / 2 * step)) 183 | { 184 | this->resync(start); 185 | } 186 | KFR_INLINE void sync(T start) const noexcept { this->value = init_cossin(step, start); } 187 | 188 | KFR_INLINE void next() const noexcept 189 | { 190 | this->value = this->value - subadd(alpha * this->value, beta * swap<2>(this->value)); 191 | } 192 | 193 | protected: 194 | T step; 195 | T alpha; 196 | T beta; 197 | KFR_NOINLINE static vec init_cossin(T w, T phase) 198 | { 199 | return cossin(dup(phase + enumerate() * w)); 200 | } 201 | }; 202 | 203 | template (2, 4)> 204 | struct generator_sin : generator> 205 | { 206 | template 207 | using retarget_this = typename in_generators::template generator_sin; 208 | 209 | generator_sin(T start, T step) 210 | : step(step), alpha(2 * sqr(sin(width * step / 2))), beta(sin(width * step)) 211 | { 212 | this->resync(start); 213 | } 214 | KFR_INLINE void sync(T start) const noexcept 215 | { 216 | const vec cs = splitpairs(cossin(dup(start + enumerate() * step))); 217 | this->cos_value = low(cs); 218 | this->value = high(cs); 219 | } 220 | 221 | KFR_INLINE void next() const noexcept 222 | { 223 | const vec c = this->cos_value; 224 | const vec s = this->value; 225 | 226 | const vec cc = alpha * c + beta * s; 227 | const vec ss = alpha * s - beta * c; 228 | 229 | this->cos_value = c - cc; 230 | this->value = s - ss; 231 | } 232 | 233 | template 234 | void shift(csize_t) const noexcept 235 | { 236 | const vec oldvalue = this->value; 237 | const vec oldcosvalue = this->cos_value; 238 | next(); 239 | this->value = slice(oldvalue, this->value); 240 | this->cos_value = slice(oldcosvalue, this->cos_value); 241 | } 242 | 243 | protected: 244 | T step; 245 | T alpha; 246 | T beta; 247 | mutable vec cos_value; 248 | }; 249 | }; 250 | } 251 | 252 | template >> 253 | KFR_SINTRIN internal::in_generators<>::generator_linear gen_linear(T1 start, T2 step) 254 | { 255 | 
return internal::in_generators<>::generator_linear(start, step); 256 | } /* gen_exp: builds a generator_exp from (start, step). */ 257 | template >> 258 | KFR_SINTRIN internal::in_generators<>::generator_exp gen_exp(T1 start, T2 step) 259 | { 260 | return internal::in_generators<>::generator_exp(start, step); 261 | } /* gen_exp2: builds a generator_exp2 from (start, step). */ 262 | template >> 263 | KFR_SINTRIN internal::in_generators<>::generator_exp2 gen_exp2(T1 start, T2 step) 264 | { 265 | return internal::in_generators<>::generator_exp2(start, step); 266 | } /* gen_cossin: builds a generator_cossin from (start, step). The declared return type must be generator_cossin to match the object that is returned; it was previously declared as generator_sin, an unrelated sibling type, which cannot compile once the template is instantiated. */ 267 | template >> 268 | KFR_SINTRIN internal::in_generators<>::generator_cossin gen_cossin(T1 start, T2 step) 269 | { 270 | return internal::in_generators<>::generator_cossin(start, step); 271 | } /* gen_sin: builds a generator_sin from (start, step). */ 272 | template >> 273 | KFR_SINTRIN internal::in_generators<>::generator_sin gen_sin(T1 start, T2 step) 274 | { 275 | return internal::in_generators<>::generator_sin(start, step); 276 | } 277 | } 278 | 279 | #pragma clang diagnostic pop 280 | -------------------------------------------------------------------------------- /include/kfr/expressions/operators.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 
19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | 25 | #include "../base/function.hpp" 26 | #include "../base/operators.hpp" 27 | #include "../base/vec.hpp" 28 | 29 | namespace kfr 30 | { 31 | 32 | #define KFR_EXPR_UNARY(fn, op) \ 33 | template ::value)> \ 34 | KFR_INLINE auto operator op(A1&& a1)->decltype(bind_expression(fn(), std::forward(a1))) \ 35 | { \ 36 | return bind_expression(fn(), std::forward(a1)); \ 37 | } 38 | 39 | #define KFR_EXPR_BINARY(fn, op) \ 40 | template ::value)> \ 41 | KFR_INLINE auto operator op(A1&& a1, A2&& a2) \ 42 | ->decltype(bind_expression(fn(), std::forward(a1), std::forward(a2))) \ 43 | { \ 44 | return bind_expression(fn(), std::forward(a1), std::forward(a2)); \ 45 | } 46 | 47 | KFR_EXPR_UNARY(fn_neg, -) 48 | KFR_EXPR_UNARY(fn_bitwisenot, ~) 49 | 50 | KFR_EXPR_BINARY(fn_add, +) 51 | KFR_EXPR_BINARY(fn_sub, -) 52 | KFR_EXPR_BINARY(fn_mul, *) 53 | KFR_EXPR_BINARY(fn_div, /) 54 | KFR_EXPR_BINARY(fn_bitwiseand, &) 55 | KFR_EXPR_BINARY(fn_bitwiseor, |) 56 | KFR_EXPR_BINARY(fn_bitwisexor, ^) 57 | KFR_EXPR_BINARY(fn_shl, <<) 58 | KFR_EXPR_BINARY(fn_shr, >>) 59 | 60 | KFR_EXPR_BINARY(fn_equal, ==) 61 | KFR_EXPR_BINARY(fn_notequal, !=) 62 | KFR_EXPR_BINARY(fn_less, <) 63 | KFR_EXPR_BINARY(fn_greater, >) 64 | KFR_EXPR_BINARY(fn_lessorequal, <=) 65 | KFR_EXPR_BINARY(fn_greaterorequal, >=) 66 | } 67 | -------------------------------------------------------------------------------- /include/kfr/expressions/pointer.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software 
Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | 25 | #include "../base/vec.hpp" 26 | #include "basic.hpp" 27 | #include 28 | 29 | namespace kfr 30 | { 31 | 32 | constexpr size_t maximum_expression_width() { return bitness_const(16, 32); } 33 | 34 | template 35 | using expression_vtable = carray; 36 | 37 | struct dummy_content 38 | { 39 | }; 40 | 41 | struct expression_resource 42 | { 43 | virtual ~expression_resource() {} 44 | virtual void* instance() { return nullptr; } 45 | }; 46 | template 47 | struct alignas(E) expression_resource_impl : expression_resource 48 | { 49 | expression_resource_impl(E&& e) noexcept : e(std::move(e)) {} 50 | virtual ~expression_resource_impl() {} 51 | virtual void* instance() override final { return &e; } 52 | private: 53 | E e; 54 | }; 55 | 56 | template 57 | std::shared_ptr make_resource(E&& e) 58 | { 59 | using T = expression_resource_impl>; 60 | return std::static_pointer_cast( 61 | std::allocate_shared(allocator(), std::move(e))); 62 | } 63 | 64 | template 65 | struct expression_pointer : input_expression 66 | { 67 | using value_type = T; 68 | 69 | static_assert(is_poweroftwo(maxwidth), "N must be a power of two"); 70 | expression_pointer() noexcept : instance(nullptr), vtable(nullptr) {} 71 | 
expression_pointer(void* instance, const expression_vtable* vtable, 72 | std::shared_ptr resource = nullptr) 73 | : instance(instance), vtable(vtable), resource(std::move(resource)) 74 | { 75 | } 76 | template 77 | KFR_INLINE vec operator()(cinput_t, size_t index, vec_t) const 78 | { 79 | using func_t = simd (*)(void*, size_t); 80 | 81 | static_assert(is_poweroftwo(N), "N must be a power of two"); 82 | constexpr size_t findex = ilog2(N); 83 | static_assert(N <= maxwidth, "N is greater than maxwidth"); 84 | func_t func = reinterpret_cast(vtable->get(csize<2 + findex>)); 85 | vec result = cast(func(instance, index)); 86 | return result; 87 | } 88 | KFR_INLINE void begin_block(size_t size) const 89 | { 90 | using func_t = void (*)(void*, size_t); 91 | func_t func = reinterpret_cast(vtable->get(csize<0>)); 92 | func(instance, size); 93 | } 94 | KFR_INLINE void end_block(size_t size) const 95 | { 96 | using func_t = void (*)(void*, size_t); 97 | func_t func = reinterpret_cast(vtable->get(csize<1>)); 98 | func(instance, size); 99 | } 100 | 101 | private: 102 | void* instance; 103 | const expression_vtable* vtable; 104 | std::shared_ptr resource; 105 | }; 106 | 107 | namespace internal 108 | { 109 | template , 110 | typename NonMemFn = Ret (*)(Fn*, size_t, vec_t)> 111 | KFR_INLINE NonMemFn make_expression_func() 112 | { 113 | return [](Fn* fn, size_t index, vec_t x) { return *(fn->operator()(cinput, index, x)); }; 114 | } 115 | 116 | template 117 | KFR_INLINE NonMemFn make_expression_begin_block() 118 | { 119 | return [](Fn* fn, size_t size) { return fn->begin_block(size); }; 120 | } 121 | template 122 | KFR_INLINE NonMemFn make_expression_end_block() 123 | { 124 | return [](Fn* fn, size_t size) { return fn->end_block(size); }; 125 | } 126 | 127 | template 128 | expression_vtable make_expression_vtable_impl() 129 | { 130 | expression_vtable result; 131 | constexpr size_t size = result.size() - 2; 132 | 133 | result.get(csize<0>) = 
reinterpret_cast(&internal::make_expression_begin_block>); 134 | result.get(csize<1>) = reinterpret_cast(&internal::make_expression_end_block>); 135 | 136 | cforeach(csizeseq, [&](auto u) { 137 | constexpr size_t N = 1 << val_of(u); 138 | result.get(csize<2 + val_of(u)>) = 139 | reinterpret_cast(internal::make_expression_func>()); 140 | }); 141 | return result; 142 | } 143 | 144 | template 145 | KFR_INLINE expression_vtable* make_expression_vtable() 146 | { 147 | static_assert(is_input_expression::value, "E must be an expression"); 148 | static expression_vtable vtable = internal::make_expression_vtable_impl(); 149 | return &vtable; 150 | } 151 | } 152 | 153 | template , size_t maxwidth = maximum_expression_width()> 154 | KFR_INLINE expression_pointer to_pointer(E& expr) 155 | { 156 | static_assert(is_input_expression::value, "E must be an expression"); 157 | return expression_pointer(std::addressof(expr), 158 | internal::make_expression_vtable()); 159 | } 160 | 161 | template , size_t maxwidth = maximum_expression_width()> 162 | KFR_INLINE expression_pointer to_pointer(E&& expr) 163 | { 164 | static_assert(is_input_expression::value, "E must be an expression"); 165 | std::shared_ptr ptr = make_resource(std::move(expr)); 166 | return expression_pointer( 167 | ptr->instance(), internal::make_expression_vtable(), std::move(ptr)); 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /include/kfr/io/file.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | 25 | #include "../base/function.hpp" 26 | #include "../base/univector.hpp" 27 | #include "../base/vec.hpp" 28 | #include 29 | #include 30 | 31 | namespace kfr 32 | { 33 | 34 | namespace internal 35 | { /* Base for the file expressions: holds a C FILE* and closes it on destruction. Not copyable. ok() reports whether the handle is non-null. */ 36 | struct expression_file_base 37 | { 38 | expression_file_base() = delete; 39 | expression_file_base(const expression_file_base&) = delete; /* Moving transfers the handle and clears the source, so exactly one object closes the stream (a defaulted move would leave both objects closing the same FILE*). */ 40 | expression_file_base(expression_file_base&& other) noexcept : file(other.file) { other.file = nullptr; } 41 | expression_file_base(FILE* file) : file(file) {} /* file is null when the open failed (see ok()) or after this object was moved from; fclose must not be called on a null stream. */ 42 | ~expression_file_base() { if (file != nullptr) fclose(file); } 43 | bool ok() const { return file != nullptr; } 44 | FILE* file; 45 | }; 46 | /* Output expression that appends each received value to the file; the index argument is ignored. */ 47 | struct expression_sequential_file_writer : expression_file_base, output_expression 48 | { 49 | using expression_file_base::expression_file_base; 50 | template 51 | void operator()(coutput_t, size_t, vec value) 52 | { 53 | write(value); 54 | } /* Writes the raw bytes of value (sizeof(U) bytes) with fwrite; the fwrite result is not checked. */ 55 | template 56 | void write(const U& value) 57 | { 58 | fwrite(std::addressof(value), 1, sizeof(U), file); 59 | } 60 | }; 61 | /* Input expression that reads the next value from the file; the index argument is ignored. The result starts as qnan before read() is called on it. */ 62 | struct expression_sequential_file_reader : expression_file_base, input_expression 63 | { 64 | using expression_file_base::expression_file_base; 65 | template 66 | vec operator()(cinput_t, size_t, vec_t) const 67 | { 68 | vec input = qnan; 69 | read(input); 70 | return input; 71 | } 72 | template 73 | void read(U& value) 
const 74 | { 75 | fread(std::addressof(value), 1, sizeof(U), file); 76 | } 77 | }; 78 | 79 | template 80 | struct expression_file_writer : expression_file_base, output_expression 81 | { 82 | using expression_file_base::expression_file_base; 83 | template 84 | void operator()(coutput_t, size_t index, vec value) 85 | { 86 | if (position != index) 87 | fseeko(file, static_cast(index * sizeof(T)), SEEK_SET); 88 | const vec output = cast(value); 89 | fwrite(output.data(), sizeof(T), output.size(), file); 90 | position = index + N; 91 | } 92 | size_t position = 0; 93 | }; 94 | 95 | template 96 | struct expression_file_reader : expression_file_base, input_expression 97 | { 98 | using expression_file_base::expression_file_base; 99 | template 100 | vec operator()(cinput_t, size_t index, vec_t) const 101 | { 102 | if (position != index) 103 | fseeko(file, static_cast(index * sizeof(T)), SEEK_SET); 104 | vec input = qnan; 105 | fread(input.data(), sizeof(T), input.size(), file); 106 | position = index + N; 107 | return cast(input); 108 | } 109 | size_t position = 0; 110 | }; 111 | } 112 | 113 | inline internal::expression_sequential_file_reader sequential_file_reader(const std::string& file_name) 114 | { 115 | return internal::expression_sequential_file_reader(fopen(file_name.c_str(), "rb")); 116 | } 117 | inline internal::expression_sequential_file_writer sequential_file_writer(const std::string& file_name) 118 | { 119 | return internal::expression_sequential_file_writer(fopen(file_name.c_str(), "wb")); 120 | } 121 | 122 | template 123 | internal::expression_file_reader file_reader(const std::string& file_name) 124 | { 125 | return internal::expression_file_reader(fopen(file_name.c_str(), "rb")); 126 | } 127 | template 128 | internal::expression_file_writer file_writer(const std::string& file_name) 129 | { 130 | return internal::expression_file_writer(fopen(file_name.c_str(), "wb")); 131 | } 132 | } 133 | 
-------------------------------------------------------------------------------- /include/kfr/io/python_plot.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
22 | */ 23 | #pragma once 24 | #include "../base/vec.hpp" 25 | #include "../cometa/string.hpp" 26 | #include 27 | 28 | #ifdef KFR_OS_WIN 29 | #include 30 | #define cross_getcwd _getcwd 31 | #else 32 | #include 33 | #define cross_getcwd getcwd 34 | #endif 35 | 36 | namespace kfr 37 | { 38 | namespace internal 39 | { 40 | 41 | void python(const std::string& name, const std::string& code) 42 | { 43 | std::string filename; 44 | { 45 | char curdir[1024]; 46 | cross_getcwd(curdir, arraysize(curdir)); 47 | filename = curdir; 48 | } 49 | #ifdef KFR_OS_WIN 50 | const char* slash = "\\"; 51 | #else 52 | const char* slash = "/"; 53 | #endif 54 | filename = filename + slash + name + ".py"; 55 | 56 | FILE* f = fopen(filename.c_str(), "w"); 57 | fwrite(code.c_str(), 1, code.size(), f); 58 | fclose(f); 59 | std::system(("python \"" + filename + "\"").c_str()); 60 | } 61 | } 62 | 63 | static std::string concat_args() { return {}; } 64 | 65 | template 66 | static std::string concat_args(const std::string& left, const Ts&... rest) 67 | { 68 | const std::string right = concat_args(rest...); 69 | return left.empty() ? right : right.empty() ? 
left : left + ", " + right; 70 | } 71 | 72 | static void plot_show(const std::string& name, const std::string& wavfile, const std::string& options = "") 73 | { 74 | print(name, "..."); 75 | std::string ss; 76 | ss += "#!/usr/bin/env python\n" 77 | "import dspplot\n\n" 78 | "dspplot.plot(" + 79 | concat_args("r'" + wavfile + "'", options) + ")\n"; 80 | 81 | internal::python(name, ss); 82 | print("done\n"); 83 | } 84 | 85 | static void plot_show(const std::string& name, const char* x, const std::string& options = "") 86 | { 87 | plot_show(name, std::string(x), options); 88 | } 89 | 90 | template 91 | void plot_show(const std::string& name, T&& x, const std::string& options = "") 92 | { 93 | print(name, "..."); 94 | auto array = make_array_ref(std::forward(x)); 95 | std::string ss; 96 | ss += "#!/usr/bin/env python\n" 97 | "import dspplot\n\n" 98 | "data = [\n"; 99 | for (size_t i = 0; i < array.size(); i++) 100 | ss += as_string(fmt<'g', 20, 17>(array[i]), ",\n"); 101 | ss += "]\n"; 102 | 103 | ss += "dspplot.plot(" + concat_args("data", options) + ")\n"; 104 | 105 | internal::python(name, ss); 106 | print("done\n"); 107 | } 108 | 109 | template 110 | void plot_save(const std::string& name, T&& x, const std::string& options = "") 111 | { 112 | plot_show(name, std::forward(x), concat_args(options, "file='../svg/" + name + ".svg'")); 113 | } 114 | 115 | template 116 | void perfplot_show(const std::string& name, T1&& data, T2&& labels, const std::string& options = "") 117 | { 118 | print(name, "..."); 119 | auto array = make_array_ref(std::forward(data)); 120 | auto labels_array = make_array_ref(std::forward(labels)); 121 | std::string ss; 122 | ss += "#!/usr/bin/env python\n"; 123 | ss += "import dspplot\n\n"; 124 | ss += "data = [\n"; 125 | for (size_t i = 0; i < array.size(); i++) 126 | { 127 | auto subarray = make_array_ref(array[i]); 128 | ss += "[\n"; 129 | for (size_t i = 0; i < subarray.size(); i++) 130 | ss += as_string(" ", fmt<'g', 20, 17>(subarray[i]), 
",\n"); 131 | ss += "],"; 132 | } 133 | ss += "]\n"; 134 | 135 | ss += "labels = [\n"; 136 | for (size_t i = 0; i < labels_array.size(); i++) 137 | { 138 | const std::string label = labels_array[i]; 139 | ss += " '" + label + "',"; 140 | } 141 | ss += "]\n"; 142 | 143 | ss += "dspplot.perfplot(" + concat_args("data, labels", options) + ")\n"; 144 | 145 | internal::python(name, ss); 146 | print("done\n"); 147 | } 148 | 149 | template 150 | void perfplot_save(const std::string& name, T1&& data, T2&& labels, const std::string& options = "") 151 | { 152 | perfplot_show(name, std::forward(data), std::forward(labels), 153 | concat_args(options, "file='../perf/" + name + ".svg'")); 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /include/kfr/io/tostring.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
22 | */ 23 | #pragma once 24 | 25 | #include "../base/complex.hpp" 26 | #include "../base/univector.hpp" 27 | #include "../base/vec.hpp" 28 | 29 | namespace cometa 30 | { 31 | 32 | template 33 | inline std::string repr(const kfr::complex& v); 34 | 35 | template 36 | inline std::string repr(kfr::simd v); 37 | 38 | template 39 | inline std::string repr(kfr::vec v); 40 | 41 | template 42 | inline std::string repr(const kfr::univector& v); 43 | } 44 | #include "../cometa/string.hpp" 45 | #include 46 | 47 | namespace cometa 48 | { 49 | 50 | namespace details 51 | { 52 | 53 | constexpr size_t number_width = 9; 54 | constexpr size_t number_precision = 6; 55 | constexpr size_t number_precision_short = 2; 56 | constexpr size_t number_columns = 8; 57 | 58 | template 59 | std::string fmtvalue(std::true_type, const T& x) 60 | { 61 | std::string str = as_string(fmt<'g', number_width, number_precision>(x)); 62 | if (str.size() > number_width) 63 | str = as_string(fmt<'g', number_width, number_precision_short>(x)); 64 | return str; 65 | } 66 | 67 | template 68 | std::string fmtvalue(std::true_type, const kfr::complex& x) 69 | { 70 | std::string restr = as_string(fmt<'g', number_width, number_precision>(x.real())); 71 | if (restr.size() > number_width) 72 | restr = as_string(fmt<'g', number_width, number_precision_short>(x.real())); 73 | 74 | std::string imstr = as_string(fmt<'g', -1, number_precision>(std::abs(x.imag()))); 75 | if (imstr.size() > number_width) 76 | imstr = as_string(fmt<'g', -1, number_precision_short>(std::abs(x.imag()))); 77 | 78 | return restr + (x.imag() < T(0) ? 
"-" : "+") + padleft(number_width, imstr + "j"); 79 | } 80 | 81 | template 82 | std::string fmtvalue(std::false_type, const T& x) 83 | { 84 | return as_string(fmtwidth(repr(x))); 85 | } 86 | } 87 | 88 | template 89 | inline std::string repr(const kfr::complex& v) 90 | { 91 | return as_string(v.real()) + " + " + as_string(v.imag()) + "j"; 92 | } 93 | 94 | template 95 | inline std::string repr(const T* source, size_t N) 96 | { 97 | std::string str; 98 | for (size_t i = 0; i < N; i++) 99 | { 100 | if (i > 0) 101 | { 102 | if (i % details::number_columns == 0) 103 | str += "\n"; 104 | else 105 | str += " "; 106 | } 107 | str += as_string(details::fmtvalue(std::is_floating_point(), source[i])); 108 | } 109 | return str; 110 | } 111 | 112 | template 113 | inline std::string repr(const kfr::complex* source, size_t N) 114 | { 115 | std::string str; 116 | for (size_t i = 0; i < N; i++) 117 | { 118 | if (i > 0) 119 | { 120 | if (i % (details::number_columns / 2) == 0) 121 | str += "\n"; 122 | else 123 | str += " "; 124 | } 125 | str += as_string(details::fmtvalue(std::true_type{}, source[i])); 126 | } 127 | return str; 128 | } 129 | 130 | template 131 | inline std::string repr(kfr::simd v) 132 | { 133 | return repr(tovec(v)); 134 | } 135 | 136 | template 137 | inline std::string repr(kfr::vec v) 138 | { 139 | return repr(v.data(), v.size()); 140 | } 141 | 142 | template 143 | inline std::string repr(const kfr::univector& v) 144 | { 145 | return repr(v.data(), v.size()); 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /include/kfr/math.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the 
License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | 25 | #include "base/vec.hpp" 26 | 27 | #include "base/abs.hpp" 28 | #include "base/asin_acos.hpp" 29 | #include "base/atan.hpp" 30 | #include "base/complex.hpp" 31 | #include "base/constants.hpp" 32 | #include "base/digitreverse.hpp" 33 | #include "base/gamma.hpp" 34 | #include "base/log_exp.hpp" 35 | #include "base/logical.hpp" 36 | #include "base/min_max.hpp" 37 | #include "base/operators.hpp" 38 | #include "base/read_write.hpp" 39 | #include "base/round.hpp" 40 | #include "base/saturation.hpp" 41 | #include "base/select.hpp" 42 | #include "base/shuffle.hpp" 43 | #include "base/sin_cos.hpp" 44 | #include "base/sqrt.hpp" 45 | #include "base/tan.hpp" 46 | #include "kfr/base/hyperbolic.hpp" 47 | 48 | namespace kfr 49 | { 50 | using namespace native; 51 | } 52 | -------------------------------------------------------------------------------- /include/kfr/misc/compiletime.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software 
Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | #include "../base/constants.hpp" 25 | #include "../base/operators.hpp" 26 | #include "../base/types.hpp" 27 | 28 | namespace kfr 29 | { 30 | 31 | namespace compiletime 32 | { 33 | 34 | template 35 | constexpr inline T select(bool c, T x, T y) 36 | { 37 | return c ? x : y; 38 | } 39 | template 40 | constexpr inline T trunc(T x) 41 | { 42 | return static_cast(static_cast(x)); 43 | } 44 | template 45 | constexpr inline T abs(T x) 46 | { 47 | return x < T() ? -x : x; 48 | } 49 | template 50 | constexpr inline T mulsign(T x, T y) 51 | { 52 | return y < T() ? 
-x : x; 53 | } 54 | template 55 | constexpr inline T sin(T x) 56 | { 57 | x = x - trunc(x / c_pi) * c_pi; 58 | constexpr T c2 = -0.16665853559970855712890625; 59 | constexpr T c4 = +8.31427983939647674560546875e-3; 60 | constexpr T c6 = -1.85423981747590005397796630859375e-4; 61 | 62 | x -= c_pi; 63 | T y = abs(x); 64 | y = select(y > c_pi, c_pi - y, y); 65 | y = mulsign(y, -x); 66 | 67 | const T y2 = y * y; 68 | T formula = c6; 69 | const T y3 = y2 * y; 70 | formula = fmadd(formula, y2, c4); 71 | formula = fmadd(formula, y2, c2); 72 | formula = formula * y3 + y; 73 | return formula; 74 | } 75 | template 76 | constexpr inline T cos(T x) 77 | { 78 | return sin(x + c_pi); 79 | } 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /include/kfr/misc/random.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
22 | */ 23 | #pragma once 24 | #include "../base/function.hpp" 25 | #include "../base/operators.hpp" 26 | #include "../base/shuffle.hpp" 27 | #include "../base/vec.hpp" 28 | 29 | namespace kfr 30 | { 31 | 32 | using random_state = u32x4; 33 | 34 | struct seed_from_rdtsc_t 35 | { 36 | }; 37 | 38 | constexpr seed_from_rdtsc_t seed_from_rdtsc{}; 39 | 40 | struct random_bit_generator 41 | { 42 | random_bit_generator(seed_from_rdtsc_t) noexcept 43 | : state(bitcast(make_vector(__builtin_readcyclecounter(), 44 | (__builtin_readcyclecounter() << 11) ^ 0x710686d615e2257bull))) 45 | { 46 | (void)operator()(); 47 | } 48 | constexpr random_bit_generator(u32 x0, u32 x1, u32 x2, u32 x3) noexcept : state(x0, x1, x2, x3) 49 | { 50 | (void)operator()(); 51 | } 52 | constexpr random_bit_generator(u64 x0, u64 x1) noexcept : state(bitcast(make_vector(x0, x1))) 53 | { 54 | (void)operator()(); 55 | } 56 | 57 | inline random_state operator()() 58 | { 59 | constexpr static random_state mul{ 214013u, 17405u, 214013u, 69069u }; 60 | constexpr static random_state add{ 2531011u, 10395331u, 13737667u, 1u }; 61 | state = bitcast(rotateright<3>(bitcast(fmadd(state, mul, add)))); 62 | return state; 63 | } 64 | 65 | protected: 66 | random_state state; 67 | }; 68 | 69 | template 70 | inline vec random_bits(random_bit_generator& gen) 71 | { 72 | return narrow(bitcast(gen())); 73 | } 74 | template sizeof(random_state))> 75 | inline vec random_bits(random_bit_generator& gen) 76 | { 77 | constexpr size_t N2 = prev_poweroftwo(N - 1); 78 | return concat(random_bits(gen), random_bits(gen)); 79 | } 80 | 81 | template ::value)> 82 | inline vec random_uniform(random_bit_generator& gen) 83 | { 84 | return bitcast(random_bits(gen)); 85 | } 86 | 87 | template ::value)> 88 | inline vec randommantissa(random_bit_generator& gen) 89 | { 90 | return bitcast((random_uniform(gen) & 0x7FFFFFu) | 0x3f800000u) + 0.0f; 91 | } 92 | 93 | template ::value)> 94 | inline vec randommantissa(random_bit_generator& gen) 95 | { 96 
| return bitcast((random_uniform(gen) & 0x000FFFFFFFFFFFFFull) | 0x3FF0000000000000ull) + 0.0; 97 | } 98 | 99 | template 100 | inline enable_if_f> random_uniform(random_bit_generator& gen) 101 | { 102 | return randommantissa(gen) - 1.f; 103 | } 104 | 105 | template 106 | inline enable_if_f> random_range(random_bit_generator& gen, T min, T max) 107 | { 108 | return mix(random_uniform(gen), min, max); 109 | } 110 | 111 | template 112 | inline enable_if_not_f> random_range(random_bit_generator& gen, T min, T max) 113 | { 114 | using big_type = findinttype::min()), sqr(std::numeric_limits::max())>; 115 | 116 | vec u = random_uniform(gen); 117 | const vec tmp = cast(u); 118 | return cast((tmp * (max - min) + min) >> typebits::bits); 119 | } 120 | 121 | namespace internal 122 | { 123 | template 124 | struct expression_random_uniform : input_expression 125 | { 126 | using value_type = T; 127 | constexpr expression_random_uniform(const random_bit_generator& gen) noexcept : gen(gen) {} 128 | template 129 | vec operator()(cinput_t, size_t, vec_t) const 130 | { 131 | return cast(random_uniform(gen)); 132 | } 133 | mutable random_bit_generator gen; 134 | }; 135 | 136 | template 137 | struct expression_random_range : input_expression 138 | { 139 | using value_type = T; 140 | constexpr expression_random_range(const random_bit_generator& gen, T min, T max) noexcept : gen(gen), 141 | min(min), 142 | max(max) 143 | { 144 | } 145 | 146 | template 147 | vec operator()(cinput_t, size_t, vec_t) const 148 | { 149 | return cast(random_range(gen, min, max)); 150 | } 151 | mutable random_bit_generator gen; 152 | const T min; 153 | const T max; 154 | }; 155 | } 156 | 157 | template 158 | inline internal::expression_random_uniform gen_random_uniform(const random_bit_generator& gen) 159 | { 160 | return internal::expression_random_uniform(gen); 161 | } 162 | 163 | template 164 | inline internal::expression_random_range gen_random_range(const random_bit_generator& gen, T min, T max) 165 | { 
166 | return internal::expression_random_range(gen, min, max); 167 | } 168 | 169 | template 170 | inline internal::expression_random_uniform gen_random_uniform() 171 | { 172 | return internal::expression_random_uniform(random_bit_generator(seed_from_rdtsc)); 173 | } 174 | 175 | template 176 | inline internal::expression_random_range gen_random_range(T min, T max) 177 | { 178 | return internal::expression_random_range(random_bit_generator(seed_from_rdtsc), min, max); 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /include/kfr/misc/small_buffer.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
22 | */ 23 | #pragma once 24 | 25 | #include "../base/memory.hpp" 26 | #include 27 | #include 28 | 29 | namespace kfr 30 | { 31 | 32 | template 33 | struct small_buffer 34 | { 35 | public: 36 | small_buffer() noexcept : m_size(0), m_data(m_preallocated) {} 37 | 38 | small_buffer(std::size_t size) : small_buffer() { resize(size); } 39 | 40 | friend void swap(small_buffer& first, small_buffer& second) noexcept 41 | { 42 | using std::swap; 43 | 44 | swap(first.m_size, second.m_size); 45 | swap(first.m_data, second.m_data); 46 | swap(first.m_preallocated, second.m_preallocated); 47 | first.m_data = first.m_size <= Capacity ? first.m_preallocated : first.m_data; 48 | second.m_data = second.m_size <= Capacity ? second.m_preallocated : second.m_data; 49 | } 50 | small_buffer(small_buffer&& other) : small_buffer() { swap(other, *this); } 51 | 52 | small_buffer(const small_buffer& other) : small_buffer() { assign(other); } 53 | small_buffer& operator=(small_buffer other) 54 | { 55 | swap(other, *this); 56 | return *this; 57 | } 58 | 59 | ~small_buffer() { clear(); } 60 | 61 | void assign(const small_buffer& other) 62 | { 63 | resize(other.m_size); 64 | std::copy_n(other.m_data, m_size, m_data); 65 | } 66 | 67 | void resize(std::size_t newsize) 68 | { 69 | T* m_newdata; 70 | if (newsize <= Capacity) 71 | { 72 | m_newdata = m_preallocated; 73 | } 74 | else 75 | { 76 | m_newdata = aligned_allocate(newsize); 77 | } 78 | std::copy_n(std::make_move_iterator(m_data), std::min(newsize, m_size), m_newdata); 79 | if (m_data != m_preallocated) 80 | aligned_deallocate(m_data); 81 | m_data = m_newdata; 82 | m_size = newsize; 83 | } 84 | bool empty() const { return !size(); } 85 | std::size_t size() const { return m_size; } 86 | const T* begin() const { return m_data; } 87 | const T* end() const { return m_data + m_size; } 88 | const T* cbegin() const { return m_data; } 89 | const T* cend() const { return m_data + m_size; } 90 | T* begin() { return m_data; } 91 | T* end() { return m_data 
+ m_size; } 92 | void clear() { resize(0); } 93 | const T& front() const { return m_data[0]; } 94 | const T& back() const { return m_data[m_size - 1]; } 95 | T& front() { return m_data[0]; } 96 | T& back() { return m_data[m_size - 1]; } 97 | void pop_back() { resize(m_size - 1); } 98 | T* data() { return m_data; } 99 | const T* data() const { return m_data; } 100 | T& operator[](std::size_t i) { return m_data[i]; } 101 | const T& operator[](std::size_t i) const { return m_data[i]; } 102 | void push_back(const T& value) 103 | { 104 | resize(m_size + 1); 105 | m_data[m_size - 1] = value; 106 | } 107 | 108 | protected: 109 | T m_preallocated[Capacity]; 110 | std::size_t m_size; 111 | T* m_data; 112 | }; 113 | } 114 | -------------------------------------------------------------------------------- /include/kfr/misc/sort.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 
22 | */ 23 | #pragma once 24 | 25 | #include "../base/min_max.hpp" 26 | #include "../base/shuffle.hpp" 27 | #include "../base/vec.hpp" 28 | 29 | namespace kfr 30 | { 31 | /** 32 | * Sort the elements in the vector in ascending order 33 | * @param x input vector 34 | * @return sorted vector 35 | * @code 36 | * CHECK(sort(make_vector(1000, 1, 2, -10)) == make_vector(-10, 1, 2, 1000)); 37 | * @endcode 38 | */ 39 | template 40 | KFR_INLINE vec sort(vec x) 41 | { 42 | using namespace kfr::native; 43 | constexpr size_t Nhalf = N / 2; 44 | vec e = low(x); 45 | vec o = high(x); 46 | constexpr auto blend0 = cconcat(csizes<1>, csizeseq); 47 | for (size_t i = 0; i < Nhalf; i++) 48 | { 49 | vec t; 50 | t = min(e, o); 51 | o = max(e, o); 52 | o = rotateright<1>(o); 53 | e = t; 54 | t = max(e, o); 55 | o = min(e, o); 56 | e = t; 57 | t = blend(e, o, blend0); 58 | o = blend(o, e, blend0); 59 | o = rotateleft<1>(o); 60 | e = t; 61 | } 62 | return interleavehalfs(concat(e, o)); 63 | } 64 | 65 | /** 66 | * Sort the elements in the vector in descending order 67 | * @param x input vector 68 | * @return sorted vector 69 | * @code 70 | * CHECK(sort(make_vector(1000, 1, 2, -10)) == make_vector(1000, 2, 1, -10)); 71 | * @endcode 72 | */ 73 | template 74 | KFR_INLINE vec sortdesc(vec x) 75 | { 76 | using namespace kfr::native; 77 | constexpr size_t Nhalf = N / 2; 78 | vec e = low(x); 79 | vec o = high(x); 80 | constexpr auto blend0 = cconcat(csizes<1>, csizeseq); 81 | for (size_t i = 0; i < Nhalf; i++) 82 | { 83 | vec t; 84 | t = max(e, o); 85 | o = min(e, o); 86 | o = rotateright<1>(o); 87 | e = t; 88 | t = min(e, o); 89 | o = max(e, o); 90 | e = t; 91 | t = blend(e, o, blend0); 92 | o = blend(o, e, blend0); 93 | o = rotateleft<1>(o); 94 | e = t; 95 | } 96 | return interleavehalfs(concat(e, o)); 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /include/kfr/vec.hpp: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | 25 | #include "base/vec.hpp" 26 | -------------------------------------------------------------------------------- /include/kfr/version.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2016 D Levin (http://www.kfrlib.com) 3 | * This file is part of KFR 4 | * 5 | * KFR is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * KFR is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 
14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with KFR. 17 | * 18 | * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. 19 | * Buying a commercial license is mandatory as soon as you develop commercial activities without 20 | * disclosing the source code of your own applications. 21 | * See http://www.kfrlib.com for details. 22 | */ 23 | #pragma once 24 | 25 | #include "base/types.hpp" 26 | #include 27 | 28 | namespace kfr 29 | { 30 | static std::string library_version() 31 | { 32 | return "KFR " + std::string(version_string) + bitness_const(" x86 ", " x86-64 ") + 33 | CID_STRINGIFY(KFR_ARCH_NAME); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /sources.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 D Levin (http://www.kfrlib.com) 2 | # This file is part of KFR 3 | # 4 | # KFR is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # KFR is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with KFR. 
16 | 17 | 18 | set( 19 | KFR_SRC 20 | ${PROJECT_SOURCE_DIR}/include/kfr/base/abs.hpp 21 | ${PROJECT_SOURCE_DIR}/include/kfr/base/asin_acos.hpp 22 | ${PROJECT_SOURCE_DIR}/include/kfr/base/atan.hpp 23 | ${PROJECT_SOURCE_DIR}/include/kfr/base/complex.hpp 24 | ${PROJECT_SOURCE_DIR}/include/kfr/base/constants.hpp 25 | ${PROJECT_SOURCE_DIR}/include/kfr/base/digitreverse.hpp 26 | ${PROJECT_SOURCE_DIR}/include/kfr/base/dispatch.hpp 27 | ${PROJECT_SOURCE_DIR}/include/kfr/base/expression.hpp 28 | ${PROJECT_SOURCE_DIR}/include/kfr/base/function.hpp 29 | ${PROJECT_SOURCE_DIR}/include/kfr/base/gamma.hpp 30 | ${PROJECT_SOURCE_DIR}/include/kfr/base/log_exp.hpp 31 | ${PROJECT_SOURCE_DIR}/include/kfr/base/logical.hpp 32 | ${PROJECT_SOURCE_DIR}/include/kfr/base/memory.hpp 33 | ${PROJECT_SOURCE_DIR}/include/kfr/base/min_max.hpp 34 | ${PROJECT_SOURCE_DIR}/include/kfr/base/operators.hpp 35 | ${PROJECT_SOURCE_DIR}/include/kfr/base/read_write.hpp 36 | ${PROJECT_SOURCE_DIR}/include/kfr/base/round.hpp 37 | ${PROJECT_SOURCE_DIR}/include/kfr/base/saturation.hpp 38 | ${PROJECT_SOURCE_DIR}/include/kfr/base/select.hpp 39 | ${PROJECT_SOURCE_DIR}/include/kfr/base/shuffle.hpp 40 | ${PROJECT_SOURCE_DIR}/include/kfr/base/sin_cos.hpp 41 | ${PROJECT_SOURCE_DIR}/include/kfr/base/sinh_cosh.hpp 42 | ${PROJECT_SOURCE_DIR}/include/kfr/base/sqrt.hpp 43 | ${PROJECT_SOURCE_DIR}/include/kfr/base/tan.hpp 44 | ${PROJECT_SOURCE_DIR}/include/kfr/base/types.hpp 45 | ${PROJECT_SOURCE_DIR}/include/kfr/base/univector.hpp 46 | ${PROJECT_SOURCE_DIR}/include/kfr/base/vec.hpp 47 | ${PROJECT_SOURCE_DIR}/include/kfr/data/bitrev.hpp 48 | ${PROJECT_SOURCE_DIR}/include/kfr/data/sincos.hpp 49 | ${PROJECT_SOURCE_DIR}/include/kfr/dft/bitrev.hpp 50 | ${PROJECT_SOURCE_DIR}/include/kfr/dft/fft.hpp 51 | ${PROJECT_SOURCE_DIR}/include/kfr/dft/ft.hpp 52 | ${PROJECT_SOURCE_DIR}/include/kfr/dft/reference_dft.hpp 53 | ${PROJECT_SOURCE_DIR}/include/kfr/dispatch/cpuid.hpp 54 | ${PROJECT_SOURCE_DIR}/include/kfr/dispatch/runtimedispatch.hpp 55 
| ${PROJECT_SOURCE_DIR}/include/kfr/expressions/basic.hpp 56 | ${PROJECT_SOURCE_DIR}/include/kfr/expressions/conversion.hpp 57 | ${PROJECT_SOURCE_DIR}/include/kfr/expressions/generators.hpp 58 | ${PROJECT_SOURCE_DIR}/include/kfr/expressions/operators.hpp 59 | ${PROJECT_SOURCE_DIR}/include/kfr/expressions/pointer.hpp 60 | ${PROJECT_SOURCE_DIR}/include/kfr/expressions/reduce.hpp 61 | ${PROJECT_SOURCE_DIR}/include/kfr/io/audiofile.hpp 62 | ${PROJECT_SOURCE_DIR}/include/kfr/io/file.hpp 63 | ${PROJECT_SOURCE_DIR}/include/kfr/io/python_plot.hpp 64 | ${PROJECT_SOURCE_DIR}/include/kfr/io/tostring.hpp 65 | ${PROJECT_SOURCE_DIR}/include/kfr/math.hpp 66 | ${PROJECT_SOURCE_DIR}/include/kfr/misc/compiletime.hpp 67 | ${PROJECT_SOURCE_DIR}/include/kfr/misc/random.hpp 68 | ${PROJECT_SOURCE_DIR}/include/kfr/misc/small_buffer.hpp 69 | ${PROJECT_SOURCE_DIR}/include/kfr/misc/sort.hpp 70 | ${PROJECT_SOURCE_DIR}/include/kfr/vec.hpp 71 | ${PROJECT_SOURCE_DIR}/include/kfr/version.hpp 72 | ${PROJECT_SOURCE_DIR}/include/kfr/base/kfr.h 73 | ${PROJECT_SOURCE_DIR}/include/kfr/base/intrinsics.h 74 | ${PROJECT_SOURCE_DIR}/include/kfr/cometa.hpp 75 | ${PROJECT_SOURCE_DIR}/include/kfr/cometa/string.hpp 76 | 77 | ${PROJECT_SOURCE_DIR}/tests/testo/testo.hpp 78 | ${PROJECT_SOURCE_DIR}/tests/testo/print_colored.hpp 79 | ) 80 | -------------------------------------------------------------------------------- /syntax-check.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | import fnmatch 5 | import subprocess 6 | import os 7 | import sys 8 | 9 | path = os.path.dirname(os.path.realpath(__file__)) 10 | 11 | filenames = [] 12 | for root, dirnames, files in os.walk(os.path.join(path, 'include')): 13 | for filename in fnmatch.filter(files, '*.hpp'): 14 | filenames.append(os.path.join(root, filename)) 15 | 16 | 17 | target = "" 18 | if sys.platform.startswith('win32'): 19 | target = 
"--target=x86_64-w64-windows-gnu" 20 | 21 | fails = 0 22 | for filename in filenames: 23 | print(filename, '...') 24 | c = subprocess.call(["clang", "-fsyntax-only", filename, "-std=c++14", "-I"+os.path.join(path, "include"), "-Wno-pragma-once-outside-header", target]) 25 | if c != 0: 26 | fails+=1 27 | 28 | exit(fails) 29 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 D Levin (http://www.kfrlib.com) 2 | # This file is part of KFR 3 | # 4 | # KFR is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # KFR is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with KFR. 
16 | 17 | 18 | cmake_minimum_required(VERSION 3.0) 19 | 20 | if (NOT MSVC) 21 | add_compile_options(-fno-exceptions -fno-rtti -ftemplate-backtrace-limit=0 -march=native) 22 | link_libraries(stdc++ pthread m) 23 | else () 24 | add_compile_options(/arch:AVX) 25 | endif () 26 | 27 | include_directories(../include) 28 | 29 | add_executable(dft_test dft_test.cpp ${KFR_SRC}) 30 | 31 | enable_testing() 32 | 33 | add_test(NAME dft_test 34 | COMMAND ${PROJECT_BINARY_DIR}/tests/dft_test) -------------------------------------------------------------------------------- /tests/dft_test.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * KFR (http://kfrlib.com) 3 | * Copyright (C) 2016 D Levin 4 | * See LICENSE.txt for details 5 | */ 6 | 7 | // library_version() 8 | #include 9 | 10 | #include 11 | 12 | #include "testo/testo.hpp" 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | using namespace kfr; 25 | 26 | TEST(fft_accuracy) 27 | { 28 | testo::active_test()->show_progress = true; 29 | random_bit_generator gen(2247448713, 915890490, 864203735, 2982561); 30 | 31 | testo::matrix(named("type") = ctypes, // 32 | named("inverse") = std::make_tuple(false, true), // 33 | named("log2(size)") = make_range(1, 21), // 34 | [&gen](auto type, bool inverse, size_t log2size) { 35 | using float_type = type_of; 36 | const size_t size = 1 << log2size; 37 | 38 | univector> in = 39 | typed(gen_random_range(gen, -1.0, +1.0), size * 2); 40 | univector> out = in; 41 | univector> refout = out; 42 | const dft_plan dft(size); 43 | univector temp(dft.temp_size); 44 | 45 | reference_dft(refout.data(), in.data(), size, inverse); 46 | dft.execute(out, out, temp, inverse); 47 | 48 | const float_type rms_diff = rms(cabs(refout - out)); 49 | const double ops = log2size * 100; 50 | const double epsilon = std::numeric_limits::epsilon(); 51 | CHECK(rms_diff < epsilon 
* ops); 52 | }); 53 | } 54 | 55 | int main(int argc, char** argv) 56 | { 57 | println(library_version()); 58 | 59 | return testo::run_all("", true); 60 | } 61 | -------------------------------------------------------------------------------- /tests/testo/print_colored.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #if defined(_WIN32) 5 | #include 6 | #endif 7 | 8 | namespace print_colored 9 | { 10 | 11 | enum text_color : uint32_t 12 | { 13 | Black = 0x00, 14 | DarkBlue = 0x01, 15 | DarkGreen = 0x02, 16 | DarkCyan = 0x03, 17 | DarkRed = 0x04, 18 | DarkMagenta = 0x05, 19 | DarkYellow = 0x06, 20 | LightGrey = 0x07, 21 | Gray = 0x08, 22 | Blue = 0x09, 23 | Green = 0x0A, 24 | Cyan = 0x0B, 25 | Red = 0x0C, 26 | Magenta = 0x0D, 27 | Yellow = 0x0E, 28 | White = 0x0F, 29 | BgBlack = 0x00, 30 | BgDarkBlue = 0x10, 31 | BgDarkGreen = 0x20, 32 | BgDarkCyan = 0x30, 33 | BgDarkRed = 0x40, 34 | BgDarkMagenta = 0x50, 35 | BgDarkYellow = 0x60, 36 | BgLightGrey = 0x70, 37 | BgGray = 0x80, 38 | BgBlue = 0x90, 39 | BgGreen = 0xA0, 40 | BgCyan = 0xB0, 41 | BgRed = 0xC0, 42 | BgMagenta = 0xD0, 43 | BgYellow = 0xE0, 44 | BgWhite = 0xF0, 45 | 46 | Normal = BgBlack | LightGrey 47 | }; 48 | 49 | enum console_buffer 50 | { 51 | ConsoleStdOutput, 52 | ConsoleStdError 53 | }; 54 | 55 | #if defined(_WIN32) 56 | typedef HANDLE console_handle_t; 57 | 58 | inline console_handle_t console_handle(console_buffer console = ConsoleStdOutput) 59 | { 60 | static HANDLE con_out = ::GetStdHandle(STD_OUTPUT_HANDLE); 61 | static HANDLE con_err = ::GetStdHandle(STD_ERROR_HANDLE); 62 | return console == ConsoleStdOutput ? 
con_out : con_err; 63 | } 64 | 65 | #endif 66 | /* Scoped colour change: the constructor records the current colour of the chosen stream and applies c; the destructor re-applies the recorded colour. */ 67 | struct console_color 68 | { 69 | public: 70 | console_color(text_color c, console_buffer console = ConsoleStdOutput) 71 | : m_old(get(console)), m_console(console) 72 | { 73 | set(c, m_console); 74 | } 75 | 76 | ~console_color() { set(m_old, m_console); } 77 | /* get(): current colour - read from the console on Windows, otherwise the value last stored by set(). */ 78 | private: 79 | text_color get(console_buffer console = ConsoleStdOutput) 80 | { 81 | #ifdef _WIN32 82 | CONSOLE_SCREEN_BUFFER_INFO info; 83 | ::GetConsoleScreenBufferInfo(console_handle(console), &info); 84 | return static_cast<text_color>(info.wAttributes & 0xFF); 85 | #else 86 | return static_color(); 87 | #endif 88 | } 89 | /* set(): applies new_color. Windows uses SetConsoleTextAttribute; elsewhere an escape sequence is written to stdout or stderr, matching console, and the colour is remembered in static_color(). */ 90 | void set(text_color new_color, console_buffer console = ConsoleStdOutput) 91 | { 92 | #ifdef _WIN32 93 | ::SetConsoleTextAttribute(console_handle(console), static_cast<WORD>(new_color)); 94 | #else 95 | if (new_color != Normal) 96 | { /* Bits 0 and 2 of each nibble are swapped and bit 1 kept to form the 30+/40+ codes; bit 3 adds 60. */ 97 | uint8_t t = new_color & 0xF; 98 | uint8_t b = (new_color & 0xF0) >> 4; 99 | uint8_t tnum = 30 + ((t & 1) << 2 | (t & 2) | (t & 4) >> 2); 100 | uint8_t bnum = 40 + ((b & 1) << 2 | (b & 2) | (b & 4) >> 2); 101 | if (t & 8) 102 | tnum += 60; 103 | if (b & 8) 104 | bnum += 60; 105 | fprintf(console == ConsoleStdOutput ? stdout : stderr, "\x1B[%d;%dm", tnum, bnum); 106 | } 107 | else 108 | { 109 | fprintf(console == ConsoleStdOutput ? stdout : stderr, "\x1B[0m"); 110 | } 111 | static_color() = new_color; 112 | #endif 113 | } 114 | 115 | text_color m_old; 116 | console_buffer m_console; 117 | #ifndef _WIN32 118 | static text_color& static_color() 119 | { 120 | static text_color color = Normal; 121 | return color; 122 | } 123 | #endif 124 | }; 125 | /* A console_color whose colour and stream are fixed at compile time. */ 126 | template <text_color color, console_buffer console = ConsoleStdOutput> 127 | struct colored_text_tpl : public console_color 128 | { 129 | public: 130 | colored_text_tpl() : console_color(color, console) {} 131 | 132 | private: 133 | }; 134 | /* One shorthand per foreground colour, each using the default (standard output) stream. */ 135 | typedef colored_text_tpl<DarkBlue> darkblue_text; 136 | typedef colored_text_tpl<DarkGreen> darkgreen_text; 137 | typedef colored_text_tpl<DarkCyan> darkcyan_text; 138 | typedef colored_text_tpl<DarkRed> darkred_text; 139 | typedef colored_text_tpl<DarkMagenta> darkmagenta_text; 140 | typedef colored_text_tpl<DarkYellow> darkyellow_text; 
141 | typedef colored_text_tpl<LightGrey> lightgrey_text; 142 | typedef colored_text_tpl<Gray> gray_text; 143 | typedef colored_text_tpl<Blue> blue_text; 144 | typedef colored_text_tpl<Green> green_text; 145 | typedef colored_text_tpl<Cyan> cyan_text; 146 | typedef colored_text_tpl<Red> red_text; 147 | typedef colored_text_tpl<Magenta> magenta_text; 148 | typedef colored_text_tpl<Yellow> yellow_text; 149 | typedef colored_text_tpl<White> white_text; 150 | } 151 | --------------------------------------------------------------------------------