├── .clang-format
├── .gitignore
├── .travis.yml
├── CMakeLists.txt
├── LICENSE.txt
├── README.md
├── build.py
├── format-all.py
├── img
    └── fft_performance.png
├── include
    └── kfr
    │   ├── all.hpp
    │   ├── base
    │       ├── abs.hpp
    │       ├── asin_acos.hpp
    │       ├── atan.hpp
    │       ├── complex.hpp
    │       ├── constants.hpp
    │       ├── digitreverse.hpp
    │       ├── dispatch.hpp
    │       ├── expression.hpp
    │       ├── function.hpp
    │       ├── gamma.hpp
    │       ├── hyperbolic.hpp
    │       ├── intrinsics.h
    │       ├── kfr.h
    │       ├── log_exp.hpp
    │       ├── logical.hpp
    │       ├── memory.hpp
    │       ├── min_max.hpp
    │       ├── operators.hpp
    │       ├── read_write.hpp
    │       ├── round.hpp
    │       ├── saturation.hpp
    │       ├── select.hpp
    │       ├── shuffle.hpp
    │       ├── sin_cos.hpp
    │       ├── sinh_cosh.hpp
    │       ├── specializations.i
    │       ├── sqrt.hpp
    │       ├── tan.hpp
    │       ├── types.hpp
    │       ├── univector.hpp
    │       └── vec.hpp
    │   ├── cident.h
    │   ├── cometa.hpp
    │   ├── cometa
    │       └── string.hpp
    │   ├── data
    │       ├── bitrev.hpp
    │       └── sincos.hpp
    │   ├── dft
    │       ├── bitrev.hpp
    │       ├── conv.hpp
    │       ├── fft.hpp
    │       ├── ft.hpp
    │       └── reference_dft.hpp
    │   ├── dispatch
    │       ├── cpuid.hpp
    │       ├── cpuid_auto.hpp
    │       └── runtimedispatch.hpp
    │   ├── expressions
    │       ├── basic.hpp
    │       ├── conversion.hpp
    │       ├── generators.hpp
    │       ├── operators.hpp
    │       ├── pointer.hpp
    │       └── reduce.hpp
    │   ├── io
    │       ├── audiofile.hpp
    │       ├── file.hpp
    │       ├── python_plot.hpp
    │       └── tostring.hpp
    │   ├── math.hpp
    │   ├── misc
    │       ├── compiletime.hpp
    │       ├── random.hpp
    │       ├── small_buffer.hpp
    │       └── sort.hpp
    │   ├── vec.hpp
    │   └── version.hpp
├── sources.cmake
├── syntax-check.py
└── tests
    ├── CMakeLists.txt
    ├── dft_test.cpp
    └── testo
        ├── print_colored.hpp
        └── testo.hpp


/.clang-format:
--------------------------------------------------------------------------------
 1 | UseTab: Never
 2 | IndentWidth: 4
 3 | Language : Cpp
 4 | BreakBeforeBraces: Allman
 5 | MaxEmptyLinesToKeep: 1
 6 | IndentCaseLabels: false
 7 | NamespaceIndentation: None
 8 | AccessModifierOffset: -4
 9 | SpacesInParentheses: false
10 | SpaceInEmptyParentheses: false
11 | SpacesInCStyleCastParentheses: false
12 | PointerAlignment: Left
13 | Cpp11BracedListStyle: false
14 | AllowShortIfStatementsOnASingleLine: false
15 | AllowShortFunctionsOnASingleLine : true
16 | AlignOperands: true
17 | Standard: Cpp11
18 | IndentCaseLabels: false
19 | AlignTrailingComments : false
20 | ConstructorInitializerAllOnOneLineOrOnePerLine : false
21 | ColumnLimit: 110
22 | BinPackParameters : true
23 | BinPackArguments : true
24 | AlwaysBreakTemplateDeclarations : true
25 | AlignConsecutiveAssignments : true
26 | PenaltyReturnTypeOnItsOwnLine: 50000
27 | CommentPragmas: '^ >>>'
28 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled Object files
 2 | *.slo
 3 | *.lo
 4 | *.o
 5 | *.obj
 6 | 
 7 | # Precompiled Headers
 8 | *.gch
 9 | *.pch
10 | 
11 | # Compiled Dynamic libraries
12 | *.so
13 | *.dylib
14 | *.dll
15 | 
16 | # Fortran module files
17 | *.mod
18 | *.smod
19 | 
20 | # Compiled Static libraries
21 | *.lai
22 | *.la
23 | *.a
24 | *.lib
25 | 
26 | # Executables
27 | *.exe
28 | *.out
29 | *.app
30 | 
31 | # CMake files
32 | CMakeCache.txt
33 | CMakeFiles
34 | CMakeScripts
35 | Makefile
36 | cmake_install.cmake
37 | install_manifest.txt
38 | CTestTestfile.cmake
39 | 
40 | # build directory
41 | build/
42 | 
43 | # test directory
44 | svg/
45 | 
46 | # Byte-compiled / optimized / DLL files
47 | __pycache__/
48 | *.py[cod]
49 | *$py.class
50 | 
51 | # Distribution / packaging
52 | .Python
53 | env/
54 | build/
55 | develop-eggs/
56 | dist/
57 | downloads/
58 | eggs/
59 | .eggs/
60 | lib/
61 | lib64/
62 | parts/
63 | sdist/
64 | var/
65 | *.egg-info/
66 | .installed.cfg
67 | *.egg
68 | 
69 | # Sphinx documentation
70 | docs/_build/
71 | 
72 | # CLion
73 | .idea/
74 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: required
 2 | dist: precise
 3 | language: cpp
 4 | 
 5 | matrix:
 6 |   include:
 7 |     - compiler: clang
 8 |       addons:
 9 |         apt:
10 |           sources:
11 |             - ubuntu-toolchain-r-test
12 |             - llvm-toolchain-precise-3.8
13 |             - george-edison55-precise-backports
14 |           packages:
15 |             - cmake
16 |             - cmake-data
17 |             - g++-5
18 |             - clang-3.8
19 |       env:
20 |         - CXXCOMPILER=clang++-3.8 CCOMPILER=clang-3.8
21 | 
22 | before_install:
23 |   - sudo apt-get update -qq
24 | script:
25 |   - mkdir build
26 |   - cd build
27 |   - cmake -DCMAKE_CXX_COMPILER=$CXXCOMPILER -DCMAKE_C_COMPILER=$CCOMPILER -DCMAKE_BUILD_TYPE=Release .. && make
28 |   - cd tests
29 |   - ctest
30 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 2 | # This file is part of KFR
 3 | # 
 4 | # KFR is free software: you can redistribute it and/or modify
 5 | # it under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | # 
 9 | # KFR is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | # GNU General Public License for more details.
13 | # 
14 | # You should have received a copy of the GNU General Public License
15 | # along with KFR.
16 | 
17 | 
18 | cmake_minimum_required(VERSION 3.0)
19 | 
20 | set(OPT_BITNESS "" CACHE STRING "Bitness flag, e.g. -m32 or -m64") # cmake -DOPT_BITNESS="-m32" or -m64
21 | set(OPT_STATIC "" CACHE STRING "Static linking flag, e.g. -static") # cmake -DOPT_STATIC="-static"
22 | 
23 | if (CMAKE_BUILD_TYPE_INITIALIZED_TO_DEFAULT)
24 |     set(CMAKE_BUILD_TYPE Release)
25 | endif ()
26 | if (${CMAKE_GENERATOR} STREQUAL "MinGW Makefiles" OR ${CMAKE_GENERATOR} STREQUAL "MSYS Makefiles")
27 |     set(OPT_TARGET "--target=x86_64-w64-windows-gnu")
28 |     set(CMAKE_CXX_COMPILER clang++)
29 |     set(CMAKE_C_COMPILER clang)
30 | else ()
31 |     set(OPT_TARGET "") # default target
32 | endif ()
33 | 
34 | if (${CMAKE_GENERATOR} STREQUAL "Visual Studio 14 2015")
35 |     set(CMAKE_GENERATOR_TOOLSET LLVM-vs2014)
36 | endif ()
37 | set(CMAKE_CXX_FLAGS "${OPT_TARGET} ${OPT_BITNESS} ${OPT_STATIC}" CACHE STRING "compile flags" FORCE)
38 | set(CMAKE_C_FLAGS "${OPT_TARGET} ${OPT_BITNESS} ${OPT_STATIC}" CACHE STRING "compile flags" FORCE)
39 | #set(CMAKE_EXE_LINKER_FLAGS "${OPT_TARGET} ${OPT_BITNESS}")
40 | #set(CMAKE_SHARED_LINKER_FLAGS "${OPT_TARGET} ${OPT_BITNESS}")
41 | #set(CMAKE_STATIC_LINKER_FLAGS "${OPT_TARGET} ${OPT_BITNESS}")
42 | 
43 | project(kfr)
44 | 
45 | include(sources.cmake)
46 | 
47 | set(ALL_WARNINGS -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-c99-extensions -Wno-padded)
48 | 
49 | if (NOT MSVC)
50 |     add_compile_options(-std=c++1y)
51 | else ()
52 |     add_compile_options(/EHsc /D_HAS_EXCEPTIONS=0)
53 | endif ()
54 | 
55 | add_subdirectory(tests)
56 | 
57 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # kfr-fft
 2 | Highly optimized FFT
 3 | 
 4 | KFR is a fast, modern C++ DSP framework, DFT/FFT, Audio resampling, FIR/IIR Filtering, Biquad, vector functions (SSE, AVX)
 5 | 
 6 | ## Features
 7 | 
 8 | * FFT is optimized for SSE2, SSE3, SSE4.x, AVX and AVX2 processors
 9 | * Both double and single precision
10 | 
11 | ## Performance
12 | 
13 | FFT (double precision, sizes from 1024 to 16777216)
14 | See [fft benchmark](https://github.com/kfrlib/fft-benchmark) for details about benchmarking process.
15 | 
16 | ![FFT Performance](img/fft_performance.png)
17 | 
18 | ## Prerequisites
19 | 
20 | * macOS: XCode 6.3, 6.4, 7.x, 8.x
21 | * Windows: MinGW 5.2 and Clang 3.7 or newer
22 | * Ubuntu: GCC 5.1 and Clang 3.7 or newer
23 | * CoMeta metaprogramming library (already included)
24 | 
25 | ## Tests
26 | 
27 | Execute `build.py` to run the tests or run tests manually from the `tests` directory
28 | 
29 | Tested on the following systems:
30 | 
31 | * OS X 10.11.4 / AppleClang 7.3.0.7030031
32 | * Ubuntu 14.04 / gcc-5 (Ubuntu 5.3.0-3ubuntu1~14.04) 5.3.0 20151204 / clang version 3.8.0 (tags/RELEASE_380/final)
33 | * Windows 8.1 / MinGW-W64 / clang version 3.8.0 (branches/release_38)
34 | 
35 | ## Planned for future versions
36 | 
37 | * DFT for any lengths (not only powers of two)
38 | 
39 | ## License
40 | 
41 | KFR is dual-licensed, available under both commercial and open-source GPL licenses.
42 | 
43 | If you want to use KFR in a commercial product or a closed-source project, you need to [purchase a Commercial License](http://kfrlib.com/purchase-license).
44 | 


--------------------------------------------------------------------------------
/build.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 4 | # This file is part of KFR
 5 | # 
 6 | # KFR is free software: you can redistribute it and/or modify
 7 | # it under the terms of the GNU General Public License as published by
 8 | # the Free Software Foundation, either version 3 of the License, or
 9 | # (at your option) any later version.
10 | # 
11 | # KFR is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 | # GNU General Public License for more details.
15 | # 
16 | # You should have received a copy of the GNU General Public License
17 | # along with KFR.
18 | 
19 | 
20 | from __future__ import print_function
21 | 
22 | import os
23 | import subprocess
24 | import sys
25 | 
26 | path = os.path.dirname(os.path.realpath(__file__))
27 | build_dir = os.path.join(path, 'build')
28 | 
29 | try:
30 |     os.makedirs(build_dir)
31 | except:
32 |     pass
33 | 
34 | print('Checking clang...', end=' ')
35 | if subprocess.call(['clang', '--version'], stdout=subprocess.PIPE): 
36 |     raise Exception('clang is not on your PATH')
37 | print('ok')
38 | print('Checking clang++...', end=' ')
39 | if subprocess.call(['clang++', '--version'], stdout=subprocess.PIPE): 
40 |     raise Exception('clang++ is not on your PATH')
41 | print('ok')
42 | 
43 | if sys.platform.startswith('win32'):
44 |     generator = 'MinGW Makefiles'    
45 | else:
46 |     generator = 'Unix Makefiles'
47 | 
48 | options = [
49 |     '-DCMAKE_BUILD_TYPE=Release',
50 |     ]
51 | 
52 | if subprocess.call(['cmake', '-G', generator, '..'] + options, cwd=build_dir): raise Exception('Can\'t make project')
53 | if subprocess.call(['cmake', '--build', '.', '--', '-j4'], cwd=build_dir): raise Exception('Can\'t build project')
54 | if subprocess.call(['ctest'], cwd=os.path.join(build_dir, 'tests')): raise Exception('Can\'t test project')
55 | 


--------------------------------------------------------------------------------
/format-all.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from __future__ import print_function
 3 | 
 4 | import fnmatch
 5 | import os
 6 | import subprocess
 7 | import sys
 8 | import glob
 9 | 
10 | path = os.path.dirname(os.path.realpath(__file__))
11 | 
12 | filenames = []
13 | for root, dirnames, files in os.walk(path, path):
14 |     for filename in fnmatch.filter(files, '*.hpp'):
15 |         filenames.append(os.path.join(root, filename))
16 |     for filename in fnmatch.filter(files, '*.h'):
17 |         filenames.append(os.path.join(root, filename))
18 |     for filename in fnmatch.filter(files, '*.cpp'):
19 |         filenames.append(os.path.join(root, filename))
20 | 
21 | for filename in filenames:
22 |     print( filename, '...' )
23 |     subprocess.call(['clang-format', '-i', filename])
24 |     # Fix clang-format bug: https://llvm.org/bugs/show_bug.cgi?id=26125
25 |     for tmp_file in glob.glob(filename+'*.tmp'):
26 |         os.remove(tmp_file)
27 | 


--------------------------------------------------------------------------------
/img/fft_performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kfrlib/fft/6b6a9315d8b690766b6e121611f68727ea7b2112/img/fft_performance.png


--------------------------------------------------------------------------------
/include/kfr/all.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 3 |  * This file is part of KFR
 4 |  *
 5 |  * KFR is free software: you can redistribute it and/or modify
 6 |  * it under the terms of the GNU General Public License as published by
 7 |  * the Free Software Foundation, either version 3 of the License, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * KFR is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  * GNU General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public License
16 |  * along with KFR.
17 |  *
18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
20 |  * disclosing the source code of your own applications.
21 |  * See http://www.kfrlib.com for details.
22 |  */
23 | 
24 | #include "cometa/string.hpp"
25 | 
26 | #include "base/abs.hpp"
27 | #include "base/asin_acos.hpp"
28 | #include "base/atan.hpp"
29 | #include "base/complex.hpp"
30 | #include "base/constants.hpp"
31 | #include "base/digitreverse.hpp"
32 | #include "base/dispatch.hpp"
33 | #include "base/function.hpp"
34 | #include "base/gamma.hpp"
35 | #include "base/hyperbolic.hpp"
36 | #include "base/log_exp.hpp"
37 | #include "base/logical.hpp"
38 | #include "base/memory.hpp"
39 | #include "base/min_max.hpp"
40 | #include "base/operators.hpp"
41 | #include "base/read_write.hpp"
42 | #include "base/round.hpp"
43 | #include "base/saturation.hpp"
44 | #include "base/select.hpp"
45 | #include "base/shuffle.hpp"
46 | #include "base/sin_cos.hpp"
47 | #include "base/sqrt.hpp"
48 | #include "base/tan.hpp"
49 | #include "base/types.hpp"
50 | #include "base/univector.hpp"
51 | #include "base/vec.hpp"
52 | #include "dispatch/cpuid.hpp"
53 | #include "dispatch/runtimedispatch.hpp"
54 | #include "expressions/basic.hpp"
55 | #include "expressions/conversion.hpp"
56 | #include "expressions/generators.hpp"
57 | #include "expressions/operators.hpp"
58 | #include "expressions/pointer.hpp"
59 | #include "expressions/reduce.hpp"
60 | #include "version.hpp"
61 | 
62 | #include "misc/compiletime.hpp"
63 | #include "misc/random.hpp"
64 | #include "misc/small_buffer.hpp"
65 | #include "misc/sort.hpp"
66 | 
67 | #include "data/bitrev.hpp"
68 | #include "data/sincos.hpp"
69 | #include "dsp/biquad.hpp"
70 | #include "dsp/biquad_design.hpp"
71 | #include "dsp/fir.hpp"
72 | #include "dsp/fir_design.hpp"
73 | #include "dsp/fracdelay.hpp"
74 | #include "dsp/goertzel.hpp"
75 | #include "dsp/impulse.hpp"
76 | #include "dsp/interpolation.hpp"
77 | #include "dsp/oscillators.hpp"
78 | #include "dsp/resample.hpp"
79 | #include "dsp/speaker.hpp"
80 | #include "dsp/units.hpp"
81 | #include "dsp/waveshaper.hpp"
82 | #include "dsp/weighting.hpp"
83 | #include "dsp/window.hpp"
84 | #include "io/audiofile.hpp"
85 | #include "io/file.hpp"
86 | #include "io/python_plot.hpp"
87 | #include "io/tostring.hpp"
88 | #include "math.hpp"
89 | #include "vec.hpp"
90 | 
91 | #include "dft/bitrev.hpp"
92 | #include "dft/conv.hpp"
93 | #include "dft/fft.hpp"
94 | #include "dft/ft.hpp"
95 | #include "dft/reference_dft.hpp"
96 | 


--------------------------------------------------------------------------------
/include/kfr/base/abs.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "function.hpp"
 26 | #include "operators.hpp"
 27 | #include "select.hpp"
 28 | 
 29 | #pragma clang diagnostic push
 30 | #if CID_HAS_WARNING("-Winaccessible-base")
 31 | #pragma clang diagnostic ignored "-Winaccessible-base"
 32 | #endif
 33 | 
 34 | namespace kfr
 35 | {
 36 | 
 37 | namespace internal
 38 | {
 39 | 
 40 | template <cpu_t cpu = cpu_t::native>
 41 | struct in_abs : in_abs<older(cpu)>
 42 | {
 43 |     struct fn_abs : in_abs<older(cpu)>::fn_abs, fn_disabled
 44 |     {
 45 |     };
 46 | };
 47 | 
 48 | template <>
 49 | struct in_abs<cpu_t::sse2> : in_select<cpu_t::sse2>
 50 | {
 51 |     constexpr static cpu_t cpu = cpu_t::sse2;
 52 | 
 53 | private:
 54 |     using in_select<cpu_t::sse2>::select;
 55 | 
 56 | public:
 57 |     template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
 58 |     KFR_SINTRIN vec<T, N> abs(vec<T, N> value)
 59 |     {
 60 |         return select(value >= T(), value, -value);
 61 |     }
 62 |     template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
 63 |     KFR_SINTRIN vec<T, N> abs(vec<T, N> value)
 64 |     {
 65 |         return value & invhighbitmask<T>;
 66 |     }
 67 | 
 68 |     KFR_HANDLE_ALL(abs)
 69 |     KFR_HANDLE_SCALAR(abs)
 70 |     KFR_SPEC_FN(in_abs, abs)
 71 | };
 72 | 
 73 | template <>
 74 | struct in_abs<cpu_t::ssse3> : in_abs<cpu_t::sse2>, in_select<cpu_t::sse2>
 75 | {
 76 |     constexpr static cpu_t cpu = cpu_t::ssse3;
 77 | 
 78 | private:
 79 |     using in_select<cpu_t::sse2>::select;
 80 | 
 81 | public:
 82 |     template <size_t N>
 83 |     KFR_SINTRIN vec<i64, N> abs(vec<i64, N> value)
 84 |     {
 85 |         return select(value >= 0, value, -value);
 86 |     }
 87 | 
 88 |     KFR_CPU_INTRIN(ssse3) i32sse abs(i32sse value) { return _mm_abs_epi32(*value); }
 89 |     KFR_CPU_INTRIN(ssse3) i16sse abs(i16sse value) { return _mm_abs_epi16(*value); }
 90 |     KFR_CPU_INTRIN(ssse3) i8sse abs(i8sse value) { return _mm_abs_epi8(*value); }
 91 | 
 92 |     template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
 93 |     KFR_SINTRIN vec<T, N> abs(vec<T, N> value)
 94 |     {
 95 |         return value & invhighbitmask<T>;
 96 |     }
 97 | 
 98 |     KFR_HANDLE_ALL(abs)
 99 |     KFR_HANDLE_SCALAR(abs)
100 |     KFR_SPEC_FN(in_abs, abs)
101 | };
102 | 
103 | template <>
104 | struct in_abs<cpu_t::avx2> : in_abs<cpu_t::ssse3>
105 | {
106 |     constexpr static cpu_t cpu = cpu_t::avx2;
107 |     using in_abs<cpu_t::ssse3>::abs;
108 | 
109 |     KFR_CPU_INTRIN(avx2) i32avx abs(i32avx value) { return _mm256_abs_epi32(*value); }
110 |     KFR_CPU_INTRIN(avx2) i16avx abs(i16avx value) { return _mm256_abs_epi16(*value); }
111 |     KFR_CPU_INTRIN(avx2) i8avx abs(i8avx value) { return _mm256_abs_epi8(*value); }
112 | 
113 |     KFR_HANDLE_ALL(abs)
114 |     KFR_HANDLE_SCALAR(abs)
115 |     KFR_SPEC_FN(in_abs, abs)
116 | };
117 | }
118 | 
119 | namespace native
120 | {
121 | using fn_abs = internal::in_abs<>::fn_abs;
122 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
123 | 
124 | KFR_INTRIN ftype<T1> abs(const T1& x)
125 | {
126 |     return internal::in_abs<>::abs(x);
127 | }
128 | 
129 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
130 | 
131 | KFR_INTRIN expr_func<fn_abs, E1> abs(E1&& x)
132 | {
133 |     return { fn_abs(), std::forward<E1>(x) };
134 | }
135 | }
136 | }
137 | 
138 | #pragma clang diagnostic pop
139 | 


--------------------------------------------------------------------------------
/include/kfr/base/asin_acos.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "abs.hpp"
 26 | #include "atan.hpp"
 27 | #include "constants.hpp"
 28 | #include "function.hpp"
 29 | #include "min_max.hpp"
 30 | #include "operators.hpp"
 31 | #include "select.hpp"
 32 | #include "shuffle.hpp"
 33 | #include "sqrt.hpp"
 34 | 
 35 | #pragma clang diagnostic push
 36 | #if CID_HAS_WARNING("-Winaccessible-base")
 37 | #pragma clang diagnostic ignored "-Winaccessible-base"
 38 | #endif
 39 | 
 40 | namespace kfr
 41 | {
 42 | 
 43 | namespace internal
 44 | {
 45 | 
 46 | template <cpu_t cpu = cpu_t::native>
 47 | struct in_asin_acos : private in_select<cpu>, private in_atan<cpu>, private in_sqrt<cpu>
 48 | {
 49 | private:
 50 |     using in_atan<cpu>::atan2;
 51 |     using in_sqrt<cpu>::sqrt;
 52 | 
 53 | public:
 54 |     template <typename T, size_t N>
 55 |     KFR_SINTRIN vec<T, N> asin(vec<T, N> x)
 56 |     {
 57 |         return atan2(x, sqrt(T(1) - x * x));
 58 |     }
 59 | 
 60 |     template <typename T, size_t N>
 61 |     KFR_SINTRIN vec<T, N> acos(vec<T, N> x)
 62 |     {
 63 |         return atan2(sqrt(T(1) - x * x), x);
 64 |     }
 65 |     KFR_SPEC_FN(in_asin_acos, asin)
 66 |     KFR_SPEC_FN(in_asin_acos, acos)
 67 | };
 68 | }
 69 | 
 70 | namespace native
 71 | {
 72 | using fn_asin = internal::in_asin_acos<>::fn_asin;
 73 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
 74 | KFR_INTRIN ftype<T1> asin(const T1& x)
 75 | {
 76 |     return internal::in_asin_acos<>::asin(x);
 77 | }
 78 | 
 79 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
 80 | KFR_INTRIN expr_func<fn_asin, E1> asin(E1&& x)
 81 | {
 82 |     return { fn_asin(), std::forward<E1>(x) };
 83 | }
 84 | 
 85 | using fn_acos = internal::in_asin_acos<>::fn_acos;
 86 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
 87 | KFR_INTRIN ftype<T1> acos(const T1& x)
 88 | {
 89 |     return internal::in_asin_acos<>::acos(x);
 90 | }
 91 | 
 92 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
 93 | KFR_INTRIN expr_func<fn_acos, E1> acos(E1&& x)
 94 | {
 95 |     return { fn_acos(), std::forward<E1>(x) };
 96 | }
 97 | }
 98 | }
 99 | 
100 | #pragma clang diagnostic pop
101 | 


--------------------------------------------------------------------------------
/include/kfr/base/constants.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 3 |  * This file is part of KFR
 4 |  *
 5 |  * KFR is free software: you can redistribute it and/or modify
 6 |  * it under the terms of the GNU General Public License as published by
 7 |  * the Free Software Foundation, either version 3 of the License, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * KFR is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  * GNU General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public License
16 |  * along with KFR.
17 |  *
18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
20 |  * disclosing the source code of your own applications.
21 |  * See http://www.kfrlib.com for details.
22 |  */
23 | #pragma once
24 | 
25 | #include "types.hpp"
26 | #include <limits>
27 | 
28 | namespace kfr
29 | {
30 | 
31 | // π (pi)
32 | // c_pi<f64, 4>      = 4pi
33 | // c_pi<f64, 3, 4>   = 3/4pi
34 | template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>>
35 | constexpr Tsub c_pi = Tsub(3.1415926535897932384626433832795 * m / d);
36 | 
37 | // π² (pi²)
38 | // c_sqr_pi<f64, 4>      = 4pi²
39 | // c_sqr_pi<f64, 3, 4>   = 3/4pi²
40 | template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>>
41 | constexpr Tsub c_sqr_pi = Tsub(9.8696044010893586188344909998762 * m / d);
42 | 
43 | // 1/π (1/pi)
44 | // c_recip_pi<f64>       1/pi
45 | // c_recip_pi<f64, 4>    4/pi
46 | template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>>
47 | constexpr Tsub c_recip_pi = Tsub(0.31830988618379067153776752674503 * m / d);
48 | 
49 | // degree to radian conversion factor
50 | template <typename T, typename Tsub = subtype<T>>
51 | constexpr Tsub c_degtorad = c_pi<T, 1, 180>;
52 | 
53 | // radian to degree conversion factor
54 | template <typename T, typename Tsub = subtype<T>>
55 | constexpr Tsub c_radtodeg = c_recip_pi<T, 180>;
56 | 
57 | // e, Euler's number
58 | template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>>
59 | constexpr Tsub c_e = Tsub(2.718281828459045235360287471352662 * m / d);
60 | 
61 | template <typename T, typename Tsub = subtype<T>> // number of explicit mantissa (fraction) bits of Tsub
62 | constexpr unsigned c_mantissa_bits = sizeof(Tsub) == 4 ? 23 : 52; // sizeof is in bytes: 4 -> 23 bits, else 52
63 | 
64 | template <typename T, typename Tsub = usubtype<T>>
65 | constexpr Tsub c_mantissa_mask = (Tsub(1) << c_mantissa_bits<T>)-1;
66 | 
67 | template <typename T, typename Tsub = subtype<T>>
68 | constexpr Tsub c_epsilon = (std::numeric_limits<Tsub>::epsilon());
69 | 
70 | template <typename T, typename Tsub = subtype<T>>
71 | constexpr Tsub c_infinity = std::numeric_limits<Tsub>::infinity();
72 | 
73 | template <typename T, typename Tsub = subtype<T>>
74 | constexpr Tsub c_neginfinity = -std::numeric_limits<Tsub>::infinity();
75 | 
76 | template <typename T, typename Tsub = subtype<T>>
77 | constexpr Tsub c_qnan = std::numeric_limits<Tsub>::quiet_NaN();
78 | 
79 | template <typename T, typename Tsub = subtype<T>>
80 | constexpr Tsub c_recip_log_2 = Tsub(1.442695040888963407359924681001892137426645954);
81 | 
82 | template <typename T, typename Tsub = subtype<T>>
83 | constexpr Tsub c_recip_log_10 = Tsub(0.43429448190325182765112891891661);
84 | 
85 | template <typename T, typename Tsub = subtype<T>>
86 | constexpr Tsub c_log_2 = Tsub(0.69314718055994530941723212145818);
87 | 
88 | template <typename T, typename Tsub = subtype<T>>
89 | constexpr Tsub c_log_10 = Tsub(2.3025850929940456840179914546844);
90 | 
91 | template <typename T, int m = 1, int d = 1, typename Tsub = subtype<T>>
92 | constexpr Tsub c_sqrt_2 = Tsub(1.4142135623730950488016887242097 * m / d);
93 | }
94 | 


--------------------------------------------------------------------------------
/include/kfr/base/digitreverse.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | #include "shuffle.hpp"
 25 | #include "types.hpp"
 26 | 
 27 | namespace kfr
 28 | {
 29 | 
 30 | namespace internal
 31 | {
 32 | 
 33 | template <size_t radix, size_t bits> // radix-2 overload: reverse the bit order of x and keep 'bits' bits
 34 | constexpr enable_if<radix == 2, u32> digitreverse(u32 x)
 35 | { // NOTE(review): bits == 0 gives a shift count of 32 below (assuming u32 is 32 bits wide) -- confirm unused
 36 |     x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1)); // swap adjacent bits
 37 |     x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2)); // swap adjacent 2-bit pairs
 38 |     x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4)); // swap the nibbles of each byte
 39 |     x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8)); // swap the bytes of each 16-bit half
 40 |     return ((x >> 16) | (x << 16)) >> (32 - bits); // swap the halves, then shift the reversed bits down
 41 | }
 42 | 
 43 | constexpr inline u32 bit_permute_step_simple(u32 x, u32 m, u32 shift)
 44 | { // bits of x under mask m move up by 'shift'; the bits 'shift' positions above them move down into m
 45 |     return ((x & m) << shift) | ((x >> shift) & m);
 46 | }
 47 | 
 48 | template <size_t radix, size_t bits> // Radix-4 overload: reverses the order of the 2-bit digits of x.
 49 | constexpr enable_if<radix == 4, u32> digitreverse(u32 x)
 50 | {
 51 |     if (bits <= 2) // at most one 2-bit digit: nothing to reorder
 52 |         return x;
 53 |     if (bits <= 4)
 54 |     {
 55 |         x = bit_permute_step_simple(x, 0x33333333, 2); // swap the two 2-bit digits inside each nibble
 56 |         return x >> (4 - bits); // shift the reversed 4-bit window down to `bits` bits
 57 |     }
 58 |     if (bits <= 8)
 59 |     {
 60 |         x = bit_permute_step_simple(x, 0x33333333, 2); // swap the two 2-bit digits inside each nibble
 61 |         x = bit_permute_step_simple(x, 0x0f0f0f0f, 4); // swap the two nibbles inside each byte
 62 |         return x >> (8 - bits); // shift the reversed 8-bit window down to `bits` bits
 63 |     }
 64 |     if (bits <= 16)
 65 |     {
 66 |         x = bit_permute_step_simple(x, 0x33333333, 2); // swap the two 2-bit digits inside each nibble
 67 |         x = bit_permute_step_simple(x, 0x0f0f0f0f, 4); // swap the two nibbles inside each byte
 68 |         x = bit_permute_step_simple(x, 0x00ff00ff, 8); // swap the two bytes inside each 16-bit half
 69 |         return x >> (16 - bits); // shift the reversed 16-bit window down to `bits` bits
 70 |     }
 71 |     if (bits <= 32)
 72 |     {
 73 |         x = bit_permute_step_simple(x, 0x33333333, 2); // swap the two 2-bit digits inside each nibble
 74 |         x = bit_permute_step_simple(x, 0x0f0f0f0f, 4); // swap the two nibbles inside each byte
 75 |         x = bit_permute_step_simple(x, 0x00ff00ff, 8); // swap the two bytes inside each 16-bit half
 76 |         x = bit_permute_step_simple(x, 0x0000ffff, 16); // swap the two 16-bit halves
 77 |         return x >> (32 - bits); // shift the reversed 32-bit window down to `bits` bits
 78 |     }
 79 |     return x; // bits > 32: x is returned unchanged
 80 | }
 81 | 
 82 | template <size_t radix, size_t bits> // Index functor: maps an element index to its digit-reversed index.
 83 | struct shuffle_index_digitreverse
 84 | {
 85 |     constexpr inline size_t operator()(size_t index) const
 86 |     {
 87 |         return static_cast<size_t>(digitreverse<radix, bits>(static_cast<u32>(index))); // computed on the index narrowed to u32
 88 |     }
 89 | };
 90 | }
 91 | 
 92 | template <size_t radix, size_t groupsize = 1, typename T, size_t N> // Permutes x through shufflevector with digit-reversed indices.
 93 | KFR_INLINE vec<T, N> digitreverse(vec<T, N> x)
 94 | {
 95 |     return shufflevector<N, internal::shuffle_index_digitreverse<radix, ilog2(N / groupsize)>, groupsize>(x); // indices are ilog2(N / groupsize) bits wide; groupsize is forwarded to shufflevector
 96 | }
 97 | 
 98 | template <size_t groupsize = 1, typename T, size_t N> // Shorthand for digitreverse with radix 2.
 99 | KFR_INLINE vec<T, N> bitreverse(vec<T, N> x)
100 | {
101 |     return digitreverse<2, groupsize>(x);
102 | }
103 | 
104 | template <size_t groupsize = 1, typename T, size_t N> // Shorthand for digitreverse with radix 4.
105 | KFR_INLINE vec<T, N> digitreverse4(vec<T, N> x)
106 | {
107 |     return digitreverse<4, groupsize>(x);
108 | }
109 | 
110 | template <size_t bits> // Scalar form: forwards to the radix-2 internal::digitreverse for a `bits`-wide value.
111 | constexpr inline u32 bitreverse(u32 x)
112 | {
113 |     return internal::digitreverse<2, bits>(x);
114 | }
115 | 
116 | template <size_t bits> // Scalar form: forwards to the radix-4 internal::digitreverse for a `bits`-wide value.
117 | constexpr inline u32 digitreverse4(u32 x)
118 | {
119 |     return internal::digitreverse<4, bits>(x);
120 | }
121 | }
122 | 


--------------------------------------------------------------------------------
/include/kfr/base/dispatch.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "kfr.h"
 26 | 
 27 | #include "types.hpp"
 28 | 
 29 | namespace kfr
 30 | {
 31 | 
 32 | namespace internal
 33 | {
 34 | 
 35 | template <typename Fn, cpu_t newcpu, typename = void> // Primary template: used when Fn has no retarget_this member template; Fn is kept as is.
 36 | struct retarget_impl
 37 | {
 38 |     using type = Fn;
 39 | };
 40 | 
 41 | template <typename Fn, cpu_t newcpu> // Selected when Fn::retarget_this<newcpu> names a valid type (detected through void_t).
 42 | struct retarget_impl<Fn, newcpu, void_t<typename Fn::template retarget_this<newcpu>>>
 43 | {
 44 |     using type = typename Fn::template retarget_this<newcpu>;
 45 | };
 46 | }
 47 | 
 48 | template <typename Fn, cpu_t newcpu> // Fn::retarget_this<newcpu> when that exists, otherwise Fn itself.
 49 | using retarget = typename internal::retarget_impl<Fn, newcpu>::type;
 50 | 
 51 | template <cpu_t newcpu, typename Fn, typename NewFn = retarget<Fn, newcpu>, // Converts fn to its retargeted type by constructing NewFn from it.
 52 |           KFR_ENABLE_IF(std::is_constructible<NewFn, Fn&&>::value)>
 53 | KFR_INLINE NewFn retarget_func(Fn&& fn)
 54 | {
 55 |     return NewFn(std::forward<Fn>(fn)); // Fn&& is a forwarding reference: forward matches the is_constructible check above, whereas std::move would move from lvalue arguments
 56 | }
 57 | 
 58 | template <cpu_t newcpu, typename Fn, typename NewEmptyFn = retarget<Fn, newcpu>, // Fallback: the retargeted type cannot be built from fn but is empty and default-constructible.
 59 |           KFR_ENABLE_IF(!std::is_constructible<NewEmptyFn, Fn&&>::value && std::is_empty<NewEmptyFn>::value &&
 60 |                         std::is_constructible<NewEmptyFn>::value)>
 61 | KFR_INLINE NewEmptyFn retarget_func(Fn&&)
 62 | {
 63 |     return NewEmptyFn(); // the argument is ignored; a fresh instance is returned
 64 | }
 65 | 
 66 | namespace internal
 67 | {
 68 | 
 69 | template <cpu_t a> // Declared only; each supported cpu_t value gets an explicit specialization below.
 70 | struct cpu_caller;
 71 | 
 72 | template <> // cpu_t::avx2: both entry points are KFR_NOINLINE and tagged KFR_USE_CPU(avx2).
 73 | struct cpu_caller<cpu_t::avx2>
 74 | {
 75 |     constexpr static cpu_t a = cpu_t::avx2;
 76 | 
 77 |     template <typename Fn, typename... Args> // Invokes fn as given with the forwarded arguments.
 78 |     KFR_NOINLINE static KFR_USE_CPU(avx2) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args)
 79 |     {
 80 |         return fn(std::forward<Args>(args)...);
 81 |     }
 82 | 
 83 |     template <typename Fn, typename... Args> // Retargets fn to `a` through retarget_func before invoking it.
 84 |     KFR_NOINLINE static KFR_USE_CPU(avx2) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args)
 85 |     {
 86 |         return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...);
 87 |     }
 88 | };
 89 | 
 90 | template <> // cpu_t::avx1: both entry points are KFR_NOINLINE and tagged KFR_USE_CPU(avx).
 91 | struct cpu_caller<cpu_t::avx1>
 92 | {
 93 |     constexpr static cpu_t a = cpu_t::avx1;
 94 | 
 95 |     template <typename Fn, typename... Args> // Invokes fn as given with the forwarded arguments.
 96 |     KFR_NOINLINE static KFR_USE_CPU(avx) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args)
 97 |     {
 98 |         return fn(std::forward<Args>(args)...);
 99 |     }
100 | 
101 |     template <typename Fn, typename... Args> // Retargets fn to `a` through retarget_func before invoking it.
102 |     KFR_NOINLINE static KFR_USE_CPU(avx) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args)
103 |     {
104 |         return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...);
105 |     }
106 | };
107 | 
108 | template <> // cpu_t::sse41: both entry points are KFR_NOINLINE and tagged KFR_USE_CPU(sse41).
109 | struct cpu_caller<cpu_t::sse41>
110 | {
111 |     constexpr static cpu_t a = cpu_t::sse41;
112 | 
113 |     template <typename Fn, typename... Args> // Invokes fn as given with the forwarded arguments.
114 |     KFR_NOINLINE static KFR_USE_CPU(sse41) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args)
115 |     {
116 |         return fn(std::forward<Args>(args)...);
117 |     }
118 | 
119 |     template <typename Fn, typename... Args> // Retargets fn to `a` through retarget_func before invoking it.
120 |     KFR_NOINLINE static KFR_USE_CPU(sse41) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args)
121 |     {
122 |         return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...);
123 |     }
124 | };
125 | 
126 | template <> // cpu_t::ssse3: both entry points are KFR_NOINLINE and tagged KFR_USE_CPU(ssse3).
127 | struct cpu_caller<cpu_t::ssse3>
128 | {
129 |     constexpr static cpu_t a = cpu_t::ssse3;
130 | 
131 |     template <typename Fn, typename... Args> // Invokes fn as given with the forwarded arguments.
132 |     KFR_NOINLINE static KFR_USE_CPU(ssse3) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args)
133 |     {
134 |         return fn(std::forward<Args>(args)...);
135 |     }
136 | 
137 |     template <typename Fn, typename... Args> // Retargets fn to `a` through retarget_func before invoking it.
138 |     KFR_NOINLINE static KFR_USE_CPU(ssse3) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args)
139 |     {
140 |         return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...);
141 |     }
142 | };
143 | 
144 | template <> // cpu_t::sse3: both entry points are KFR_NOINLINE and tagged KFR_USE_CPU(sse3).
145 | struct cpu_caller<cpu_t::sse3>
146 | {
147 |     constexpr static cpu_t a = cpu_t::sse3;
148 | 
149 |     template <typename Fn, typename... Args> // Invokes fn as given with the forwarded arguments.
150 |     KFR_NOINLINE static KFR_USE_CPU(sse3) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args)
151 |     {
152 |         return fn(std::forward<Args>(args)...);
153 |     }
154 | 
155 |     template <typename Fn, typename... Args> // Retargets fn to `a` through retarget_func before invoking it.
156 |     KFR_NOINLINE static KFR_USE_CPU(sse3) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args)
157 |     {
158 |         return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...);
159 |     }
160 | };
161 | 
162 | template <> // cpu_t::sse2: both entry points are KFR_NOINLINE and tagged KFR_USE_CPU(sse2).
163 | struct cpu_caller<cpu_t::sse2>
164 | {
165 |     constexpr static cpu_t a = cpu_t::sse2;
166 | 
167 |     template <typename Fn, typename... Args> // Invokes fn as given with the forwarded arguments.
168 |     KFR_NOINLINE static KFR_USE_CPU(sse2) result_of<Fn(Args...)> call(Fn&& fn, Args&&... args)
169 |     {
170 |         return fn(std::forward<Args>(args)...);
171 |     }
172 | 
173 |     template <typename Fn, typename... Args> // Retargets fn to `a` through retarget_func before invoking it.
174 |     KFR_NOINLINE static KFR_USE_CPU(sse2) result_of<Fn(Args...)> retarget_call(Fn&& fn, Args&&... args)
175 |     {
176 |         return (retarget_func<a>(std::forward<Fn>(fn)))(std::forward<Args>(args)...);
177 |     }
178 | };
179 | 
180 | template <cpu_t c, typename Fn, typename... Args, KFR_ENABLE_IF(c == cpu_t::native)> // Native target: retarget and call inline, without going through cpu_caller.
181 | KFR_INLINE auto dispatch_impl(Fn&& fn, Args&&... args) -> decltype(fn(std::forward<Args>(args)...))
182 | {
183 |     using targetFn = retarget<Fn, cpu_t::native>;
184 |     targetFn newfn = retarget_func<c>(std::forward<Fn>(fn)); // c equals cpu_t::native here
185 |     return newfn(std::forward<Args>(args)...);
186 | }
187 | 
188 | template <cpu_t c, typename Fn, typename... Args, KFR_ENABLE_IF(c != cpu_t::native && c != cpu_t::runtime)> // Explicit target: route through cpu_caller<c>::retarget_call.
189 | KFR_INLINE auto dispatch_impl(Fn&& fn, Args&&... args) -> decltype(fn(std::forward<Args>(args)...))
190 | {
191 |     return internal::cpu_caller<c>::retarget_call(std::forward<Fn>(fn), std::forward<Args>(args)...); // no overload in this header accepts cpu_t::runtime
192 | }
193 | }
194 | 
195 | template <cpu_t c, typename Fn, typename... Args> // Public entry point: calls fn with args for target c via internal::dispatch_impl<c>.
196 | KFR_INLINE auto dispatch(Fn&& fn, Args&&... args) -> decltype(fn(std::forward<Args>(args)...))
197 | {
198 |     return internal::dispatch_impl<c>(std::forward<Fn>(fn), std::forward<Args>(args)...);
199 | }
200 | }
201 | 


--------------------------------------------------------------------------------
/include/kfr/base/function.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "dispatch.hpp"
 26 | #include "expression.hpp"
 27 | #include "shuffle.hpp"
 28 | #include "types.hpp"
 29 | #include "vec.hpp"
 30 | 
 31 | #pragma clang diagnostic push
 32 | #pragma clang diagnostic ignored "-Wshadow"
 33 | 
 34 | namespace kfr
 35 | {
 36 | 
 37 | #define KFR_HANDLE_ALL(fn)                                                                                   \
 38 |     template <typename T, size_t N, typename... Args>                                                        \
 39 |     KFR_SINTRIN vec<T, N> fn(vec<T, N> x, Args&&... args)                                                    \
 40 |     {                                                                                                        \
 41 |         return handle_all<cpu, fn_##fn>(x, std::forward<Args>(args)...);                                     \
 42 |     }
 43 | #define KFR_HANDLE_ALL_REDUCE(redfn, fn)                                                                     \
 44 |     template <typename T, size_t N, typename... Args>                                                        \
 45 |     KFR_SINTRIN auto fn(vec<T, N> x, Args&&... args)                                                         \
 46 |     {                                                                                                        \
 47 |         return handle_all_reduce<cpu, redfn, fn_##fn>(x, std::forward<Args>(args)...);                       \
 48 |     }
 49 | 
 50 | #define KFR_HANDLE_SCALAR(fn)                                                                                \
 51 |     template <typename T, typename... Ts, KFR_ENABLE_IF(!is_vec<T>::value)>                                  \
 52 |     KFR_SINTRIN auto fn(const T& x, const Ts&... rest)                                                       \
 53 |     {                                                                                                        \
 54 |         return fn(make_vector(x), make_vector(rest)...)[0];                                                  \
 55 |     }
 56 | 
 57 | namespace internal
 58 | {
 59 | 
 60 | struct fn_disabled // Marker type carrying `disabled = true`; NOTE(review): presumably used as a base for unavailable functors - confirm at the use sites.
 61 | {
 62 |     constexpr static bool disabled = true;
 63 | };
 64 | 
 65 | template <cpu_t c, typename T> // Chooses the vector width used to hold n elements of T.
 66 | constexpr inline size_t next_fast_width(size_t n)
 67 | {
 68 |     return n <= vector_width<T, cpu_t::sse2> ? vector_width<T, cpu_t::sse2> : vector_width<T, c>; // SSE2 width when n fits in it, otherwise the width for c
 69 | }
 70 | 
 71 | template <cpu_t c, typename T, size_t N, size_t Nout = next_fast_width<c, T>(N)> // Widens x to Nout elements through extend<Nout>.
 72 | KFR_INLINE vec<T, Nout> extend_reg(vec<T, N> x)
 73 | {
 74 |     return extend<Nout>(x);
 75 | }
 76 | template <cpu_t c, typename T, size_t N, size_t Nout = next_fast_width<c, T>(N)> // Widens x to Nout elements through widen<Nout>, passing `value` along (presumably the fill for the added elements - confirm).
 77 | KFR_INLINE vec<T, Nout> extend_reg(vec<T, N> x, T value)
 78 | {
 79 |     return widen<Nout>(x, value);
 80 | }
 81 | 
 82 | template <cpu_t cur, typename Fn, typename T, size_t N, typename... Args, // Narrow case (N below the width for cur): widen the operands, call fn, cut the result back to N.
 83 |           KFR_ENABLE_IF(N < vector_width<T, cur>)>
 84 | KFR_INLINE auto handle_all_f(Fn&& fn, vec<T, N> x, Args&&... args)
 85 | {
 86 |     return narrow<N>(fn(extend_reg<cur>(x), extend_reg<cur>(args)...));
 87 | }
 88 | template <cpu_t cur, typename Fn, typename T, size_t N, typename... Args, // Wide case (N above the width for cur): apply fn to the low and high halves and concatenate.
 89 |           KFR_ENABLE_IF(N > vector_width<T, cur>)>
 90 | KFR_INLINE auto handle_all_f(Fn&& fn, vec<T, N> x, Args&&... args)
 91 | {
 92 |     return concat(fn(low(x), low(args)...), fn(high(x), high(args)...)); // no overload here covers N equal to the width
 93 | }
 94 | 
 95 | template <cpu_t cur, typename Fn, typename T, size_t N, typename... Args> // Value-initializes an Fn and hands it to handle_all_f<cur>.
 96 | KFR_INLINE auto handle_all(vec<T, N> x, Args&&... args)
 97 | {
 98 |     Fn fn{};
 99 |     return handle_all_f<cur>(fn, x, std::forward<Args>(args)...);
100 | }
101 | 
102 | template <cpu_t cur, typename RedFn, typename Fn, typename T, size_t N, typename... Args, // Narrow case: pad the operands with redfn(initialvalue<T>()) and call fn once.
103 |           typename = u8[N < vector_width<T, cur>]> // NOTE(review): looks like SFINAE through a zero-length array type when the condition is false - confirm
104 | KFR_INLINE auto handle_all_reduce_f(RedFn&& redfn, Fn&& fn, vec<T, N> x, Args&&... args)
105 | {
106 |     return fn(extend_reg<cur>(x, redfn(initialvalue<T>())),
107 |               extend_reg<cur>(args, redfn(initialvalue<T>()))...);
108 | }
109 | template <cpu_t cur, typename RedFn, typename Fn, typename T, size_t N, typename... Args, // Wide case: apply fn to each half and combine the two results with redfn.
110 |           typename = u8[N > vector_width<T, cur>], typename = void> // the extra `typename = void` gives this overload a parameter list distinct from the narrow one
111 | KFR_INLINE auto handle_all_reduce_f(RedFn&& redfn, Fn&& fn, vec<T, N> x, Args&&... args)
112 | {
113 |     return redfn(fn(low(x), low(args)...), fn(high(x), high(args)...));
114 | }
115 | template <cpu_t cur, typename RedFn, typename Fn, typename T, size_t N, typename... Args> // Value-initializes RedFn and Fn and hands them to handle_all_reduce_f<cur>.
116 | KFR_INLINE auto handle_all_reduce(vec<T, N> x, Args&&... args)
117 | {
118 |     RedFn redfn{};
119 |     Fn fn{};
120 |     return handle_all_reduce_f<cur>(redfn, fn, x, std::forward<Args>(args)...);
121 | }
122 | }
123 | }
124 | #pragma clang diagnostic pop
125 | 


--------------------------------------------------------------------------------
/include/kfr/base/gamma.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | #include "function.hpp"
 25 | #include "log_exp.hpp"
 26 | 
 27 | #pragma clang diagnostic push
 28 | #if CID_HAS_WARNING("-Wc99-extensions")
 29 | #pragma clang diagnostic ignored "-Wc99-extensions"
 30 | #endif
 31 | 
 32 | namespace kfr
 33 | {
 34 | 
 35 | namespace internal
 36 | {
 37 | template <typename T> // Twelve coefficients (hexadecimal floating-point literals) consumed by in_gamma::gamma.
 38 | constexpr T gamma_precalc[] = {
 39 |     0x2.81b263fec4e08p+0,  0x3.07b4100e04448p+16, -0xa.a0da01d4d4e2p+16, 0xf.05ccb27bb9dbp+16,
 40 |     -0xa.fa79616b7c6ep+16, 0x4.6dd6c10d4df5p+16,  -0xf.a2304199eb4ap+12, 0x1.c21dd4aade3dp+12,
 41 |     -0x1.62f981f01cf84p+8, 0x5.a937aa5c48d98p+0,  -0x3.c640bf82e2104p-8, 0xc.914c540f959cp-24,
 42 | };
 43 | 
 44 | template <cpu_t c = cpu_t::native, cpu_t cc = c> // Gamma-function routines built on the exp and pow of in_log_exp<cc>.
 45 | struct in_gamma : in_log_exp<cc>
 46 | {
 47 | private:
 48 |     using in_log_exp<cc>::exp;
 49 |     using in_log_exp<cc>::pow;
 50 | 
 51 | public:
 52 |     template <typename T, size_t N> // NOTE(review): looks like a Lanczos-style series approximation of gamma(z) - confirm against the coefficient source.
 53 |     KFR_SINTRIN vec<T, N> gamma(vec<T, N> z)
 54 |     {
 55 |         constexpr size_t Count = arraysize(internal::gamma_precalc<T>);
 56 |         vec<T, N> accm = gamma_precalc<T>[0]; // series starts from coefficient 0
 57 |         KFR_LOOP_UNROLL
 58 |         for (size_t k = 1; k < Count; k++)
 59 |             accm += gamma_precalc<T>[k] / (z + cast<utype<T>>(k)); // add coefficient k divided by (z + k)
 60 |         accm *= exp(-(z + Count)) * pow(z + Count, z + 0.5); // scale by exp(-(z + Count)) * (z + Count)^(z + 0.5)
 61 |         return accm / z;
 62 |     }
 63 | 
 64 |     template <typename T, size_t N> // Evaluates gamma(x + 1).
 65 |     KFR_SINTRIN vec<T, N> factorial_approx(vec<T, N> x)
 66 |     {
 67 |         return gamma(x + T(1));
 68 |     }
 69 |     KFR_SPEC_FN(in_gamma, gamma) // presumably declares the fn_gamma functor used by namespace native - macro defined elsewhere
 70 |     KFR_SPEC_FN(in_gamma, factorial_approx) // presumably declares fn_factorial_approx - macro defined elsewhere
 71 | };
 72 | }
 73 | 
 74 | namespace native // Entry points bound to the default in_gamma<> instantiation.
 75 | {
 76 | using fn_gamma = internal::in_gamma<>::fn_gamma;
 77 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> // Numeric argument: evaluate immediately.
 78 | KFR_INTRIN ftype<T1> gamma(const T1& x)
 79 | {
 80 |     return internal::in_gamma<>::gamma(x);
 81 | }
 82 | 
 83 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> // Input expression: return an expr_func node pairing fn_gamma with x.
 84 | KFR_INTRIN expr_func<fn_gamma, E1> gamma(E1&& x)
 85 | {
 86 |     return { fn_gamma(), std::forward<E1>(x) };
 87 | }
 88 | 
 89 | using fn_factorial_approx = internal::in_gamma<>::fn_factorial_approx;
 90 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> // Numeric argument: evaluate immediately.
 91 | KFR_INTRIN ftype<T1> factorial_approx(const T1& x)
 92 | {
 93 |     return internal::in_gamma<>::factorial_approx(x);
 94 | }
 95 | 
 96 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> // Input expression: return an expr_func node pairing fn_factorial_approx with x.
 97 | KFR_INTRIN expr_func<fn_factorial_approx, E1> factorial_approx(E1&& x)
 98 | {
 99 |     return { fn_factorial_approx(), std::forward<E1>(x) };
100 | }
101 | }
102 | }
103 | 
104 | #pragma clang diagnostic pop
105 | 


--------------------------------------------------------------------------------
/include/kfr/base/hyperbolic.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | #include "abs.hpp"
 25 | #include "constants.hpp"
 26 | #include "function.hpp"
 27 | #include "log_exp.hpp"
 28 | #include "min_max.hpp"
 29 | #include "operators.hpp"
 30 | #include "select.hpp"
 31 | 
 32 | namespace kfr
 33 | {
 34 | 
 35 | namespace internal
 36 | {
 37 | 
 38 | template <cpu_t c = cpu_t::native> // Hyperbolic functions expressed through the exp of in_log_exp<c>.
 39 | struct in_hyperbolic : in_log_exp<c>
 40 | {
 41 |     constexpr static cpu_t cur = c;
 42 | 
 43 | private:
 44 |     using in_log_exp<c>::exp;
 45 | 
 46 | public:
 47 |     template <typename T, size_t N> // sinh(x) = (e^x - e^-x) / 2
 48 |     KFR_SINTRIN vec<T, N> sinh(vec<T, N> x)
 49 |     {
 50 |         return (exp(x) - exp(-x)) * T(0.5);
 51 |     }
 52 | 
 53 |     template <typename T, size_t N> // cosh(x) = (e^x + e^-x) / 2
 54 |     KFR_SINTRIN vec<T, N> cosh(vec<T, N> x)
 55 |     {
 56 |         return (exp(x) + exp(-x)) * T(0.5);
 57 |     }
 58 | 
 59 |     template <typename T, size_t N> // tanh(x) = 2 / (1 + e^(-2x)) - 1, algebraically equal to (1 - e^(-2x)) / (1 + e^(-2x))
 60 |     KFR_SINTRIN vec<T, N> tanh(vec<T, N> x)
 61 |     {
 62 |         x = -2 * x;
 63 |         return T(-1) + T(2) / (T(1) + exp(x)); // tends to -1 if exp(x) overflows to +inf; the quotient form produced inf / inf = NaN there
 64 |     }
 65 | 
 66 |     template <typename T, size_t N> // coth(x) = 2 / (1 - e^(-2x)) - 1, algebraically equal to (1 + e^(-2x)) / (1 - e^(-2x))
 67 |     KFR_SINTRIN vec<T, N> coth(vec<T, N> x)
 68 |     {
 69 |         x = -2 * x;
 70 |         return T(-1) + T(2) / (T(1) - exp(x)); // tends to -1 if exp(x) overflows to +inf; the quotient form produced inf / -inf = NaN there
 71 |     }
 72 | 
 73 |     template <typename T, size_t N, KFR_ENABLE_IF(N > 1)> // Combines e^x and e^-x with subadd, then halves.
 74 |     KFR_SINTRIN vec<T, N> sinhcosh(vec<T, N> x)
 75 |     {
 76 |         const vec<T, N> a = exp(x);
 77 |         const vec<T, N> b = exp(-x);
 78 |         return subadd(a, b) * T(0.5); // presumably a - b in even lanes and a + b in odd lanes - confirm subadd's lane order
 79 |     }
 80 | 
 81 |     template <typename T, size_t N, KFR_ENABLE_IF(N > 1)> // Combines e^x and e^-x with addsub, then halves.
 82 |     KFR_SINTRIN vec<T, N> coshsinh(vec<T, N> x)
 83 |     {
 84 |         const vec<T, N> a = exp(x);
 85 |         const vec<T, N> b = exp(-x);
 86 |         return addsub(a, b) * T(0.5); // presumably a + b in even lanes and a - b in odd lanes - confirm addsub's lane order
 87 |     }
 88 |     KFR_HANDLE_SCALAR(sinh)
 89 |     KFR_HANDLE_SCALAR(cosh)
 90 |     KFR_HANDLE_SCALAR(tanh)
 91 |     KFR_HANDLE_SCALAR(coth)
 92 |     KFR_HANDLE_SCALAR(sinhcosh)
 93 |     KFR_HANDLE_SCALAR(coshsinh)
 94 |     KFR_SPEC_FN(in_hyperbolic, sinh)
 95 |     KFR_SPEC_FN(in_hyperbolic, cosh)
 96 |     KFR_SPEC_FN(in_hyperbolic, tanh)
 97 |     KFR_SPEC_FN(in_hyperbolic, coth)
 98 |     KFR_SPEC_FN(in_hyperbolic, sinhcosh)
 99 |     KFR_SPEC_FN(in_hyperbolic, coshsinh)
100 | };
101 | }
102 | 
103 | namespace native // Entry points bound to the default in_hyperbolic<> instantiation.
104 | {
105 | using fn_sinh = internal::in_hyperbolic<>::fn_sinh;
106 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> // Numeric argument: evaluate immediately.
107 | KFR_INTRIN ftype<T1> sinh(const T1& x)
108 | {
109 |     return internal::in_hyperbolic<>::sinh(x);
110 | }
111 | 
112 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> // Input expression: return an expr_func node pairing fn_sinh with x.
113 | KFR_INTRIN expr_func<fn_sinh, E1> sinh(E1&& x)
114 | {
115 |     return { fn_sinh(), std::forward<E1>(x) };
116 | }
117 | 
118 | using fn_cosh = internal::in_hyperbolic<>::fn_cosh;
119 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> // Numeric argument: evaluate immediately.
120 | KFR_INTRIN ftype<T1> cosh(const T1& x)
121 | {
122 |     return internal::in_hyperbolic<>::cosh(x);
123 | }
124 | 
125 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> // Input expression: return an expr_func node pairing fn_cosh with x.
126 | KFR_INTRIN expr_func<fn_cosh, E1> cosh(E1&& x)
127 | {
128 |     return { fn_cosh(), std::forward<E1>(x) };
129 | }
130 | 
131 | using fn_tanh = internal::in_hyperbolic<>::fn_tanh;
132 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> // Numeric argument: evaluate immediately.
133 | KFR_INTRIN ftype<T1> tanh(const T1& x)
134 | {
135 |     return internal::in_hyperbolic<>::tanh(x);
136 | }
137 | 
138 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> // Input expression: return an expr_func node pairing fn_tanh with x.
139 | KFR_INTRIN expr_func<fn_tanh, E1> tanh(E1&& x)
140 | {
141 |     return { fn_tanh(), std::forward<E1>(x) };
142 | }
143 | 
144 | using fn_coth = internal::in_hyperbolic<>::fn_coth;
145 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> // Numeric argument: evaluate immediately.
146 | KFR_INTRIN ftype<T1> coth(const T1& x)
147 | {
148 |     return internal::in_hyperbolic<>::coth(x);
149 | }
150 | 
151 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> // Input expression: return an expr_func node pairing fn_coth with x.
152 | KFR_INTRIN expr_func<fn_coth, E1> coth(E1&& x)
153 | {
154 |     return { fn_coth(), std::forward<E1>(x) };
155 | }
156 | 
157 | using fn_sinhcosh = internal::in_hyperbolic<>::fn_sinhcosh;
158 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> // NOTE(review): the vec overload of sinhcosh requires N > 1 - confirm a scalar argument resolves here.
159 | KFR_INTRIN ftype<T1> sinhcosh(const T1& x)
160 | {
161 |     return internal::in_hyperbolic<>::sinhcosh(x);
162 | }
163 | 
164 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> // Input expression: return an expr_func node pairing fn_sinhcosh with x.
165 | KFR_INTRIN expr_func<fn_sinhcosh, E1> sinhcosh(E1&& x)
166 | {
167 |     return { fn_sinhcosh(), std::forward<E1>(x) };
168 | }
169 | 
170 | using fn_coshsinh = internal::in_hyperbolic<>::fn_coshsinh;
171 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> // NOTE(review): the vec overload of coshsinh requires N > 1 - confirm a scalar argument resolves here.
172 | KFR_INTRIN ftype<T1> coshsinh(const T1& x)
173 | {
174 |     return internal::in_hyperbolic<>::coshsinh(x);
175 | }
176 | 
177 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> // Input expression: return an expr_func node pairing fn_coshsinh with x.
178 | KFR_INTRIN expr_func<fn_coshsinh, E1> coshsinh(E1&& x)
179 | {
180 |     return { fn_coshsinh(), std::forward<E1>(x) };
181 | }
182 | }
183 | }
184 | 


--------------------------------------------------------------------------------
/include/kfr/base/intrinsics.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "kfr.h"
  4 | 
  5 | #if KFR_COMPILER_CLANG
  6 | 
  7 | #pragma clang diagnostic push
  8 | #pragma clang diagnostic ignored "-Wreserved-id-macro"
  9 | 
 10 | #ifdef __AVX2__
 11 | #define KFR_AVX2_DEFINED
 12 | #endif
 13 | #ifdef __AVX__
 14 | #define KFR_AVX1_DEFINED
 15 | #endif
 16 | #ifdef __SSE4_2__
 17 | #define KFR_SSE42_DEFINED
 18 | #endif
 19 | #ifdef __SSE4_1__
 20 | #define KFR_SSE41_DEFINED
 21 | #endif
 22 | #ifdef __SSSE3__
 23 | #define KFR_SSSE3_DEFINED
 24 | #endif
 25 | #ifdef __SSE3__
 26 | #define KFR_SSE3_DEFINED
 27 | #endif
 28 | #ifdef __SSE2__
 29 | #define KFR_SSE2_DEFINED
 30 | #endif
 31 | #ifdef __SSE__
 32 | #define KFR_SSE1_DEFINED
 33 | #endif
 34 | #ifdef __MMX__
 35 | #define KFR_MMX_DEFINED
 36 | #endif
 37 | 
 38 | #ifndef KFR_AVX2_DEFINED
 39 | #define __AVX2__
 40 | #endif
 41 | #ifndef KFR_AVX1_DEFINED
 42 | #define __AVX__
 43 | #endif
 44 | #ifndef KFR_SSE42_DEFINED
 45 | #define __SSE4_2__
 46 | #endif
 47 | #ifndef KFR_SSE41_DEFINED
 48 | #define __SSE4_1__
 49 | #endif
 50 | #ifndef KFR_SSSE3_DEFINED
 51 | #define __SSSE3__
 52 | #endif
 53 | #ifndef KFR_SSE3_DEFINED
 54 | #define __SSE3__
 55 | #endif
 56 | #ifndef KFR_SSE2_DEFINED
 57 | #define __SSE2__
 58 | #endif
 59 | #ifndef KFR_SSE1_DEFINED
 60 | #define __SSE__
 61 | #endif
 62 | #ifndef KFR_MMX_DEFINED
 63 | #define __MMX__
 64 | #endif
 65 | 
 66 | #ifdef KFR_SKIP_AVX512
 67 | #ifndef __AVX512FINTRIN_H
 68 | #define __AVX512FINTRIN_H
 69 | #endif
 70 | #ifndef __AVX512VLINTRIN_H
 71 | #define __AVX512VLINTRIN_H
 72 | #endif
 73 | #ifndef __AVX512BWINTRIN_H
 74 | #define __AVX512BWINTRIN_H
 75 | #endif
 76 | #ifndef __AVX512CDINTRIN_H
 77 | #define __AVX512CDINTRIN_H
 78 | #endif
 79 | #ifndef __AVX512DQINTRIN_H
 80 | #define __AVX512DQINTRIN_H
 81 | #endif
 82 | #ifndef __AVX512VLBWINTRIN_H
 83 | #define __AVX512VLBWINTRIN_H
 84 | #endif
 85 | #ifndef __AVX512VLDQINTRIN_H
 86 | #define __AVX512VLDQINTRIN_H
 87 | #endif
 88 | #ifndef __AVX512ERINTRIN_H
 89 | #define __AVX512ERINTRIN_H
 90 | #endif
 91 | #ifndef __IFMAINTRIN_H
 92 | #define __IFMAINTRIN_H
 93 | #endif
 94 | #ifndef __IFMAVLINTRIN_H
 95 | #define __IFMAVLINTRIN_H
 96 | #endif
 97 | #ifndef __VBMIINTRIN_H
 98 | #define __VBMIINTRIN_H
 99 | #endif
100 | #ifndef __VBMIVLINTRIN_H
101 | #define __VBMIVLINTRIN_H
102 | #endif
103 | 
104 | #endif
105 | 
106 | #include <immintrin.h>
107 | #ifdef KFR_OS_WIN
108 | #include <intrin.h>
109 | #endif
110 | 
111 | #ifndef KFR_AVX2_DEFINED
112 | #undef __AVX2__
113 | #endif
114 | #ifndef KFR_AVX1_DEFINED
115 | #undef __AVX__
116 | #endif
117 | #ifndef KFR_SSE42_DEFINED
118 | #undef __SSE4_2__
119 | #endif
120 | #ifndef KFR_SSE41_DEFINED
121 | #undef __SSE4_1__
122 | #endif
123 | #ifndef KFR_SSSE3_DEFINED
124 | #undef __SSSE3__
125 | #endif
126 | #ifndef KFR_SSE3_DEFINED
127 | #undef __SSE3__
128 | #endif
129 | #ifndef KFR_SSE2_DEFINED
130 | #undef __SSE2__
131 | #endif
132 | #ifndef KFR_SSE1_DEFINED
133 | #undef __SSE__
134 | #endif
135 | #ifndef KFR_MMX_DEFINED
136 | #undef __MMX__
137 | #endif
138 | 
139 | #pragma clang diagnostic pop
140 | 
141 | #else
142 | 
143 | #include <intrin.h>
144 | 
145 | #endif
146 | 


--------------------------------------------------------------------------------
/include/kfr/base/kfr.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <stddef.h>
  4 | #include <stdint.h>
  5 | 
  6 | #include "../cident.h"
  7 | 
  8 | #define KFR_INLINE CID_INLINE
  9 | #define KFR_INLINE_MEMBER CID_INLINE_MEMBER
 10 | #define KFR_INLINE_LAMBDA CID_INLINE_LAMBDA
 11 | #define KFR_NOINLINE CID_NOINLINE
 12 | #define KFR_FLATTEN CID_FLATTEN
 13 | #define KFR_RESTRICT CID_RESTRICT
 14 | 
 15 | #ifdef CID_COMPILER_CLANG
 16 | #define KFR_COMPILER_CLANG CID_COMPILER_CLANG
 17 | #endif
 18 | 
 19 | #ifdef CID_OS_WIN
 20 | #define KFR_OS_WIN CID_OS_WIN
 21 | #endif
 22 | 
 23 | #ifdef CID_OS_OSX
 24 | #define KFR_OS_OSX CID_OS_OSX
 25 | #endif
 26 | 
 27 | #ifdef CID_OS_LINUX
 28 | #define KFR_OS_LINUX CID_OS_LINUX
 29 | #endif
 30 | 
 31 | #ifdef CID_GNU_ATTRIBUTES
 32 | #define KFR_GNU_ATTRIBUTES CID_GNU_ATTRIBUTES
 33 | #endif
 34 | 
 35 | #ifdef CID_MSVC_ATTRIBUTES
 36 | #define KFR_MSVC_ATTRIBUTES CID_MSVC_ATTRIBUTES
 37 | #endif
 38 | 
 39 | #ifdef CID_ARCH_X64
 40 | #define KFR_ARCH_X64 CID_ARCH_X64
 41 | #endif
 42 | 
 43 | #ifdef CID_ARCH_X32
 44 | #define KFR_ARCH_X32 CID_ARCH_X32
 45 | #endif
 46 | 
 47 | #define KFR_ARCH_NAME CID_ARCH_NAME
 48 | 
 49 | #define KFR_CDECL CID_CDECL
 50 | 
 51 | #define KFR_PUBLIC_C CID_PUBLIC_C
 52 | 
 53 | #ifdef __cplusplus
 54 | namespace kfr
 55 | {
 56 | using ::cid::arraysize; // make cid::arraysize available as kfr::arraysize
 57 | }
 58 | #endif
 59 | 
 60 | #define KFR_VERSION_STRING "0.9.1"
 61 | #define KFR_VERSION_MAJOR 0
 62 | #define KFR_VERSION_MINOR 9
 63 | #define KFR_VERSION_BUILD 1
 64 | #define KFR_VERSION 901
 65 | 
 66 | #ifdef __cplusplus
 67 | namespace kfr // Compile-time copies of the KFR_VERSION_* macros.
 68 | {
 69 | constexpr const char version_string[] = KFR_VERSION_STRING; // "0.9.1"
 70 | constexpr int version_major           = KFR_VERSION_MAJOR; // 0
 71 | constexpr int version_minor           = KFR_VERSION_MINOR; // 9
 72 | constexpr int version_build           = KFR_VERSION_BUILD; // 1
 73 | constexpr int version                 = KFR_VERSION; // 901
 74 | }
 75 | #endif
 76 | 
 77 | //#define KFR_MEMORY_ALIGNMENT 64
 78 | 
 79 | #if KFR_COMPILER_CLANG
 80 | #define KFR_LOOP_NOUNROLL                                                                                    \
 81 |     _Pragma("clang loop vectorize( disable )") _Pragma("clang loop interleave( disable )")                   \
 82 |         _Pragma("clang loop unroll( disable )")
 83 | 
 84 | #define KFR_LOOP_UNROLL _Pragma("clang loop unroll( full )")
 85 | 
 86 | #define KFR_VEC_CC __attribute__((vectorcall))
 87 | #else
 88 | #define KFR_LOOP_NOUNROLL
 89 | #define KFR_LOOP_UNROLL
 90 | #ifdef KFR_COMPILER_MSVC
 91 | #define KFR_VEC_CC __vectorcall
 92 | #endif
 93 | 
 94 | #endif
 95 | 
// Instruction-set availability flags; all are unconditionally set to 1 here.
#define KFR_AVAIL_AVX2 1
#define KFR_AVAIL_AVX 1
#define KFR_AVAIL_SSE42 1
#define KFR_AVAIL_SSE41 1
#define KFR_AVAIL_SSSE3 1
#define KFR_AVAIL_SSE3 1
#define KFR_AVAIL_SSE2 1
#define KFR_AVAIL_SSE 1

#if defined(KFR_GNU_ATTRIBUTES)

// Strings passed to __attribute__((target(...))); looked up by token pasting in KFR_USE_CPU.
// NOTE(review): there is no KFR_CPU_NAME_avx1 or KFR_CPU_NAME_sse entry, so KFR_USE_CPU(avx1)
// or KFR_USE_CPU(sse) would expand to an undefined identifier - confirm these spellings are
// never used with this macro.
#define KFR_CPU_NAME_avx2 "avx2"
#define KFR_CPU_NAME_avx "avx"
#define KFR_CPU_NAME_sse42 "sse4.2"
#define KFR_CPU_NAME_sse41 "sse4.1"
#define KFR_CPU_NAME_ssse3 "ssse3"
#define KFR_CPU_NAME_sse3 "sse3"
#define KFR_CPU_NAME_sse2 "sse2"

// Marks a function as compiled for the given instruction set (GNU-style attributes only).
#define KFR_USE_CPU(arch) __attribute__((target(KFR_CPU_NAME_##arch)))

#else
// Without GNU-style attributes the per-function target selection expands to nothing.
#define KFR_USE_CPU(arch)
#endif

// fastcall calling convention, spelled for the active attribute dialect.
#if defined(KFR_GNU_ATTRIBUTES)
#define KFR_FAST_CC __attribute__((fastcall))
#else
#define KFR_FAST_CC __fastcall
#endif

// Decoration bundles used on intrinsic-style functions throughout the library.
#define KFR_INTRIN CID_INTRIN
#define KFR_SINTRIN CID_INTRIN CID_NODEBUG static
#define KFR_AINTRIN inline CID_NODEBUG static
#define KFR_FAST_NOINLINE CID_NOINLINE

// KFR_AINTRIN plus the target attribute for instruction set `c`.
#define KFR_CPU_INTRIN(c) KFR_AINTRIN KFR_USE_CPU(c)
133 | 


--------------------------------------------------------------------------------
/include/kfr/base/memory.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "../base/read_write.hpp"
 26 | #include "../base/types.hpp"
 27 | #include <atomic>
 28 | #include <memory>
 29 | 
 30 | namespace kfr
 31 | {
 32 | 
 33 | namespace internal
 34 | {
 35 | 
 36 | struct memory_statistics
 37 | {
 38 |     std::atomic_uintptr_t allocation_count   = ATOMIC_VAR_INIT(0);
 39 |     std::atomic_uintptr_t allocation_size    = ATOMIC_VAR_INIT(0);
 40 |     std::atomic_uintptr_t deallocation_count = ATOMIC_VAR_INIT(0);
 41 |     std::atomic_uintptr_t deallocation_size  = ATOMIC_VAR_INIT(0);
 42 | };
 43 | 
 44 | inline memory_statistics& get_memory_statistics()
 45 | {
 46 |     static memory_statistics ms;
 47 |     return ms;
 48 | }
 49 | 
 50 | struct mem_header
 51 | {
 52 |     u8 offset;
 53 |     u8 alignment;
 54 |     u8 reserved1;
 55 |     u8 reserved2;
 56 |     size_t size;
 57 | } __attribute__((__packed__));
 58 | 
 59 | inline mem_header* aligned_header(void* ptr) { return ptr_cast<mem_header>(ptr) - 1; }
 60 | 
 61 | inline size_t aligned_size(void* ptr) { return aligned_header(ptr)->size; }
 62 | 
 63 | inline void* aligned_malloc(size_t size, size_t alignment)
 64 | {
 65 |     get_memory_statistics().allocation_count++;
 66 |     get_memory_statistics().allocation_size += size;
 67 |     void* ptr = malloc(size + (alignment - 1) + sizeof(mem_header));
 68 |     if (ptr == nullptr)
 69 |         return nullptr;
 70 |     void* aligned_ptr                      = advance(ptr, sizeof(mem_header));
 71 |     aligned_ptr                            = align_up(aligned_ptr, alignment);
 72 |     aligned_header(aligned_ptr)->alignment = static_cast<u8>(alignment > 255 ? 255 : alignment);
 73 |     aligned_header(aligned_ptr)->offset    = static_cast<u8>(distance(aligned_ptr, ptr));
 74 |     aligned_header(aligned_ptr)->size      = size;
 75 |     return aligned_ptr;
 76 | }
 77 | inline void aligned_free(void* ptr)
 78 | {
 79 |     get_memory_statistics().deallocation_count++;
 80 |     get_memory_statistics().deallocation_size += aligned_size(ptr);
 81 |     free(advance(ptr, -static_cast<ptrdiff_t>(aligned_header(ptr)->offset)));
 82 | }
 83 | }
 84 | 
 85 | template <typename T = void, size_t alignment = native_cache_alignment>
 86 | KFR_INLINE T* aligned_allocate(size_t size = 1)
 87 | {
 88 |     T* ptr = static_cast<T*>(__builtin_assume_aligned(
 89 |         internal::aligned_malloc(std::max(alignment, size * details::elementsize<T>), alignment), alignment));
 90 |     return ptr;
 91 | }
 92 | 
 93 | template <typename T = void>
 94 | KFR_INLINE void aligned_deallocate(T* ptr)
 95 | {
 96 |     return internal::aligned_free(ptr);
 97 | }
 98 | 
 99 | namespace internal
100 | {
101 | template <typename T>
102 | struct aligned_deleter
103 | {
104 |     KFR_INLINE void operator()(T* ptr) const { aligned_deallocate(ptr); }
105 | };
106 | }
107 | 
108 | template <typename T>
109 | struct autofree
110 | {
111 |     KFR_INLINE autofree() {}
112 |     explicit KFR_INLINE autofree(size_t size) : ptr(aligned_allocate<T>(size)) {}
113 |     autofree(const autofree&) = delete;
114 |     autofree& operator=(const autofree&) = delete;
115 |     autofree(autofree&&) noexcept        = default;
116 |     autofree& operator=(autofree&&) noexcept = default;
117 |     KFR_INLINE T& operator[](size_t index) noexcept { return ptr[index]; }
118 |     KFR_INLINE const T& operator[](size_t index) const noexcept { return ptr[index]; }
119 | 
120 |     template <typename U = T>
121 |     KFR_INLINE U* data() noexcept
122 |     {
123 |         return ptr_cast<U>(ptr.get());
124 |     }
125 |     template <typename U = T>
126 |     KFR_INLINE const U* data() const noexcept
127 |     {
128 |         return ptr_cast<U>(ptr.get());
129 |     }
130 | 
131 |     std::unique_ptr<T[], internal::aligned_deleter<T>> ptr;
132 | };
133 | 
134 | template <typename T>
135 | struct allocator
136 | {
137 |     using value_type      = T;
138 |     using pointer         = T*;
139 |     using const_pointer   = const T*;
140 |     using reference       = T&;
141 |     using const_reference = const T&;
142 |     using size_type       = std::size_t;
143 |     using difference_type = std::ptrdiff_t;
144 | 
145 |     template <typename U>
146 |     struct rebind
147 |     {
148 |         using other = allocator<U>;
149 |     };
150 |     constexpr allocator() noexcept                 = default;
151 |     constexpr allocator(const allocator&) noexcept = default;
152 |     template <typename U>
153 |     constexpr allocator(const allocator<U>&) noexcept
154 |     {
155 |     }
156 |     pointer address(reference x) const noexcept { return std::addressof(x); }
157 |     const_pointer address(const_reference x) const noexcept { return std::addressof(x); }
158 |     pointer allocate(size_type n, std::allocator<void>::const_pointer = 0) const
159 |     {
160 |         pointer result = aligned_allocate<value_type>(n);
161 |         if (!result)
162 |             CID_THROW(std::bad_alloc());
163 |         return result;
164 |     }
165 |     void deallocate(pointer p, size_type) { aligned_deallocate(p); }
166 |     size_type max_size() const { return std::numeric_limits<size_type>::max() / sizeof(value_type); }
167 |     template <typename U, typename... Args>
168 |     void construct(U* p, Args&&... args)
169 |     {
170 |         ::new (pvoid(p)) U(std::forward<Args>(args)...);
171 |     }
172 |     template <typename U>
173 |     void destroy(U* p)
174 |     {
175 |         p->~U();
176 |     }
177 | };
178 | 
179 | template <typename T1, typename T2>
180 | constexpr inline bool operator==(const allocator<T1>&, const allocator<T2>&) noexcept
181 | {
182 |     return true;
183 | }
184 | template <typename T1, typename T2>
185 | constexpr inline bool operator!=(const allocator<T1>&, const allocator<T2>&) noexcept
186 | {
187 |     return false;
188 | }
189 | 
190 | struct aligned_new
191 | {
192 |     inline static void* operator new(size_t size) { return aligned_allocate(size); }
193 |     inline static void operator delete(void* ptr) { return aligned_deallocate(ptr); }
194 | };
195 | 
// Injects intrusive reference counting into a class body:
//   addref()  - increments the counter;
//   release() - decrements it and deletes the object when it reaches zero.
// The counter is an atomic that starts at 0, so an owner has to call addref() before the
// first release(). The macro leaves the class in a `private:` section.
// The parameter `cl` is not used by the expansion.
// The counter is brace-initialised; ATOMIC_VAR_INIT is deprecated in C++20 and removed in C++23.
#define KFR_CLASS_REFCOUNT(cl)                                                                               \
public:                                                                                                      \
    void addref() const { m_refcount++; }                                                                    \
    void release() const                                                                                     \
    {                                                                                                        \
        if (--m_refcount == 0)                                                                               \
        {                                                                                                    \
            delete this;                                                                                     \
        }                                                                                                    \
    }                                                                                                        \
                                                                                                             \
private:                                                                                                     \
    mutable std::atomic_uintptr_t m_refcount{ 0 };
209 | }
210 | 


--------------------------------------------------------------------------------
/include/kfr/base/read_write.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "shuffle.hpp"
 26 | #include "types.hpp"
 27 | #include "vec.hpp"
 28 | 
 29 | namespace kfr
 30 | {
 31 | 
 32 | template <size_t N, bool A = false, typename T>
 33 | KFR_INLINE vec<T, N> read(const T* src)
 34 | {
 35 |     return internal_read_write::read<N, A, T>(src);
 36 | }
 37 | 
/// Stores `value` to the memory at `dest` by forwarding to internal_read_write::write.
/// NOTE(review): the flag A is passed straight through; it presumably selects an aligned
/// access - confirm against internal_read_write.
template <bool A = false, size_t N, typename T>
KFR_INLINE void write(T* dest, vec<T, N> value)
{
    internal_read_write::write<A, N, T>(dest, value);
}
 43 | 
 44 | template <typename... Indices, typename T, size_t Nout = 1 + sizeof...(Indices)>
 45 | KFR_INLINE vec<T, Nout> gather(const T* base, size_t index, Indices... indices)
 46 | {
 47 |     return make_vector(base[index], base[indices]...);
 48 | }
 49 | 
 50 | template <size_t Index, size_t... Indices, typename T, size_t Nout = 1 + sizeof...(Indices)>
 51 | KFR_INLINE vec<T, Nout> gather(const T* base)
 52 | {
 53 |     return make_vector(base[Index], base[Indices]...);
 54 | }
 55 | 
 56 | template <size_t Index, size_t... Indices, typename T, size_t N, size_t InIndex = 0>
 57 | KFR_INLINE void scatter(const T* base, vec<T, N> value)
 58 | {
 59 |     base[Index] = value[InIndex];
 60 |     scatter<Indices..., T, N, InIndex + 1>(base, value);
 61 | }
 62 | 
 63 | namespace internal
 64 | {
 65 | template <typename T, size_t N, size_t... Indices>
 66 | KFR_INLINE vec<T, N> gather(const T* base, vec<u32, N> indices, csizes_t<Indices...>)
 67 | {
 68 |     return make_vector(base[indices[Indices]]...);
 69 | }
 70 | template <size_t Nout, size_t Stride, typename T, size_t... Indices>
 71 | KFR_INLINE vec<T, Nout> gather_stride(const T* base, csizes_t<Indices...>)
 72 | {
 73 |     return make_vector(base[Indices * Stride]...);
 74 | }
 75 | template <size_t Nout, typename T, size_t... Indices>
 76 | KFR_INLINE vec<T, Nout> gather_stride_s(const T* base, size_t stride, csizes_t<Indices...>)
 77 | {
 78 |     return make_vector(base[Indices * stride]...);
 79 | }
 80 | }
 81 | 
 82 | template <typename T, size_t N>
 83 | KFR_INLINE vec<T, N> gather(const T* base, vec<u32, N> indices)
 84 | {
 85 |     return internal::gather(base, indices, csizeseq<N>);
 86 | }
 87 | 
 88 | template <size_t Nout, typename T>
 89 | KFR_INLINE vec<T, Nout> gather_stride(const T* base, size_t stride)
 90 | {
 91 |     return internal::gather_stride_s<Nout>(base, stride, csizeseq<Nout>);
 92 | }
 93 | 
 94 | template <size_t Nout, size_t Stride, typename T>
 95 | KFR_INLINE vec<T, Nout> gather_stride(const T* base)
 96 | {
 97 |     return internal::gather_stride<Nout, Stride>(base, csizeseq<Nout>);
 98 | }
 99 | 
100 | template <size_t groupsize, typename T, size_t N, typename IT, size_t... Indices>
101 | KFR_INLINE vec<T, N * groupsize> gather_helper(const T* base, vec<IT, N> offset, csizes_t<Indices...>)
102 | {
103 |     return concat(read<groupsize>(base + groupsize * (*offset)[Indices])...);
104 | }
105 | template <size_t groupsize = 1, typename T, size_t N, typename IT>
106 | KFR_INLINE vec<T, N * groupsize> gather(const T* base, vec<IT, N> offset)
107 | {
108 |     return gather_helper<groupsize>(base, offset, csizeseq<N>);
109 | }
110 | 
/// Implementation of grouped scatter: for every k in Indices, takes the slice of `value` that
/// starts at k * groupsize and is groupsize elements long, and writes it to
/// base + groupsize * offset[k].
/// The writes are expanded inside a braced initialiser (`swallow`), which evaluates them in
/// index order.
template <size_t groupsize, typename T, size_t N, size_t Nout = N* groupsize, typename IT, size_t... Indices>
KFR_INLINE void scatter_helper(T* base, vec<IT, N> offset, vec<T, Nout> value, csizes_t<Indices...>)
{
    swallow{ (write(base + groupsize * (*offset)[Indices], slice<Indices * groupsize, groupsize>(value)),
              0)... };
}
117 | template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N* groupsize, typename IT>
118 | KFR_INLINE void scatter(T* base, vec<IT, N> offset, vec<T, Nout> value)
119 | {
120 |     return scatter_helper<groupsize>(base, offset, value, csizeseq<N>);
121 | }
122 | 
123 | template <typename T>
124 | constexpr T partial_masks[] = { internal::allones<T>,
125 |                                 internal::allones<T>,
126 |                                 internal::allones<T>,
127 |                                 internal::allones<T>,
128 |                                 internal::allones<T>,
129 |                                 internal::allones<T>,
130 |                                 internal::allones<T>,
131 |                                 internal::allones<T>,
132 |                                 internal::allones<T>,
133 |                                 internal::allones<T>,
134 |                                 internal::allones<T>,
135 |                                 internal::allones<T>,
136 |                                 internal::allones<T>,
137 |                                 internal::allones<T>,
138 |                                 internal::allones<T>,
139 |                                 internal::allones<T>,
140 |                                 internal::allones<T>,
141 |                                 internal::allones<T>,
142 |                                 internal::allones<T>,
143 |                                 internal::allones<T>,
144 |                                 internal::allones<T>,
145 |                                 internal::allones<T>,
146 |                                 internal::allones<T>,
147 |                                 internal::allones<T>,
148 |                                 internal::allones<T>,
149 |                                 internal::allones<T>,
150 |                                 internal::allones<T>,
151 |                                 internal::allones<T>,
152 |                                 internal::allones<T>,
153 |                                 internal::allones<T>,
154 |                                 internal::allones<T>,
155 |                                 internal::allones<T>,
156 |                                 T(),
157 |                                 T(),
158 |                                 T(),
159 |                                 T(),
160 |                                 T(),
161 |                                 T(),
162 |                                 T(),
163 |                                 T(),
164 |                                 T(),
165 |                                 T(),
166 |                                 T(),
167 |                                 T(),
168 |                                 T(),
169 |                                 T(),
170 |                                 T(),
171 |                                 T(),
172 |                                 T(),
173 |                                 T(),
174 |                                 T(),
175 |                                 T(),
176 |                                 T(),
177 |                                 T(),
178 |                                 T(),
179 |                                 T(),
180 |                                 T(),
181 |                                 T(),
182 |                                 T(),
183 |                                 T(),
184 |                                 T(),
185 |                                 T(),
186 |                                 T(),
187 |                                 T() };
188 | 
189 | template <typename T, size_t N>
190 | KFR_INLINE vec<T, N> partial_mask(size_t index)
191 | {
192 |     static_assert(N <= arraysize(partial_masks<T>) / 2,
193 |                   "N must not be greater than half of partial_masks expression_array");
194 |     return read<N>(&partial_masks<T>[0] + arraysize(partial_masks<T>) / 2 - index);
195 | }
196 | template <typename T, size_t N>
197 | KFR_INLINE vec<T, N> partial_mask(size_t index, vec_t<T, N>)
198 | {
199 |     return partial_mask<T, N>(index);
200 | }
201 | }
202 | 


--------------------------------------------------------------------------------
/include/kfr/base/round.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "function.hpp"
 26 | #include "operators.hpp"
 27 | 
 28 | namespace kfr
 29 | {
 30 | 
// Short spellings for the SSE4.1 / AVX rounding intrinsics with a fixed rounding mode.
// _MM_FROUND_TRUNC and _MM_FROUND_NINT are the mode constants from the intrinsics headers
// (truncate toward zero / round to nearest).
// All of these macros are #undef'ed again further down in this header.

// Packed 128-bit variants.
#define KFR_mm_trunc_ps(V) _mm_round_ps((V), _MM_FROUND_TRUNC)
#define KFR_mm_roundnearest_ps(V) _mm_round_ps((V), _MM_FROUND_NINT)
#define KFR_mm_trunc_pd(V) _mm_round_pd((V), _MM_FROUND_TRUNC)
#define KFR_mm_roundnearest_pd(V) _mm_round_pd((V), _MM_FROUND_NINT)

// Scalar variants; the first operand (source of the untouched upper lanes) is a zero vector.
// These scalar macros are not referenced anywhere else in this header.
#define KFR_mm_trunc_ss(V) _mm_round_ss(_mm_setzero_ps(), (V), _MM_FROUND_TRUNC)
#define KFR_mm_roundnearest_ss(V) _mm_round_ss(_mm_setzero_ps(), (V), _MM_FROUND_NINT)
#define KFR_mm_trunc_sd(V) _mm_round_sd(_mm_setzero_pd(), (V), _MM_FROUND_TRUNC)
#define KFR_mm_roundnearest_sd(V) _mm_round_sd(_mm_setzero_pd(), (V), _MM_FROUND_NINT)

#define KFR_mm_floor_ss(V) _mm_floor_ss(_mm_setzero_ps(), (V))
#define KFR_mm_floor_sd(V) _mm_floor_sd(_mm_setzero_pd(), (V))
#define KFR_mm_ceil_ss(V) _mm_ceil_ss(_mm_setzero_ps(), (V))
#define KFR_mm_ceil_sd(V) _mm_ceil_sd(_mm_setzero_pd(), (V))

// Packed 256-bit (AVX) variants.
#define KFR_mm256_trunc_ps(V) _mm256_round_ps((V), _MM_FROUND_TRUNC)
#define KFR_mm256_roundnearest_ps(V) _mm256_round_ps((V), _MM_FROUND_NINT)
#define KFR_mm256_trunc_pd(V) _mm256_round_pd((V), _MM_FROUND_TRUNC)
#define KFR_mm256_roundnearest_pd(V) _mm256_round_pd((V), _MM_FROUND_NINT)
 50 | 
 51 | namespace internal
 52 | {
 53 | 
 54 | template <cpu_t c = cpu_t::native>
 55 | struct in_round : in_round<older(c)>
 56 | {
 57 |     struct fn_floor : in_round<older(c)>::fn_floor, fn_disabled
 58 |     {
 59 |     };
 60 |     struct fn_ceil : in_round<older(c)>::fn_ceil, fn_disabled
 61 |     {
 62 |     };
 63 |     struct fn_round : in_round<older(c)>::fn_round, fn_disabled
 64 |     {
 65 |     };
 66 |     struct fn_trunc : in_round<older(c)>::fn_trunc, fn_disabled
 67 |     {
 68 |     };
 69 |     struct fn_fract : in_round<older(c)>::fn_fract, fn_disabled
 70 |     {
 71 |     };
 72 | };
 73 | 
/// SSE2 implementation of floor / ceil / round / trunc / fract.
/// SSE2 has no rounding instruction, so the floating-point versions go through a
/// float -> integer -> float conversion.
/// NOTE(review): that round trip can only reproduce values representable in the intermediate
/// integer type (i32 for f32, i64 for f64); behaviour for larger magnitudes, infinities and
/// NaN should be confirmed.
template <>
struct in_round<cpu_t::sse2>
{
    constexpr static cpu_t cpu = cpu_t::sse2;

    // Non-floating-point element types: values are already integral, so floor / ceil / trunc /
    // round return the argument unchanged and fract returns a value-initialised T.
    template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
    KFR_SINTRIN vec<T, N> floor(vec<T, N> value)
    {
        return value;
    }
    template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
    KFR_SINTRIN vec<T, N> ceil(vec<T, N> value)
    {
        return value;
    }
    template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
    KFR_SINTRIN vec<T, N> trunc(vec<T, N> value)
    {
        return value;
    }
    template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
    KFR_SINTRIN vec<T, N> round(vec<T, N> value)
    {
        return value;
    }
    template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
    KFR_SINTRIN vec<T, N> fract(vec<T, N>)
    {
        return T();
    }

    // floor: convert through the integer type (t), then subtract 1 in the lanes where x < t.
    // The comparison result is reinterpreted as a bit mask and ANDed with 1.0, which
    // presumably yields 1.0 in matching lanes and 0.0 elsewhere - confirm mask representation.
    KFR_SINTRIN f32sse floor(f32sse x)
    {
        f32sse t = cast<f32>(cast<i32>(x));
        return t - (bitcast<f32>(x < t) & 1.f);
    }
    KFR_SINTRIN f64sse floor(f64sse x)
    {
        f64sse t = cast<f64>(cast<i64>(x));
        return t - (bitcast<f64>(x < t) & 1.0);
    }
    // ceil: same scheme, adding 1 in the lanes where x > t.
    KFR_SINTRIN f32sse ceil(f32sse x)
    {
        f32sse t = cast<f32>(cast<i32>(x));
        return t + (bitcast<f32>(x > t) & 1.f);
    }
    KFR_SINTRIN f64sse ceil(f64sse x)
    {
        f64sse t = cast<f64>(cast<i64>(x));
        return t + (bitcast<f64>(x > t) & 1.0);
    }
    // round: add 0.5 combined with x through mulsign (presumably giving it the sign of x),
    // then convert through the integer type.
    // NOTE(review): this looks like "ties away from zero", while the SSE4.1 / AVX
    // specialisations use _MM_FROUND_NINT - confirm the tie behaviour is meant to differ.
    KFR_SINTRIN f32sse round(f32sse x) { return cast<f32>(cast<i32>(x + mulsign(f32x4(0.5f), x))); }
    KFR_SINTRIN f64sse round(f64sse x) { return cast<f64>(cast<i64>(x + mulsign(f64x2(0.5), x))); }
    // trunc: plain round trip through the integer type.
    KFR_SINTRIN f32sse trunc(f32sse x) { return cast<f32>(cast<i32>(x)); }
    KFR_SINTRIN f64sse trunc(f64sse x) { return cast<f64>(cast<i64>(x)); }
    // fract: x minus floor(x).
    KFR_SINTRIN f32sse fract(f32sse x) { return x - floor(x); }
    KFR_SINTRIN f64sse fract(f64sse x) { return x - floor(x); }

    // Macro-generated members (macros presumably from function.hpp): overloads for other
    // vector sizes and scalars, plus the fn_* function objects referred to as in_round<>::fn_*.
    KFR_HANDLE_ALL(floor)
    KFR_HANDLE_ALL(ceil)
    KFR_HANDLE_ALL(round)
    KFR_HANDLE_ALL(trunc)
    KFR_HANDLE_ALL(fract)
    KFR_HANDLE_SCALAR(floor)
    KFR_HANDLE_SCALAR(ceil)
    KFR_HANDLE_SCALAR(round)
    KFR_HANDLE_SCALAR(trunc)
    KFR_HANDLE_SCALAR(fract)
    KFR_SPEC_FN(in_round, floor)
    KFR_SPEC_FN(in_round, ceil)
    KFR_SPEC_FN(in_round, round)
    KFR_SPEC_FN(in_round, trunc)
    KFR_SPEC_FN(in_round, fract)
};
148 | 
/// SSE4.1 implementation: the 128-bit float / double versions map directly onto the
/// hardware rounding intrinsics. Everything else is inherited from the SSE2 specialisation.
template <>
struct in_round<cpu_t::sse41> : in_round<cpu_t::sse2>
{
    constexpr static cpu_t cpu = cpu_t::sse41;

    // `*value` passes the vector to the intrinsic (presumably yielding the underlying
    // __m128 / __m128d - confirm in vec.hpp).
    KFR_SINTRIN f32sse floor(f32sse value) { return _mm_floor_ps(*value); }
    KFR_SINTRIN f32sse ceil(f32sse value) { return _mm_ceil_ps(*value); }
    KFR_SINTRIN f32sse trunc(f32sse value) { return KFR_mm_trunc_ps(*value); }
    KFR_SINTRIN f32sse round(f32sse value) { return KFR_mm_roundnearest_ps(*value); }
    KFR_SINTRIN f64sse floor(f64sse value) { return _mm_floor_pd(*value); }
    KFR_SINTRIN f64sse ceil(f64sse value) { return _mm_ceil_pd(*value); }
    KFR_SINTRIN f64sse trunc(f64sse value) { return KFR_mm_trunc_pd(*value); }
    KFR_SINTRIN f64sse round(f64sse value) { return KFR_mm_roundnearest_pd(*value); }
    // fract: x minus floor(x), using the floor overloads declared above.
    KFR_SINTRIN f32sse fract(f32sse x) { return x - floor(x); }
    KFR_SINTRIN f64sse fract(f64sse x) { return x - floor(x); }

    // Macro-generated members (macros presumably from function.hpp): overloads for other
    // vector sizes and scalars, plus the fn_* function objects.
    KFR_HANDLE_ALL(floor)
    KFR_HANDLE_ALL(ceil)
    KFR_HANDLE_ALL(round)
    KFR_HANDLE_ALL(trunc)
    KFR_HANDLE_ALL(fract)
    KFR_HANDLE_SCALAR(floor)
    KFR_HANDLE_SCALAR(ceil)
    KFR_HANDLE_SCALAR(round)
    KFR_HANDLE_SCALAR(trunc)
    KFR_HANDLE_SCALAR(fract)
    KFR_SPEC_FN(in_round, floor)
    KFR_SPEC_FN(in_round, ceil)
    KFR_SPEC_FN(in_round, round)
    KFR_SPEC_FN(in_round, trunc)
    KFR_SPEC_FN(in_round, fract)
};
181 | 
/// AVX implementation: adds 256-bit float / double versions on top of the SSE4.1 ones.
template <>
struct in_round<cpu_t::avx1> : in_round<cpu_t::sse41>
{
    constexpr static cpu_t cpu = cpu_t::avx1;
    // Keep the inherited 128-bit overloads visible next to the 256-bit ones declared below;
    // without these using-declarations the new declarations would hide them.
    using in_round<cpu_t::sse41>::floor;
    using in_round<cpu_t::sse41>::ceil;
    using in_round<cpu_t::sse41>::trunc;
    using in_round<cpu_t::sse41>::round;
    using in_round<cpu_t::sse41>::fract;

    // 256-bit versions, mapped directly onto the AVX rounding intrinsics.
    KFR_SINTRIN f32avx floor(f32avx value) { return _mm256_floor_ps(*value); }
    KFR_SINTRIN f32avx ceil(f32avx value) { return _mm256_ceil_ps(*value); }
    KFR_SINTRIN f32avx trunc(f32avx value) { return KFR_mm256_trunc_ps(*value); }
    KFR_SINTRIN f32avx round(f32avx value) { return KFR_mm256_roundnearest_ps(*value); }
    KFR_SINTRIN f64avx floor(f64avx value) { return _mm256_floor_pd(*value); }
    KFR_SINTRIN f64avx ceil(f64avx value) { return _mm256_ceil_pd(*value); }
    KFR_SINTRIN f64avx trunc(f64avx value) { return KFR_mm256_trunc_pd(*value); }
    KFR_SINTRIN f64avx round(f64avx value) { return KFR_mm256_roundnearest_pd(*value); }
    // fract: x minus floor(x).
    KFR_SINTRIN f32avx fract(f32avx x) { return x - floor(x); }
    KFR_SINTRIN f64avx fract(f64avx x) { return x - floor(x); }

    // Macro-generated members (macros presumably from function.hpp): overloads for other
    // vector sizes and scalars, plus the fn_* function objects.
    KFR_HANDLE_ALL(floor)
    KFR_HANDLE_ALL(ceil)
    KFR_HANDLE_ALL(round)
    KFR_HANDLE_ALL(trunc)
    KFR_HANDLE_ALL(fract)
    KFR_HANDLE_SCALAR(floor)
    KFR_HANDLE_SCALAR(ceil)
    KFR_HANDLE_SCALAR(round)
    KFR_HANDLE_SCALAR(trunc)
    KFR_HANDLE_SCALAR(fract)
    KFR_SPEC_FN(in_round, floor)
    KFR_SPEC_FN(in_round, ceil)
    KFR_SPEC_FN(in_round, round)
    KFR_SPEC_FN(in_round, trunc)
    KFR_SPEC_FN(in_round, fract)
};
219 | 
// Remove the helper macros defined at the top of this header so they do not leak into
// code that includes it.
#undef KFR_mm_trunc_ps
#undef KFR_mm_roundnearest_ps
#undef KFR_mm_trunc_pd
#undef KFR_mm_roundnearest_pd
#undef KFR_mm_trunc_ss
#undef KFR_mm_roundnearest_ss
#undef KFR_mm_trunc_sd
#undef KFR_mm_roundnearest_sd
#undef KFR_mm_floor_ss
#undef KFR_mm_floor_sd
#undef KFR_mm_ceil_ss
#undef KFR_mm_ceil_sd
#undef KFR_mm256_trunc_ps
#undef KFR_mm256_roundnearest_ps
#undef KFR_mm256_trunc_pd
#undef KFR_mm256_roundnearest_pd
236 | }
237 | 
// Public entry points bound to internal::in_round<> (the default template argument,
// cpu_t::native). Each operation comes in two overloads:
//   - for numeric arguments (is_numeric): evaluates immediately and returns ftype<T1>;
//   - for input expressions (is_input_expression): returns an expr_func that pairs the
//     fn_* function object with the forwarded expression.
namespace native
{
// floor
using fn_floor = internal::in_round<>::fn_floor;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> floor(const T1& x)
{
    return internal::in_round<>::floor(x);
}

template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_INTRIN expr_func<fn_floor, E1> floor(E1&& x)
{
    return { fn_floor(), std::forward<E1>(x) };
}

// ceil
using fn_ceil = internal::in_round<>::fn_ceil;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> ceil(const T1& x)
{
    return internal::in_round<>::ceil(x);
}

template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_INTRIN expr_func<fn_ceil, E1> ceil(E1&& x)
{
    return { fn_ceil(), std::forward<E1>(x) };
}

// round
using fn_round = internal::in_round<>::fn_round;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> round(const T1& x)
{
    return internal::in_round<>::round(x);
}

template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_INTRIN expr_func<fn_round, E1> round(E1&& x)
{
    return { fn_round(), std::forward<E1>(x) };
}

// trunc
using fn_trunc = internal::in_round<>::fn_trunc;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> trunc(const T1& x)
{
    return internal::in_round<>::trunc(x);
}

template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_INTRIN expr_func<fn_trunc, E1> trunc(E1&& x)
{
    return { fn_trunc(), std::forward<E1>(x) };
}

// fract
using fn_fract = internal::in_round<>::fn_fract;
template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
KFR_INTRIN ftype<T1> fract(const T1& x)
{
    return internal::in_round<>::fract(x);
}

template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_INTRIN expr_func<fn_fract, E1> fract(E1&& x)
{
    return { fn_fract(), std::forward<E1>(x) };
}
}
305 | }
306 | 


--------------------------------------------------------------------------------
/include/kfr/base/saturation.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "function.hpp"
 26 | #include "select.hpp"
 27 | 
 28 | #pragma clang diagnostic push
 29 | #if CID_HAS_WARNING("-Winaccessible-base")
 30 | #pragma clang diagnostic ignored "-Winaccessible-base"
 31 | #endif
 32 | 
 33 | namespace kfr
 34 | {
 35 | 
 36 | namespace internal
 37 | {
 38 | 
 39 | template <cpu_t c = cpu_t::native, cpu_t cc = c> // generic case, used for cpu levels without an explicit specialization
 40 | struct in_saturated : in_saturated<older(c), cc> // inherits from the level returned by older(c); cc is passed through unchanged
 41 | {
 42 |     struct fn_satadd : in_saturated<older(c), cc>::fn_satadd, fn_disabled // older level's functor plus the fn_disabled base
 43 |     {
 44 |         // NOTE(review): only fn_satadd gains fn_disabled here; fn_satsub is inherited unchanged - confirm the asymmetry is intended
 45 | };
 46 | 
 47 | template <cpu_t cc> // cc selects the in_select<cc> base that supplies select() for the generic helpers
 48 | struct in_saturated<cpu_t::sse2, cc> : in_select<cc>
 49 | {
 50 |     constexpr static cpu_t cpu = cpu_t::sse2;
 51 | 
 52 | private:
 53 |     using in_select<cc>::select;
 54 |     // 8- and 16-bit lanes map directly onto the SSE2 saturating add/subtract intrinsics.
 55 | public:
 56 |     KFR_SINTRIN u8sse satadd(u8sse x, u8sse y) { return _mm_adds_epu8(*x, *y); }
 57 |     KFR_SINTRIN i8sse satadd(i8sse x, i8sse y) { return _mm_adds_epi8(*x, *y); }
 58 |     KFR_SINTRIN u16sse satadd(u16sse x, u16sse y) { return _mm_adds_epu16(*x, *y); }
 59 |     KFR_SINTRIN i16sse satadd(i16sse x, i16sse y) { return _mm_adds_epi16(*x, *y); }
 60 | 
 61 |     KFR_SINTRIN u8sse satsub(u8sse x, u8sse y) { return _mm_subs_epu8(*x, *y); }
 62 |     KFR_SINTRIN i8sse satsub(i8sse x, i8sse y) { return _mm_subs_epi8(*x, *y); }
 63 |     KFR_SINTRIN u16sse satsub(u16sse x, u16sse y) { return _mm_subs_epu16(*x, *y); }
 64 |     KFR_SINTRIN i16sse satsub(i16sse x, i16sse y) { return _mm_subs_epi16(*x, *y); }
 65 |     // 32- and 64-bit lanes use no intrinsic here; they go through the generic helpers defined below.
 66 |     KFR_SINTRIN i32sse satadd(i32sse a, i32sse b) { return saturated_signed_add(a, b); }
 67 |     KFR_SINTRIN i64sse satadd(i64sse a, i64sse b) { return saturated_signed_add(a, b); }
 68 |     KFR_SINTRIN u32sse satadd(u32sse a, u32sse b) { return saturated_unsigned_add(a, b); }
 69 |     KFR_SINTRIN u64sse satadd(u64sse a, u64sse b) { return saturated_unsigned_add(a, b); }
 70 | 
 71 |     KFR_SINTRIN i32sse satsub(i32sse a, i32sse b) { return saturated_signed_sub(a, b); }
 72 |     KFR_SINTRIN i64sse satsub(i64sse a, i64sse b) { return saturated_signed_sub(a, b); }
 73 |     KFR_SINTRIN u32sse satsub(u32sse a, u32sse b) { return saturated_unsigned_sub(a, b); }
 74 |     KFR_SINTRIN u64sse satsub(u64sse a, u64sse b) { return saturated_unsigned_sub(a, b); }
 75 | 
 76 | private:
 77 |     template <typename T, size_t N> // generic signed saturated add, shared by the i32 and i64 overloads above
 78 |     KFR_SINTRIN vec<T, N> saturated_signed_add(vec<T, N> a, vec<T, N> b)
 79 |     {
 80 |         constexpr size_t shift = typebits<T>::bits - 1; // sign-bit position of the lane type T, so i64 lanes shift by 63, not 31
 81 |         const vec<T, N> sum = a + b;
 82 |         a = (a >> shift) + allonesvector(a); // NOTE(review): meant to hold the saturation bound; depends on signed >> and allonesvector semantics - confirm
 83 |         // select() yields the value now stored in a where the sign test is >= 0, and the plain sum elsewhere.
 84 |         return select(((a ^ b) | ~(b ^ sum)) >= 0, a, sum);
 85 |     }
 86 |     template <typename T, size_t N> // generic signed saturated subtract, shared by the i32 and i64 overloads above
 87 |     KFR_SINTRIN vec<T, N> saturated_signed_sub(vec<T, N> a, vec<T, N> b)
 88 |     {
 89 |         constexpr size_t shift = typebits<T>::bits - 1; // sign-bit position of the lane type T, so i64 lanes shift by 63, not 31
 90 |         const vec<T, N> diff = a - b;
 91 |         a = (a >> shift) + allonesvector(a); // NOTE(review): meant to hold the saturation bound; depends on signed >> and allonesvector semantics - confirm
 92 |         // select() yields the value now stored in a where the sign test is < 0, and the plain difference elsewhere.
 93 |         return select(((a ^ b) & (a ^ diff)) < 0, a, diff);
 94 |     }
 95 |     template <typename T, size_t N> // generic unsigned saturated add, shared by the u32 and u64 overloads above
 96 |     KFR_SINTRIN vec<T, N> saturated_unsigned_add(vec<T, N> a, vec<T, N> b)
 97 |     {
 98 |         const vec<T, N> t = allonesvector(a); // derived from a runtime argument, hence const rather than constexpr; presumably the maximum value of T - confirm
 99 |         return select(a > t - b, t, a + b); // where a exceeds the headroom t - b the result is clamped to t
100 |     }
101 |     template <typename T, size_t N> // generic unsigned saturated subtract, shared by the u32 and u64 overloads above
102 |     KFR_SINTRIN vec<T, N> saturated_unsigned_sub(vec<T, N> a, vec<T, N> b)
103 |     {
104 |         return select(a < b, zerovector(a), a - b); // where a < b the result is zerovector(a), otherwise a - b
105 |     }
106 |     // The KFR_* lines below are project macros whose definitions live outside this header.
107 | public:
108 |     KFR_HANDLE_ALL(satadd)
109 |     KFR_HANDLE_ALL(satsub)
110 |     KFR_SPEC_FN(in_saturated, satadd)
111 |     KFR_SPEC_FN(in_saturated, satsub)
112 | };
113 | 
114 | template <cpu_t cc> // AVX2 level: adds 256-bit overloads on top of the SSE2 level
115 | struct in_saturated<cpu_t::avx2, cc> : in_saturated<cpu_t::sse2, cc>
116 | {
117 |     constexpr static cpu_t cpu = cpu_t::avx2;
118 |     using in_saturated<cpu_t::sse2, cc>::satadd; // keep the SSE2 overloads visible alongside the ones declared here
119 |     using in_saturated<cpu_t::sse2, cc>::satsub;
120 |     // Only 8- and 16-bit lanes get 256-bit overloads in this struct; each maps to one AVX2 saturating intrinsic.
121 |     KFR_SINTRIN u8avx satadd(u8avx x, u8avx y) { return _mm256_adds_epu8(*x, *y); }
122 |     KFR_SINTRIN i8avx satadd(i8avx x, i8avx y) { return _mm256_adds_epi8(*x, *y); }
123 |     KFR_SINTRIN u16avx satadd(u16avx x, u16avx y) { return _mm256_adds_epu16(*x, *y); }
124 |     KFR_SINTRIN i16avx satadd(i16avx x, i16avx y) { return _mm256_adds_epi16(*x, *y); }
125 | 
126 |     KFR_SINTRIN u8avx satsub(u8avx x, u8avx y) { return _mm256_subs_epu8(*x, *y); }
127 |     KFR_SINTRIN i8avx satsub(i8avx x, i8avx y) { return _mm256_subs_epi8(*x, *y); }
128 |     KFR_SINTRIN u16avx satsub(u16avx x, u16avx y) { return _mm256_subs_epu16(*x, *y); }
129 |     KFR_SINTRIN i16avx satsub(i16avx x, i16avx y) { return _mm256_subs_epi16(*x, *y); }
130 |     // The KFR_* lines below are project macros whose definitions live outside this header.
131 |     KFR_HANDLE_ALL(satadd)
132 |     KFR_HANDLE_ALL(satsub)
133 |     KFR_SPEC_FN(in_saturated, satadd)
134 |     KFR_SPEC_FN(in_saturated, satsub)
135 | };
136 | }
137 | namespace native
138 | {
139 | using fn_satadd = internal::in_saturated<>::fn_satadd; // functor type taken from in_saturated's default instantiation
140 | template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> // participates only when is_numeric_args<T1, T2>::value holds
141 | KFR_INLINE ftype<common_type<T1, T2>> satadd(const T1& x, const T2& y) // numeric overload; declared result is ftype of the common type
142 | {
143 |     return internal::in_saturated<>::satadd(x, y); // forwards both arguments to in_saturated with its default template arguments
144 | }
145 | 
146 | template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> // participates only for input expressions
147 | KFR_INLINE expr_func<fn_satadd, E1, E2> satadd(E1&& x, E2&& y) // expression overload: wraps both arguments in an expr_func
148 | {
149 |     return { fn_satadd(), std::forward<E1>(x), std::forward<E2>(y) }; // brace-initialized from a fn_satadd functor and the forwarded arguments
150 | }
151 | using fn_satsub = internal::in_saturated<>::fn_satsub; // functor type taken from in_saturated's default instantiation
152 | template <typename T1, typename T2, KFR_ENABLE_IF(is_numeric_args<T1, T2>::value)> // participates only when is_numeric_args<T1, T2>::value holds
153 | KFR_INLINE ftype<common_type<T1, T2>> satsub(const T1& x, const T2& y) // numeric overload; declared result is ftype of the common type
154 | {
155 |     return internal::in_saturated<>::satsub(x, y); // forwards both arguments to in_saturated with its default template arguments
156 | }
157 | 
158 | template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> // participates only for input expressions
159 | KFR_INLINE expr_func<fn_satsub, E1, E2> satsub(E1&& x, E2&& y) // expression overload: wraps both arguments in an expr_func
160 | {
161 |     return { fn_satsub(), std::forward<E1>(x), std::forward<E2>(y) }; // brace-initialized from a fn_satsub functor and the forwarded arguments
162 | }
163 | }
164 | }
165 | 
166 | #pragma clang diagnostic pop
167 | 


--------------------------------------------------------------------------------
/include/kfr/base/select.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "function.hpp"
 26 | 
 27 | namespace kfr
 28 | {
 29 | namespace internal
 30 | {
 31 | 
 32 | template <cpu_t c> // generic case, used for cpu levels without an explicit specialization
 33 | struct in_select_impl : in_select_impl<older(c)> // inherits from the level returned by older(c)
 34 | {
 35 |     struct fn_select : fn_disabled // at this level fn_select derives only from fn_disabled
 36 |     {
 37 |     };
 38 | };
 39 | 
 40 | template <> // SSE2 level: base of the in_select_impl chain, has no base class itself
 41 | struct in_select_impl<cpu_t::sse2>
 42 | {
 43 |     constexpr static cpu_t cur = cpu_t::sse2; // NOTE(review): the sse41/avx1/avx2 levels name this member cpu, not cur - confirm intended
 44 | 
 45 |     template <typename T, size_t N> // generic select built from plain bitwise operators
 46 |     KFR_SINTRIN vec<T, N> select(vec<T, N> m, vec<T, N> x, vec<T, N> y)
 47 |     {
 48 |         return y ^ ((x ^ y) & m); // bitwise: yields x's bit where the m bit is 1 and y's bit where it is 0
 49 |     }
 50 |     KFR_SPEC_FN(in_select_impl, select) // project macro; its definition lives outside this header
 51 | };
 52 | 
 53 | template <> // SSE4.1 level: 128-bit select via the blendv intrinsics
 54 | struct in_select_impl<cpu_t::sse41> : in_select_impl<cpu_t::sse2>
 55 | {
 56 |     constexpr static cpu_t cpu = cpu_t::sse41;
 57 |     // NOTE(review): unlike the avx1/avx2 levels there is no using-declaration for the base select here - confirm intended.
 58 |     KFR_CPU_INTRIN(sse41) u8sse select(u8sse m, u8sse x, u8sse y) { return _mm_blendv_epi8(*y, *x, *m); }
 59 |     KFR_CPU_INTRIN(sse41) u16sse select(u16sse m, u16sse x, u16sse y) { return _mm_blendv_epi8(*y, *x, *m); }
 60 |     KFR_CPU_INTRIN(sse41) u32sse select(u32sse m, u32sse x, u32sse y) { return _mm_blendv_epi8(*y, *x, *m); }
 61 |     KFR_CPU_INTRIN(sse41) u64sse select(u64sse m, u64sse x, u64sse y) { return _mm_blendv_epi8(*y, *x, *m); }
 62 |     KFR_CPU_INTRIN(sse41) i8sse select(i8sse m, i8sse x, i8sse y) { return _mm_blendv_epi8(*y, *x, *m); }
 63 |     KFR_CPU_INTRIN(sse41) i16sse select(i16sse m, i16sse x, i16sse y) { return _mm_blendv_epi8(*y, *x, *m); }
 64 |     KFR_CPU_INTRIN(sse41) i32sse select(i32sse m, i32sse x, i32sse y) { return _mm_blendv_epi8(*y, *x, *m); }
 65 |     KFR_CPU_INTRIN(sse41) i64sse select(i64sse m, i64sse x, i64sse y) { return _mm_blendv_epi8(*y, *x, *m); }
 66 |     KFR_CPU_INTRIN(sse41) f32sse select(f32sse m, f32sse x, f32sse y) { return _mm_blendv_ps(*y, *x, *m); }
 67 |     KFR_CPU_INTRIN(sse41) f64sse select(f64sse m, f64sse x, f64sse y) { return _mm_blendv_pd(*y, *x, *m); }
 68 |     // All integer widths use the byte-wise blend in (y, x, m) order; assumes every byte of a lane's mask agrees - TODO confirm.
 69 |     KFR_HANDLE_ALL(select)
 70 |     KFR_SPEC_FN(in_select_impl, select)
 71 | };
 72 | 
 73 | template <> // AVX level: adds 256-bit floating-point select on top of the SSE4.1 level
 74 | struct in_select_impl<cpu_t::avx1> : in_select_impl<cpu_t::sse41>
 75 | {
 76 |     constexpr static cpu_t cpu = cpu_t::avx1;
 77 |     using in_select_impl<cpu_t::sse41>::select; // keep the SSE4.1 overloads visible alongside the ones declared here
 78 |     // Only f64 and f32 get 256-bit overloads in this struct; operands are passed to blendv in (y, x, m) order.
 79 |     KFR_CPU_INTRIN(avx) f64avx select(f64avx m, f64avx x, f64avx y) { return _mm256_blendv_pd(*y, *x, *m); }
 80 |     KFR_CPU_INTRIN(avx) f32avx select(f32avx m, f32avx x, f32avx y) { return _mm256_blendv_ps(*y, *x, *m); }
 81 | 
 82 |     KFR_HANDLE_ALL(select) // project macros; their definitions live outside this header
 83 |     KFR_SPEC_FN(in_select_impl, select)
 84 | };
 85 | 
 86 | template <> // AVX2 level: adds 256-bit integer select on top of the AVX level
 87 | struct in_select_impl<cpu_t::avx2> : in_select_impl<cpu_t::avx1>
 88 | {
 89 |     constexpr static cpu_t cpu = cpu_t::avx2;
 90 |     using in_select_impl<cpu_t::avx1>::select; // keep the inherited overloads visible alongside the ones declared here
 91 |     // Every integer width uses the byte-wise _mm256_blendv_epi8 in (y, x, m) order; assumes uniform mask bytes per lane - TODO confirm.
 92 |     KFR_CPU_INTRIN(avx2) u8avx select(u8avx m, u8avx x, u8avx y) { return _mm256_blendv_epi8(*y, *x, *m); }
 93 |     KFR_CPU_INTRIN(avx2) u16avx select(u16avx m, u16avx x, u16avx y)
 94 |     {
 95 |         return _mm256_blendv_epi8(*y, *x, *m);
 96 |     }
 97 |     KFR_CPU_INTRIN(avx2) u32avx select(u32avx m, u32avx x, u32avx y)
 98 |     {
 99 |         return _mm256_blendv_epi8(*y, *x, *m);
100 |     }
101 |     KFR_CPU_INTRIN(avx2) u64avx select(u64avx m, u64avx x, u64avx y)
102 |     {
103 |         return _mm256_blendv_epi8(*y, *x, *m);
104 |     }
105 |     KFR_CPU_INTRIN(avx2) i8avx select(i8avx m, i8avx x, i8avx y) { return _mm256_blendv_epi8(*y, *x, *m); }
106 |     KFR_CPU_INTRIN(avx2) i16avx select(i16avx m, i16avx x, i16avx y)
107 |     {
108 |         return _mm256_blendv_epi8(*y, *x, *m);
109 |     }
110 |     KFR_CPU_INTRIN(avx2) i32avx select(i32avx m, i32avx x, i32avx y)
111 |     {
112 |         return _mm256_blendv_epi8(*y, *x, *m);
113 |     }
114 |     KFR_CPU_INTRIN(avx2) i64avx select(i64avx m, i64avx x, i64avx y)
115 |     {
116 |         return _mm256_blendv_epi8(*y, *x, *m);
117 |     }
118 | 
119 |     KFR_HANDLE_ALL(select) // project macros; their definitions live outside this header
120 |     KFR_SPEC_FN(in_select_impl, select)
121 | };
122 | 
123 | template <cpu_t c = cpu_t::native> // c: cpu level whose in_select_impl<c> performs the actual blending
124 | struct in_select : in_select_impl<c>
125 | {
126 |     using in_select_impl<c>::select; // keep the raw vec-mask overloads visible next to the mask<> overloads below
127 |     // Every overload below checks sizeof(M) == sizeof(T), bitcasts the mask to T and widens scalars with broadcast<N>.
128 |     template <typename T, size_t N, typename M>
129 |     KFR_SINTRIN vec<T, N> select(mask<M, N> m, vec<T, N> x, vec<T, N> y) // (mask, vec, vec)
130 |     {
131 |         static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
132 |         return in_select_impl<c>::select(bitcast<T>(m), x, y);
133 |     }
134 |     template <typename T, size_t N, typename M>
135 |     KFR_SINTRIN vec<T, N> select(mask<M, N> m, mask<T, N> x, mask<T, N> y) // (mask, mask, mask): operands viewed as vec<T, N>
136 |     {
137 |         static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
138 |         return in_select_impl<c>::select(bitcast<T>(m), ref_cast<vec<T, N>>(x), ref_cast<vec<T, N>>(y));
139 |     }
140 | 
141 |     template <typename T, size_t N, typename M>
142 |     KFR_SINTRIN vec<T, N> select(mask<M, N> m, T x, T y) // (mask, scalar, scalar)
143 |     {
144 |         static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
145 |         return in_select_impl<c>::select(bitcast<T>(m), broadcast<N>(x), broadcast<N>(y));
146 |     }
147 | 
148 |     template <typename T, size_t N, typename M>
149 |     KFR_SINTRIN vec<T, N> select(mask<M, N> m, vec<T, N> x, T y) // (mask, vec, scalar)
150 |     {
151 |         static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
152 |         return in_select_impl<c>::select(bitcast<T>(m), x, broadcast<N>(y));
153 |     }
154 | 
155 |     template <typename T, size_t N, typename M>
156 |     KFR_SINTRIN vec<T, N> select(mask<M, N> m, T x, vec<T, N> y) // (mask, scalar, vec)
157 |     {
158 |         static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
159 |         return in_select_impl<c>::select(bitcast<T>(m), broadcast<N>(x), y);
160 |     }
161 |     template <typename T, size_t N, typename M>
162 |     KFR_SINTRIN vec<T, N> select(mask<M, N> m, mask<T, N> x, T y) // (mask, mask, scalar)
163 |     {
164 |         static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
165 |         return in_select_impl<c>::select(bitcast<T>(m), ref_cast<vec<T, N>>(x), broadcast<N>(y));
166 |     }
167 | 
168 |     template <typename T, size_t N, typename M>
169 |     KFR_SINTRIN vec<T, N> select(mask<M, N> m, T x, mask<T, N> y) // (mask, scalar, mask)
170 |     {
171 |         static_assert(sizeof(M) == sizeof(T), "select: Incompatible types");
172 |         return in_select_impl<c>::select(bitcast<T>(m), broadcast<N>(x), ref_cast<vec<T, N>>(y)); // mask is bitcast to T like in every sibling overload
173 |     }
174 |     KFR_SPEC_FN(in_select, select) // project macro; its definition lives outside this header
175 |     // sign(): 1 where x > T(), -1 where x < T(), otherwise 0, built from two nested selects.
176 |     template <typename T, size_t N>
177 |     KFR_SINTRIN vec<T, N> sign(vec<T, N> x)
178 |     {
179 |         return select(x > T(), T(1), select(x < T(), T(-1), T(0)));
180 |     }
181 | };
182 | }
183 | 
184 | namespace native
185 | {
186 | using fn_select = internal::in_select<>::fn_select; // functor type taken from in_select's default instantiation
187 | template <typename T1, typename T2, typename T3, KFR_ENABLE_IF(is_numeric_args<T1, T2, T3>::value)> // participates only for numeric arguments
188 | KFR_INLINE ftype<common_type<T2, T3>> select(const T1& arg1, const T2& arg2, const T3& arg3) // result type comes from arg2/arg3 only
189 | {
190 |     return internal::in_select<>::select(arg1, arg2, arg3); // forwards to in_select with its default template argument
191 | }
192 | template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)> // participates only for input expressions
193 | KFR_INLINE expr_func<fn_select, E1, E2, E3> select(E1&& arg1, E2&& arg2, E3&& arg3) // expression overload: wraps all three arguments
194 | {
195 |     return { fn_select(), std::forward<E1>(arg1), std::forward<E2>(arg2), std::forward<E3>(arg3) }; // functor plus forwarded arguments
196 | }
197 | }
198 | }
199 | 


--------------------------------------------------------------------------------
/include/kfr/base/sinh_cosh.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | #include "abs.hpp"
 25 | #include "constants.hpp"
 26 | #include "function.hpp"
 27 | #include "log_exp.hpp"
 28 | #include "min_max.hpp"
 29 | #include "operators.hpp"
 30 | #include "select.hpp"
 31 | 
 32 | namespace kfr
 33 | {
 34 | 
 35 | namespace internal
 36 | {
 37 | 
 38 | template <cpu_t c = cpu_t::native> // c: cpu level forwarded to the in_log_exp<c> base
 39 | struct in_sinh_cosh : in_log_exp<c>
 40 | {
 41 |     constexpr static cpu_t cur = c;
 42 | 
 43 | private:
 44 |     using in_log_exp<c>::exp; // every function below is expressed through the base's exp
 45 |     // sinh and cosh combine exp(x) and exp(-x) and scale the result by T(0.5).
 46 | public:
 47 |     template <typename T, size_t N>
 48 |     KFR_SINTRIN vec<T, N> sinh(vec<T, N> x)
 49 |     {
 50 |         return (exp(x) - exp(-x)) * T(0.5);
 51 |     }
 52 | 
 53 |     template <typename T, size_t N>
 54 |     KFR_SINTRIN vec<T, N> cosh(vec<T, N> x)
 55 |     {
 56 |         return (exp(x) + exp(-x)) * T(0.5);
 57 |     }
 58 |     // The combined variants exist only for N > 1 and merge the two exponentials with subadd / addsub.
 59 |     template <typename T, size_t N, KFR_ENABLE_IF(N > 1)>
 60 |     KFR_SINTRIN vec<T, N> sinhcosh(vec<T, N> x)
 61 |     {
 62 |         const vec<T, N> exp_pos = exp(x);
 63 |         const vec<T, N> exp_neg = exp(-x);
 64 |         return subadd(exp_pos, exp_neg) * T(0.5);
 65 |     }
 66 | 
 67 |     template <typename T, size_t N, KFR_ENABLE_IF(N > 1)>
 68 |     KFR_SINTRIN vec<T, N> coshsinh(vec<T, N> x)
 69 |     {
 70 |         const vec<T, N> exp_pos = exp(x);
 71 |         const vec<T, N> exp_neg = exp(-x);
 72 |         return addsub(exp_pos, exp_neg) * T(0.5);
 73 |     }
 74 |     KFR_SPEC_FN(in_sinh_cosh, sinh)
 75 |     KFR_SPEC_FN(in_sinh_cosh, cosh)
 76 |     KFR_SPEC_FN(in_sinh_cosh, sinhcosh)
 77 |     KFR_SPEC_FN(in_sinh_cosh, coshsinh)
 78 | };
 79 | }
 80 | 
 81 | namespace native
 82 | {
 83 | using fn_sinh = internal::in_sinh_cosh<>::fn_sinh;
 84 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
 85 | // Numeric overload (enabled when is_numeric<T1>::value): forwards x to internal::in_sinh_cosh<>::sinh.
 86 | KFR_INTRIN ftype<T1> sinh(const T1& x)
 87 | {
 88 |     return internal::in_sinh_cosh<>::sinh(x);
 89 | }
 90 | 
 91 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
 92 | // Expression overload: returns an expr_func built from a fn_sinh functor and the forwarded argument.
 93 | KFR_INTRIN expr_func<fn_sinh, E1> sinh(E1&& x)
 94 | {
 95 |     return { fn_sinh(), std::forward<E1>(x) };
 96 | }
 97 | 
 98 | using fn_cosh = internal::in_sinh_cosh<>::fn_cosh;
 99 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
100 | // Numeric overload (enabled when is_numeric<T1>::value): forwards x to internal::in_sinh_cosh<>::cosh.
101 | KFR_INTRIN ftype<T1> cosh(const T1& x)
102 | {
103 |     return internal::in_sinh_cosh<>::cosh(x);
104 | }
105 | 
106 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
107 | // Expression overload: returns an expr_func built from a fn_cosh functor and the forwarded argument.
108 | KFR_INTRIN expr_func<fn_cosh, E1> cosh(E1&& x)
109 | {
110 |     return { fn_cosh(), std::forward<E1>(x) };
111 | }
112 | 
113 | using fn_sinhcosh = internal::in_sinh_cosh<>::fn_sinhcosh;
114 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
115 | // Numeric overload: forwards x to internal::in_sinh_cosh<>::sinhcosh, which itself is enabled only for N > 1.
116 | KFR_INTRIN ftype<T1> sinhcosh(const T1& x)
117 | {
118 |     return internal::in_sinh_cosh<>::sinhcosh(x);
119 | }
120 | 
121 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
122 | // Expression overload: returns an expr_func built from a fn_sinhcosh functor and the forwarded argument.
123 | KFR_INTRIN expr_func<fn_sinhcosh, E1> sinhcosh(E1&& x)
124 | {
125 |     return { fn_sinhcosh(), std::forward<E1>(x) };
126 | }
127 | 
128 | using fn_coshsinh = internal::in_sinh_cosh<>::fn_coshsinh;
129 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
130 | // Numeric overload: forwards x to internal::in_sinh_cosh<>::coshsinh, which itself is enabled only for N > 1.
131 | KFR_INTRIN ftype<T1> coshsinh(const T1& x)
132 | {
133 |     return internal::in_sinh_cosh<>::coshsinh(x);
134 | }
135 | 
136 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
137 | // Expression overload: returns an expr_func built from a fn_coshsinh functor and the forwarded argument.
138 | KFR_INTRIN expr_func<fn_coshsinh, E1> coshsinh(E1&& x)
139 | {
140 |     return { fn_coshsinh(), std::forward<E1>(x) };
141 | }
142 | }
143 | }
144 | 


--------------------------------------------------------------------------------
/include/kfr/base/specializations.i:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  * 
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  * 
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  * 
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  */
 18 | #pragma once
 19 | 
 20 | #include "vec.hpp"
 21 | #ifndef KFR_SHUFFLE_SPECIALIZATIONS
 22 | #include "shuffle.hpp"
 23 | #endif
 24 | 
 25 | namespace kfr
 26 | {
 27 | namespace internal
 28 | {
 29 | template <> // hand-written shufflevector specialization for f32x32, selected by the exact index list below
 30 | inline vec<f32, 32> shufflevector<f32, 32>(
 31 |     csizes_t<0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27, 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14,
 32 |              15, 22, 23, 30, 31>,
 33 |     vec<f32, 32> x, vec<f32, 32>) // the second vector parameter is unnamed and unused
 34 | {
 35 |     f32x32 w = x;
 36 |     // Step 1: apply the same 16-element permute to the low half and to the high half, then rejoin them.
 37 |     w = concat(permute<0, 1, 8, 9, 4, 5, 12, 13, 2, 3, 10, 11, 6, 7, 14, 15>(low(w)),
 38 |                permute<0, 1, 8, 9, 4, 5, 12, 13, 2, 3, 10, 11, 6, 7, 14, 15>(high(w)));
 39 |     // Step 2: reorder the result with permutegroups (first argument presumably the group size - confirm).
 40 |     w = permutegroups<(4), 0, 4, 2, 6, 1, 5, 3, 7>(w); // avx: vperm2f128 & vinsertf128, sse: no-op
 41 |     return w;
 42 | }
 43 | 
 44 | template <> // hand-written shufflevector specialization for f32x32, selected by the exact index list below
 45 | inline vec<f32, 32> shufflevector<f32, 32>(
 46 |     csizes_t<0, 1, 16, 17, 8, 9, 24, 25, 4, 5, 20, 21, 12, 13, 28, 29, 2, 3, 18, 19, 10, 11, 26, 27, 6, 7, 22,
 47 |              23, 14, 15, 30, 31>,
 48 |     vec<f32, 32> x, vec<f32, 32>) // the second vector parameter is unnamed and unused
 49 | {
 50 |     f32x32 w = x;
 51 |     // Step 1: apply the same 16-element permute to even<8>(w) and to odd<8>(w), then rejoin them.
 52 |     w = concat(permute<0, 1, 8, 9, 4, 5, 12, 13, /**/ 2, 3, 10, 11, 6, 7, 14, 15>(even<8>(w)),
 53 |                permute<0, 1, 8, 9, 4, 5, 12, 13, /**/ 2, 3, 10, 11, 6, 7, 14, 15>(odd<8>(w)));
 54 |     // Step 2: reorder the result with permutegroups (first argument presumably the group size - confirm).
 55 |     w = permutegroups<(4), 0, 4, 1, 5, 2, 6, 3, 7>(w); // avx: vperm2f128 & vinsertf128, sse: no-op
 56 |     return w;
 57 | }
 58 | 
 59 | inline vec<f32, 32> bitreverse_2(vec<f32, 32> x) // named shortcut for one specific f32x32 shuffle
 60 | {
 61 |     return shufflevector<f32, 32>(csizes<0, 1, 16, 17, 8, 9, 24, 25, 4, 5, 20, 21, 12, 13, 28, 29, 2, 3, 18, // same index list as the
 62 |                                          19, 10, 11, 26, 27, 6, 7, 22, 23, 14, 15, 30, 31>, // specialization defined just above
 63 |                                   x, x); // x is passed for both vector arguments
 64 | }
 65 | 
 66 | template <> // hand-written shufflevector specialization for f32x64, selected by the exact index list below
 67 | inline vec<f32, 64> shufflevector<f32, 64>(
 68 |     csizes_t<0, 1, 32, 33, 16, 17, 48, 49, 8, 9, 40, 41, 24, 25, 56, 57, 4, 5, 36, 37, 20, 21, 52, 53, 12, 13,
 69 |              44, 45, 28, 29, 60, 61, 2, 3, 34, 35, 18, 19, 50, 51, 10, 11, 42, 43, 26, 27, 58, 59, 6, 7, 38,
 70 |              39, 22, 23, 54, 55, 14, 15, 46, 47, 30, 31, 62, 63>,
 71 |     vec<f32, 64> x, vec<f32, 64>) // the second vector parameter is unnamed and unused
 72 | {
 73 |     x = concat(bitreverse_2(even<8>(x)), bitreverse_2(odd<8>(x))); // run the 32-element bitreverse_2 on even<8>(x) and odd<8>(x)
 74 |     return permutegroups<(8), 0, 4, 1, 5, 2, 6, 3, 7>(x); // final reorder (first argument presumably the group size - confirm)
 75 | }
 76 | 
 77 | template <> // f32x16 specialization for the index list 0,2,...,14,1,3,...,15 (even indices first, then odd)
 78 | inline vec<f32, 16> shufflevector<f32, 16>(csizes_t<0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15>,
 79 |                                            vec<f32, 16> x, vec<f32, 16>) // the second vector parameter is unnamed and unused
 80 | {
 81 | // Debug aid, kept disabled:   asm volatile("int $3");
 82 |     x = permutegroups<(4), 0, 2, 1, 3>(x);
 83 |     // Both shuffles read the low and high halves of the already permuted x; concat joins their results.
 84 |     x = concat(shuffle<0, 2, 8 + 0, 8 + 2>(low(x), high(x)), shuffle<1, 3, 8 + 1, 8 + 3>(low(x), high(x)));
 85 | 
 86 |     return x;
 87 | }
 88 | 
 89 | template <> // f32x16 specialization for the index list 0,8,1,9,...,7,15 (low and high halves interleaved)
 90 | inline vec<f32, 16> shufflevector<f32, 16>(csizes_t<0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15>,
 91 |                                            vec<f32, 16> x, vec<f32, 16>) // the second vector parameter is unnamed and unused
 92 | {
 93 |     x = concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x)));
 94 |     // Same two building blocks as the even/odd specialization above, applied in the opposite order.
 95 |     x = permutegroups<(4), 0, 2, 1, 3>(x);
 96 | 
 97 |     return x;
 98 | }
 99 | 
100 | template <> // f32x32 specialization for the index list 0,16,1,17,...,15,31 (low and high halves interleaved)
101 | inline vec<f32, 32> shufflevector<f32, 32>(
102 |     csizes_t<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13,
103 |              29, 14, 30, 15, 31>,
104 |     vec<f32, 32> x, vec<f32, 32>) // the second vector parameter is unnamed and unused
105 | {
106 |     x = permutegroups<(8), 0, 2, 1, 3>(x);
107 |     // After the group permute, interleavehalfs is applied to each 16-element half and the halves are rejoined.
108 |     x = concat(interleavehalfs(low(x)), interleavehalfs(high(x)));
109 | 
110 |     return x;
111 | }
112 | }
113 | }
114 | 


--------------------------------------------------------------------------------
/include/kfr/base/sqrt.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 3 |  * This file is part of KFR
 4 |  *
 5 |  * KFR is free software: you can redistribute it and/or modify
 6 |  * it under the terms of the GNU General Public License as published by
 7 |  * the Free Software Foundation, either version 3 of the License, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * KFR is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  * GNU General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public License
16 |  * along with KFR.
17 |  *
18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
20 |  * disclosing the source code of your own applications.
21 |  * See http://www.kfrlib.com for details.
22 |  */
23 | #pragma once
24 | 
25 | #include "function.hpp"
26 | 
27 | namespace kfr
28 | {
29 | 
30 | namespace internal
31 | {
32 | 
33 | template <cpu_t c = cpu_t::native> // generic case, used for cpu levels without an explicit specialization
34 | struct in_sqrt : in_sqrt<older(c)> // inherits from the level returned by older(c)
35 | {
36 |     struct fn_sqrt : fn_disabled // at this level fn_sqrt derives only from fn_disabled
37 |     {
38 |     };
39 | };
40 | 
41 | template <> // SSE2 level: base of the in_sqrt chain, has no base class itself
42 | struct in_sqrt<cpu_t::sse2>
43 | {
44 |     constexpr static cpu_t cpu = cpu_t::sse2;
45 |     // One overload per 128-bit floating-point vector type, each a single SSE intrinsic call.
46 |     KFR_SINTRIN f32sse sqrt(f32sse x) { return _mm_sqrt_ps(*x); }
47 |     KFR_SINTRIN f64sse sqrt(f64sse x) { return _mm_sqrt_pd(*x); }
48 |     // The KFR_* lines below are project macros whose definitions live outside this header.
49 |     KFR_HANDLE_ALL(sqrt)
50 |     KFR_HANDLE_SCALAR(sqrt)
51 |     KFR_SPEC_FN(in_sqrt, sqrt)
52 | };
53 | 
54 | template <> // AVX level: adds 256-bit overloads on top of the SSE2 level
55 | struct in_sqrt<cpu_t::avx1> : in_sqrt<cpu_t::sse2>
56 | {
57 |     constexpr static cpu_t cpu = cpu_t::avx1;
58 |     using in_sqrt<cpu_t::sse2>::sqrt; // keep the SSE2 overloads visible alongside the ones declared here
59 |     // One overload per 256-bit floating-point vector type, each a single AVX intrinsic call.
60 |     KFR_SINTRIN f32avx KFR_USE_CPU(avx) sqrt(f32avx x) { return _mm256_sqrt_ps(*x); }
61 |     KFR_SINTRIN f64avx KFR_USE_CPU(avx) sqrt(f64avx x) { return _mm256_sqrt_pd(*x); }
62 |     // The KFR_* lines below are project macros whose definitions live outside this header.
63 |     KFR_HANDLE_ALL(sqrt)
64 |     KFR_HANDLE_SCALAR(sqrt)
65 |     KFR_SPEC_FN(in_sqrt, sqrt)
66 | };
67 | }
68 | namespace native
69 | {
70 | using fn_sqrt = internal::in_sqrt<>::fn_sqrt; // functor type taken from in_sqrt's default instantiation
71 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)> // participates only when is_numeric<T1>::value holds
72 | KFR_INTRIN ftype<T1> sqrt(const T1& x) // numeric overload; the declared result type is ftype<T1>
73 | {
74 |     return internal::in_sqrt<>::sqrt(x); // forwards to in_sqrt with its default template argument
75 | }
76 | 
77 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> // participates only for input expressions
78 | KFR_INTRIN expr_func<fn_sqrt, E1> sqrt(E1&& x) // expression overload: wraps the argument in expr_func<fn_sqrt, E1>
79 | {
80 |     return { fn_sqrt(), std::forward<E1>(x) }; // brace-initialized from a fn_sqrt functor and the forwarded argument
81 | }
82 | }
83 | }
84 | 


--------------------------------------------------------------------------------
/include/kfr/base/tan.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | #include "abs.hpp"
 25 | #include "constants.hpp"
 26 | #include "function.hpp"
 27 | #include "operators.hpp"
 28 | #include "select.hpp"
 29 | #include "sin_cos.hpp"
 30 | 
 31 | #pragma clang diagnostic push
 32 | #if CID_HAS_WARNING("-Winaccessible-base")
 33 | #pragma clang diagnostic ignored "-Winaccessible-base"
 34 | #endif
 35 | #if CID_HAS_WARNING("-Wc99-extensions")
 36 | #pragma clang diagnostic ignored "-Wc99-extensions"
 37 | #endif
 38 | 
 39 | namespace kfr
 40 | {
 41 | 
 42 | namespace internal
 43 | {
 44 | 
 45 | template <cpu_t c = cpu_t::native, cpu_t cc = c>
 46 | struct in_tan : in_trig<cc>, in_select<cc>, in_round<cc>, in_abs<cc>
 47 | {
 48 | private:
 49 |     using in_abs<cc>::abs;
 50 |     using in_round<cc>::floor;
 51 |     using in_select<cc>::select;
 52 |     using in_trig<cc>::mask_horner;
 53 | 
 54 |     template <typename T, size_t N, typename IT = itype<T>>
 55 |     KFR_SINTRIN vec<T, N> trig_fold(vec<T, N> x_full, mask<T, N>& inverse)
 56 |     {
 57 |         constexpr T step = c_pi<T, 1, 4>; // folding step (presumably pi * 1/4 - confirm against constants.hpp)
 58 |         // Work on |x_full| and express it as a number of steps.
 59 |         const vec<T, N> magnitude = abs(x_full);
 60 |         const vec<T, N> step_pos  = magnitude / step;
 61 |         // Whole step count, kept both as floating point and as integer (IT) lanes.
 62 |         const vec<T, N> count_real = floor(step_pos);
 63 |         const vec<IT, N> count     = cast<IT>(count_real);
 64 |         // Remainder after removing the whole steps.
 65 |         vec<T, N> folded = magnitude - count_real * step;
 66 |         // For odd step counts one more step is subtracted from the remainder.
 67 |         const mask<T, N> odd_count = (count & 1) != 0;
 68 |         folded = select(odd_count, folded - step, folded);
 69 |         // inverse (output) is set where the step count modulo 4 is 1 or 2.
 70 |         const vec<IT, N> count_mod4 = count & 3;
 71 |         inverse = (count_mod4 == 1) || (count_mod4 == 2);
 72 |         return folded;
 73 |     }
 74 | 
 75 | public:
 76 |     template <size_t N> // single-precision tan for vectors of N lanes
 77 |     KFR_SINTRIN vec<f32, N> tan(vec<f32, N> x_full)
 78 |     {
 79 |         mask<f32, N> inverse; // filled in by trig_fold through its reference parameter
 80 |         const vec<f32, N> x = trig_fold(x_full, inverse);
 81 |         // Coefficients used for the tan_* polynomial, written as hexadecimal floating-point literals.
 82 |         constexpr f32 tan_c2  = 0x5.555378p-4;
 83 |         constexpr f32 tan_c4  = 0x2.225bb8p-4;
 84 |         constexpr f32 tan_c6  = 0xd.ac3fep-8;
 85 |         constexpr f32 tan_c8  = 0x6.41644p-8;
 86 |         constexpr f32 tan_c10 = 0xc.bfe7ep-12;
 87 |         constexpr f32 tan_c12 = 0x2.6754dp-8;
 88 |         // Coefficients used for the cot_* polynomial.
 89 |         constexpr f32 cot_c2  = -0x5.555558p-4;
 90 |         constexpr f32 cot_c4  = -0x5.b0581p-8;
 91 |         constexpr f32 cot_c6  = -0x8.ac5ccp-12;
 92 |         constexpr f32 cot_c8  = -0xd.aaa01p-16;
 93 |         constexpr f32 cot_c10 = -0x1.a9a9b4p-16;
 94 |         constexpr f32 cot_c12 = -0x6.f7d4dp-24;
 95 |         // Coefficients go to mask_horner in (cot, tan) pairs; presumably cot_* is used where inverse is set - confirm in in_trig.
 96 |         const vec<f32, N> x2  = x * x;
 97 |         const vec<f32, N> val = mask_horner(x2, inverse, 1.0f, 1.0f, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6,
 98 |                                             tan_c6, cot_c8, tan_c8, cot_c10, tan_c10, cot_c12, tan_c12);
 99 |         // Where inverse is set the result is val / -x, otherwise val * x; mulsign then combines z with the original argument.
100 |         const vec<f32, N> z = select(inverse, val / -x, val * x);
101 |         return mulsign(z, x_full);
102 |     }
103 | 
104 |     template <size_t N>
105 |     KFR_SINTRIN vec<f64, N> tan(vec<f64, N> x_full)
106 |     {
107 |         mask<f64, N> inverse;
108 |         const vec<f64, N> x = trig_fold(x_full, inverse);
109 | 
110 |         constexpr f64 tan_c2  = 0x5.5555554d8e5b8p-4;
111 |         constexpr f64 tan_c4  = 0x2.222224820264p-4;
112 |         constexpr f64 tan_c6  = 0xd.d0d90de32b3e8p-8;
113 |         constexpr f64 tan_c8  = 0x5.99723bdcf5cacp-8;
114 |         constexpr f64 tan_c10 = 0x2.434a142e413ap-8;
115 |         constexpr f64 tan_c12 = 0xf.2b59061305efp-12;
116 |         constexpr f64 tan_c14 = 0x4.a12565071a664p-12;
117 |         constexpr f64 tan_c16 = 0x4.dada3797ac1bcp-12;
118 |         constexpr f64 tan_c18 = -0x1.a74976b6ea3f3p-12;
119 |         constexpr f64 tan_c20 = 0x1.d06a5ae5e4a74p-12;
120 | 
121 |         constexpr f64 cot_c2  = -0x5.5555555555554p-4;
122 |         constexpr f64 cot_c4  = -0x5.b05b05b05b758p-8;
123 |         constexpr f64 cot_c6  = -0x8.ab355dffc79a8p-12;
124 |         constexpr f64 cot_c8  = -0xd.debbca405c9f8p-16;
125 |         constexpr f64 cot_c10 = -0x1.66a8edb99b15p-16;
126 |         constexpr f64 cot_c12 = -0x2.450239be0ee92p-20;
127 |         constexpr f64 cot_c14 = -0x3.ad6ddb4719438p-24;
128 |         constexpr f64 cot_c16 = -0x5.ff4c42741356p-28;
129 |         constexpr f64 cot_c18 = -0x9.06881bcdf3108p-32;
130 |         constexpr f64 cot_c20 = -0x1.644abedc113cap-32;
131 | 
132 |         const vec<f64, N> x2 = x * x;
133 |         const vec<f64, N> val =
134 |             mask_horner(x2, inverse, 1.0, 1.0, cot_c2, tan_c2, cot_c4, tan_c4, cot_c6, tan_c6, cot_c8, tan_c8,
135 |                         cot_c10, tan_c10, cot_c12, tan_c12, cot_c14, tan_c14, cot_c16, tan_c16, cot_c18,
136 |                         tan_c18, cot_c20, tan_c20);
137 | 
138 |         const vec<f64, N> z = select(inverse, val / -x, val * x);
139 |         return mulsign(z, x_full);
140 |     }
141 |     template <typename T>
142 |     KFR_SINTRIN T tandeg(const T& x)
143 |     {
144 |         return tan(x * c_degtorad<T>);
145 |     }
146 | 
147 |     KFR_HANDLE_SCALAR(tan)
148 |     KFR_SPEC_FN(in_tan, tan)
149 |     KFR_SPEC_FN(in_tan, tandeg)
150 | };
151 | }
152 | // namespace native: public tan/tandeg entry points bound to the default in_tan<> instantiation.
153 | namespace native
154 | {
155 | using fn_tan = internal::in_tan<>::fn_tan; // functor type wrapped by the expression overload below
156 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
157 | KFR_INTRIN ftype<T1> tan(const T1& x) // numeric overload: evaluates immediately
158 | {
159 |     return internal::in_tan<>::tan(x);
160 | }
161 | // Expression overload: returns an expr_func node holding fn_tan and the forwarded operand.
162 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
163 | KFR_INTRIN expr_func<fn_tan, E1> tan(E1&& x)
164 | {
165 |     return { fn_tan(), std::forward<E1>(x) };
166 | }
167 | // tandeg: same pair of overloads for the degree-argument variant.
168 | using fn_tandeg = internal::in_tan<>::fn_tandeg;
169 | template <typename T1, KFR_ENABLE_IF(is_numeric<T1>::value)>
170 | KFR_INTRIN ftype<T1> tandeg(const T1& x)
171 | {
172 |     return internal::in_tan<>::tandeg(x);
173 | }
174 | 
175 | template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
176 | KFR_INTRIN expr_func<fn_tandeg, E1> tandeg(E1&& x)
177 | {
178 |     return { fn_tandeg(), std::forward<E1>(x) };
179 | }
180 | }
181 | }
182 | 
183 | #pragma clang diagnostic pop
184 | 


--------------------------------------------------------------------------------
/include/kfr/cident.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #if defined(_M_IX86) || defined(__i386__) || defined(_M_X64) || defined(__x86_64__)
  4 | #define CID_ARCH_X86 1
  5 | #endif
  6 | // x86 only: word size, then SIMD level. Each level also defines the levels below it.
  7 | #ifdef CID_ARCH_X86
  8 | #if defined(_M_X64) || defined(__x86_64__)
  9 | #define CID_ARCH_X64 1
 10 | #else
 11 | #define CID_ARCH_X32 1
 12 | #endif
 13 | 
 14 | #if defined __AVX512F__ && !defined CID_ARCH_AVX512
 15 | #define CID_ARCH_AVX512 1
 16 | #define CID_ARCH_AVX2 1
 17 | #define CID_ARCH_AVX 1
 18 | #define CID_ARCH_SSE42 1
 19 | #define CID_ARCH_SSE41 1
 20 | #define CID_ARCH_SSSE3 1
 21 | #define CID_ARCH_SSE3 1
 22 | #define CID_ARCH_SSE2 1
 23 | #define CID_ARCH_SSE 1
 24 | #endif
 25 | #if defined __AVX2__ && !defined CID_ARCH_AVX2
 26 | #define CID_ARCH_AVX2 1
 27 | #define CID_ARCH_AVX 1
 28 | #define CID_ARCH_SSE42 1
 29 | #define CID_ARCH_SSE41 1
 30 | #define CID_ARCH_SSSE3 1
 31 | #define CID_ARCH_SSE3 1
 32 | #define CID_ARCH_SSE2 1
 33 | #define CID_ARCH_SSE 1
 34 | #endif
 35 | #if defined __AVX__ && !defined CID_ARCH_AVX
 36 | #define CID_ARCH_AVX 1
 37 | #define CID_ARCH_SSE42 1
 38 | #define CID_ARCH_SSE41 1
 39 | #define CID_ARCH_SSSE3 1
 40 | #define CID_ARCH_SSE3 1
 41 | #define CID_ARCH_SSE2 1
 42 | #define CID_ARCH_SSE 1
 43 | #endif
 44 | #if defined __SSE4_2__ && !defined CID_ARCH_SSE4_2 // NOTE(review): SSE4.x uses two spellings (SSE42/SSE41 above, SSE4_2/SSE4_1 here)
 45 | #define CID_ARCH_SSE4_2 1
 46 | #define CID_ARCH_SSE41 1
 47 | #define CID_ARCH_SSSE3 1
 48 | #define CID_ARCH_SSE3 1
 49 | #define CID_ARCH_SSE2 1
 50 | #define CID_ARCH_SSE 1
 51 | #endif
 52 | #if defined __SSE4_1__ && !defined CID_ARCH_SSE4_1
 53 | #define CID_ARCH_SSE4_1 1
 54 | #define CID_ARCH_SSSE3 1
 55 | #define CID_ARCH_SSE3 1
 56 | #define CID_ARCH_SSE2 1
 57 | #define CID_ARCH_SSE 1
 58 | #endif
 59 | #if defined __SSSE3__ && !defined CID_ARCH_SSSE3
 60 | #define CID_ARCH_SSSE3 1
 61 | #define CID_ARCH_SSE3 1
 62 | #define CID_ARCH_SSE2 1
 63 | #define CID_ARCH_SSE 1
 64 | #endif
 65 | #if defined __SSE3__ && !defined CID_ARCH_SSE3
 66 | #define CID_ARCH_SSE3 1
 67 | #define CID_ARCH_SSE2 1
 68 | #define CID_ARCH_SSE 1
 69 | #endif
 70 | #if (defined CID_ARCH_X64 || defined __SSE2__) && !defined CID_ARCH_SSE2 // x64 builds always get SSE2
 71 | #define CID_ARCH_SSE2 1
 72 | #define CID_ARCH_SSE 1
 73 | #endif
 74 | // Guard on CID_ARCH_SSE, the macro actually defined here (the previous guard named CID_ARCH_SSE1, which nothing defines).
 75 | #if (defined CID_ARCH_X64 || defined __SSE__) && !defined CID_ARCH_SSE
 76 | #define CID_ARCH_SSE 1
 77 | #endif
 78 | // Optional instruction-set extensions, independent of the SIMD level.
 79 | #if defined __FMA__ && !defined CID_ARCH_FMA
 80 | #define CID_ARCH_FMA 1
 81 | #endif
 82 | 
 83 | #if defined __AES__ && !defined CID_ARCH_AES
 84 | #define CID_ARCH_AES 1
 85 | #endif
 86 | 
 87 | #if defined __BMI__ && !defined CID_ARCH_BMI
 88 | #define CID_ARCH_BMI 1
 89 | #endif
 90 | 
 91 | #if defined __BMI2__ && !defined CID_ARCH_BMI2
 92 | #define CID_ARCH_BMI2 1
 93 | #endif
 94 | 
 95 | #if defined __LZCNT__ && !defined CID_ARCH_LZCNT
 96 | #define CID_ARCH_LZCNT 1
 97 | #endif
 98 | // CID_ARCH_NAME: token naming the highest level detected above (there is no separate sse42 name).
 99 | #if defined CID_ARCH_AVX512
100 | #define CID_ARCH_NAME avx512
101 | #elif defined CID_ARCH_AVX2
102 | #define CID_ARCH_NAME avx2
103 | #elif defined CID_ARCH_AVX
104 | #define CID_ARCH_NAME avx
105 | #elif defined CID_ARCH_SSE4_1
106 | #define CID_ARCH_NAME sse41
107 | #elif defined CID_ARCH_SSSE3
108 | #define CID_ARCH_NAME ssse3
109 | #elif defined CID_ARCH_SSE3
110 | #define CID_ARCH_NAME sse3
111 | #elif defined CID_ARCH_SSE2
112 | #define CID_ARCH_NAME sse2
113 | #elif defined CID_ARCH_SSE
114 | #define CID_ARCH_NAME sse
115 | #else
116 | #define CID_ARCH_NAME legacy
117 | #endif
118 | 
119 | #endif
120 | 
121 | #define CID_STRINGIFY2(x) #x // stringizes the argument exactly as written
122 | #define CID_STRINGIFY(x) CID_STRINGIFY2(x) // extra level so a macro argument is expanded before stringizing
123 | 
124 | #if defined(_WIN32) // Windows
125 | #define CID_OS_WIN 1
126 | #endif
127 | // Apple: TargetConditionals.h defines the TARGET_* macros as 0 or 1, so test their values, simulator first.
128 | #if defined(__APPLE__)
129 | #include "TargetConditionals.h"
130 | #if TARGET_IPHONE_SIMULATOR
131 | #define CID_OS_IOS 1
132 | #define CID_OS_IOS_SIMULATOR 1
133 | #define CID_OS_MOBILE 1
134 | #elif TARGET_OS_IPHONE
135 | #define CID_OS_IOS 1
136 | #define CID_OS_MOBILE 1
137 | #elif TARGET_OS_MAC
138 | #define CID_OS_MAC 1
139 | #define CID_OS_MACOS 1
140 | #define CID_OS_OSX 1
141 | #endif
142 | #define CID_OS_POSIX 1
143 | #endif
144 | 
145 | #if defined(__ANDROID__)
146 | #define CID_OS_ANDROID 1
147 | #define CID_OS_MOBILE 1
148 | #define CID_OS_POSIX 1
149 | #endif
150 | 
151 | #if defined(__linux__)
152 | #define CID_OS_LINUX 1
153 | #define CID_OS_POSIX 1
154 | #endif
155 | 
156 | #if defined(_MSC_VER) // Visual C/C++
157 | #define CID_COMPILER_MSVC 1
158 | #define CID_MSVC_ATTRIBUTES 1
159 | #define CID_MSC_VER _MSC_VER
160 | #else
161 | #define CID_MSC_VER 0
162 | #endif
163 | // CID_COMPILER_GNU is set for Clang as well as GCC; CID_GCC_VERSION is major * 100 + minor (0 otherwise).
164 | #if defined(__GNUC__) || defined(__clang__) // GCC, Clang
165 | #define CID_COMPILER_GNU 1
166 | #define CID_GNU_ATTRIBUTES 1
167 | #define CID_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
168 | #if __cplusplus >= 201103L || defined __GXX_EXPERIMENTAL_CXX0X__
169 | #define CID_HAS_GXX_CXX11 1
170 | #endif
171 | #else
172 | #define CID_GCC_VERSION 0
173 | #endif
174 | 
175 | #if defined(__INTEL_COMPILER) // Intel Compiler
176 | #define CID_COMPILER_INTEL 1
177 | #define CID_ICC_VERSION __INTEL_COMPILER
178 | #elif defined(__ICL)
179 | #define CID_COMPILER_INTEL 1
180 | #define CID_ICC_VERSION __ICL
181 | #else
182 | #define CID_ICC_VERSION 0
183 | #endif
184 | 
185 | #if defined(__clang__) // Clang
186 | #define CID_COMPILER_CLANG 1
187 | #ifndef CID_GNU_ATTRIBUTES
188 | #define CID_GNU_ATTRIBUTES 1
189 | #endif
190 | #endif
191 | 
192 | #if defined(CID_GNU_ATTRIBUTES) // GNU attribute syntax takes precedence when both families are available
193 | 
194 | #define CID_NODEBUG
195 | // __attribute__((__nodebug__))
196 | #define CID_INLINE __inline__ __attribute__((__always_inline__))
197 | #define CID_INTRIN CID_INLINE CID_NODEBUG
198 | #define CID_INLINE_MEMBER __attribute__((__always_inline__))
199 | #define CID_INLINE_LAMBDA CID_INLINE_MEMBER
200 | #define CID_NOINLINE __attribute__((__noinline__))
201 | #define CID_FLATTEN __attribute__((__flatten__))
202 | #define CID_RESTRICT __restrict__
203 | 
204 | #elif defined(CID_MSVC_ATTRIBUTES)
205 | 
206 | #define CID_NODEBUG
207 | #define CID_INLINE inline __forceinline
208 | #define CID_INTRIN CID_INLINE CID_NODEBUG
209 | #define CID_INLINE_MEMBER __forceinline
210 | #define CID_INLINE_LAMBDA
211 | #define CID_NOINLINE __declspec(noinline)
212 | #define CID_FLATTEN
213 | #define CID_RESTRICT __restrict
214 | 
215 | #endif // with neither family defined, none of the macros above exist
216 | 
217 | #define CID_INLINE_STATIC CID_INLINE static
218 | 
219 | #define CID_EXTERN_C extern "C"
220 | 
221 | #define CID_PUBLIC_C CID_EXTERN_C CID_NOINLINE
222 | // This header never defines CID_ALWAYS_INLINE; CID_INLINE is its forced-inline spelling, so expand to that.
223 | #define CID_ALWAYS_INLINE_STATIC CID_INLINE static
224 | 
225 | #ifdef CID_OS_WIN // calling-convention keyword: MSVC spelling on Windows, GNU attribute elsewhere
226 | #define CID_CDECL __cdecl
227 | #else
228 | #define CID_CDECL __attribute__((cdecl))
229 | #endif
230 | 
231 | #ifdef CID_OS_WIN // DLL import/export decorations; empty on non-Windows targets
232 | #if defined(CID_MSVC_ATTRIBUTES)
233 | #define CID_DLL_EXPORT __declspec(dllexport)
234 | #define CID_DLL_IMPORT __declspec(dllimport)
235 | #else
236 | #define CID_DLL_EXPORT __attribute__((dllexport))
237 | #define CID_DLL_IMPORT __attribute__((dllimport))
238 | #endif
239 | #else
240 | #define CID_DLL_EXPORT
241 | #define CID_DLL_IMPORT
242 | #endif
243 | 
244 | #ifdef __has_builtin // each wrapper below forwards to the compiler query when it exists and yields 0 otherwise
245 | #define CID_HAS_BUILTIN(builtin) __has_builtin(builtin)
246 | #else
247 | #define CID_HAS_BUILTIN(builtin) 0
248 | #endif
249 | 
250 | #ifdef __has_feature
251 | #define CID_HAS_FEATURE(feature) __has_feature(feature)
252 | #else
253 | #define CID_HAS_FEATURE(feature) 0
254 | #endif
255 | 
256 | #ifdef __has_extension
257 | #define CID_HAS_EXTENSION(extension) __has_extension(extension)
258 | #else
259 | #define CID_HAS_EXTENSION(extension) 0
260 | #endif
261 | 
262 | #ifdef __has_attribute
263 | #define CID_HAS_ATTRIBUTE(attribute) __has_attribute(attribute)
264 | #else
265 | #define CID_HAS_ATTRIBUTE(attribute) 0
266 | #endif
267 | 
268 | #ifdef __has_warning
269 | #define CID_HAS_WARNING(warning) __has_warning(warning)
270 | #else
271 | #define CID_HAS_WARNING(warning) 0
272 | #endif
273 | 
274 | #define CID_HAS_VARIADIC_TEMPLATES                                                                           \
275 |     (CID_HAS_FEATURE(cxx_variadic_templates) || (CID_GCC_VERSION >= 404 && CID_HAS_GXX_CXX11) ||             \
276 |      CID_MSC_VER >= 1800)
277 | // CID_C_API: export when building the DLL (CID_BUILDING_DLL defined), import otherwise.
278 | #ifdef CID_BUILDING_DLL
279 | #define CID_C_API CID_DLL_EXPORT
280 | #else
281 | #define CID_C_API CID_DLL_IMPORT
282 | #endif
283 | 
284 | #if __cplusplus >= 201103L || CID_MSC_VER >= 1900 || CID_HAS_FEATURE(cxx_constexpr)
285 | #define CID_HAS_CONSTEXPR 1
286 | #endif
287 | // "Full" constexpr means the relaxed C++14 rules, so query cxx_relaxed_constexpr rather than the C++11 feature.
288 | #if __cpp_constexpr >= 201304 || CID_HAS_FEATURE(cxx_relaxed_constexpr)
289 | #define CID_HAS_FULL_CONSTEXPR 1
290 | #endif
291 | 
292 | #if CID_HAS_CONSTEXPR
293 | #define CID_CONSTEXPR constexpr
294 | #else
295 | #define CID_CONSTEXPR
296 | #endif
297 | 
298 | #if CID_HAS_FEATURE(cxx_noexcept) || (CID_GCC_VERSION >= 408 && CID_HAS_GXX_CXX11) || CID_MSC_VER >= 1900
299 | #define CID_HAS_NOEXCEPT 1
300 | #endif
301 | 
302 | #if CID_HAS_NOEXCEPT
303 | #define CID_NOEXCEPT noexcept
304 | #else
305 | #define CID_NOEXCEPT
306 | #endif
307 | // Exceptions: off only when the toolchain says so explicitly; an undefined _HAS_EXCEPTIONS must not count as "off".
308 | #if CID_COMPILER_GNU && !defined(__EXCEPTIONS)
309 | #define CID_HAS_EXCEPTIONS 0
310 | #endif
311 | #if CID_COMPILER_MSVC && defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS
312 | #define CID_HAS_EXCEPTIONS 0
313 | #endif
314 | 
315 | #ifndef CID_HAS_EXCEPTIONS
316 | #define CID_HAS_EXCEPTIONS 1
317 | #endif
318 | 
319 | #include <assert.h>
320 | // CID_THROW(x): throws x when exceptions are enabled, otherwise asserts; may be predefined by the includer.
321 | #ifndef CID_THROW
322 | #if CID_HAS_EXCEPTIONS
323 | #define CID_THROW(x) throw x
324 | #else
325 | #define CID_THROW(x) assert(false)
326 | #endif
327 | #endif
328 | 
329 | #if __cplusplus >= 201103L || CID_MSC_VER >= 1900 || CID_HAS_FEATURE(cxx_constexpr)
330 | // C++11 and later: constexpr function template, rejects pointers at compile time.
331 | #include <cstdint>
332 | namespace cid
333 | {
334 | template <typename T, size_t N>
335 | constexpr inline static size_t arraysize(const T (&)[N]) noexcept
336 | {
337 |     return N;
338 | }
339 | }
340 | 
341 | #define CID_ARRAYSIZE(arr) ::cid::arraysize(arr)
342 | #elif CID_COMPILER_MSVC
343 | #define CID_ARRAYSIZE(arr) _countof(arr)
344 | #elif __cplusplus >= 199711L &&                                                                              \
345 |     (defined(__INTEL_COMPILER) || defined(__clang__) ||                                                      \
346 |      (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))))
347 | template <typename T, size_t N>
348 | char (&COUNTOF_REQUIRES_ARRAY_ARGUMENT(T (&)[N]))[N];
349 | #define CID_ARRAYSIZE(x) sizeof(COUNTOF_REQUIRES_ARRAY_ARGUMENT(x))
350 | #else // last resort: fully parenthesized so the macro is safe inside larger expressions
351 | #define CID_ARRAYSIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
352 | #endif
353 | 
354 | #ifdef CID_COMPILER_MSVC // compiler-specific spelling of the enclosing function's full signature string
355 | #define CID_FUNC_SIGNATURE __FUNCSIG__
356 | #else
357 | #define CID_FUNC_SIGNATURE __PRETTY_FUNCTION__
358 | #endif
359 | 


--------------------------------------------------------------------------------
/include/kfr/dft/conv.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 3 |  * This file is part of KFR
 4 |  *
 5 |  * KFR is free software: you can redistribute it and/or modify
 6 |  * it under the terms of the GNU General Public License as published by
 7 |  * the Free Software Foundation, either version 3 of the License, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * KFR is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  * GNU General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public License
16 |  * along with KFR.
17 |  *
18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
20 |  * disclosing the source code of your own applications.
21 |  * See http://www.kfrlib.com for details.
22 |  */
23 | #pragma once
24 | 
25 | #include "../base/complex.hpp"
26 | #include "../base/constants.hpp"
27 | #include "../base/memory.hpp"
28 | #include "../base/read_write.hpp"
29 | #include "../base/vec.hpp"
30 | #include "../expressions/operators.hpp"
31 | 
32 | #include "fft.hpp"
33 | 
34 | #pragma clang diagnostic push
35 | #if CID_HAS_WARNING("-Wshadow")
36 | #pragma clang diagnostic ignored "-Wshadow"
37 | #endif
38 | 
39 | namespace kfr
40 | {
41 | 
42 | template <typename T, size_t Tag1, size_t Tag2>
43 | KFR_INTRIN univector<T> convolve(const univector<T, Tag1>& src1, const univector<T, Tag2>& src2)
44 | { // DFT-based convolution: transform both zero-padded inputs, multiply, transform back, scale by 1/length.
45 |     const size_t padded_len       = next_poweroftwo(src1.size() + src2.size() - 1);
46 |     univector<complex<T>> lhs_buf = src1;
47 |     univector<complex<T>> rhs_buf = src2;
48 |     lhs_buf.resize(padded_len, 0);
49 |     rhs_buf.resize(padded_len, 0);
50 |     dft_plan<T> dft_exec(padded_len);
51 |     univector<u8> scratch(dft_exec.temp_size);
52 |     dft_exec.execute(lhs_buf, lhs_buf, scratch); // in-place transforms of both operands
53 |     dft_exec.execute(rhs_buf, rhs_buf, scratch);
54 |     lhs_buf = lhs_buf * rhs_buf;
55 |     dft_exec.execute(lhs_buf, lhs_buf, scratch, true); // trailing `true`: presumably selects the inverse transform
56 |     return typed<T>(real(lhs_buf), src1.size() + src2.size() - 1) / T(padded_len);
57 | }
58 | }
59 | #pragma clang diagnostic pop
60 | 


--------------------------------------------------------------------------------
/include/kfr/dft/reference_dft.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "../base/complex.hpp"
 26 | #include "../base/constants.hpp"
 27 | #include "../base/memory.hpp"
 28 | #include "../base/read_write.hpp"
 29 | #include "../base/vec.hpp"
 30 | #include "../misc/small_buffer.hpp"
 31 | #include <cmath>
 32 | 
 33 | namespace kfr
 34 | {
 35 | 
 36 | template <typename Tnumber = long double> // One recursive radix-2 pass over the N elements offset, offset+delta, ...
 37 | void reference_fft_pass(Tnumber pi2, size_t N, size_t offset, size_t delta, int flag, Tnumber (*x)[2],
 38 |                         Tnumber (*X)[2], Tnumber (*XX)[2])
 39 | { // x: input pairs [re, im]; X: result of this pass; XX: scratch. N must reach exactly 2 by halving.
 40 |     const size_t N2 = N / 2;
 41 |     using std::sin;
 42 |     using std::cos;
 43 | 
 44 |     if (N != 2)
 45 |     {
 46 |         reference_fft_pass(pi2, N2, offset, 2 * delta, flag, x, XX, X); // half starting at offset; X and XX swap roles
 47 |         reference_fft_pass(pi2, N2, offset + delta, 2 * delta, flag, x, XX, X); // half starting at offset + delta
 48 | 
 49 |         for (size_t k = 0; k < N2; k++)
 50 |         {
 51 |             const size_t k00   = offset + k * delta; // output slot k
 52 |             const size_t k01   = k00 + N2 * delta; // output slot k + N/2
 53 |             const size_t k10   = offset + 2 * k * delta; // element k of the first half's result in XX
 54 |             const size_t k11   = k10 + delta; // element k of the second half's result in XX
 55 |             const Tnumber m    = static_cast<Tnumber>(k) / N;
 56 |             const Tnumber cs   = cos(pi2 * m);
 57 |             const Tnumber sn   = flag * sin(pi2 * m); // flag (+1 / -1) flips the sign of the sine term
 58 |             const Tnumber tmp0 = cs * XX[k11][0] + sn * XX[k11][1]; // real part of twiddle * XX[k11]
 59 |             const Tnumber tmp1 = cs * XX[k11][1] - sn * XX[k11][0]; // imaginary part of twiddle * XX[k11]
 60 |             X[k01][0]          = XX[k10][0] - tmp0;
 61 |             X[k01][1]          = XX[k10][1] - tmp1;
 62 |             X[k00][0]          = XX[k10][0] + tmp0;
 63 |             X[k00][1]          = XX[k10][1] + tmp1;
 64 |         }
 65 |     }
 66 |     else
 67 |     { // base case: two-point butterfly read straight from the input x
 68 |         const size_t k00 = offset;
 69 |         const size_t k01 = k00 + delta;
 70 |         X[k01][0]        = x[k00][0] - x[k01][0];
 71 |         X[k01][1]        = x[k00][1] - x[k01][1];
 72 |         X[k00][0]        = x[k00][0] + x[k01][0];
 73 |         X[k00][1]        = x[k00][1] + x[k01][1];
 74 |     }
 75 | }
 76 | 
 77 | template <typename Tnumber = long double, typename T>
 78 | void reference_fft(complex<T>* out, const complex<T>* in, size_t size, bool inversion = false)
 79 | { // Runs reference_fft_pass in Tnumber precision; that pass halves the size down to 2, so size should be a power of two.
 80 |     if (size < 2) // nothing is computed and `out` is left untouched
 81 |         return;
 82 |     using pair_ptr = Tnumber(*)[2]; // complex buffer viewed as [re, im] pairs
 83 |     std::vector<complex<Tnumber>> wide_in(size);
 84 |     std::vector<complex<Tnumber>> wide_out(size);
 85 |     std::vector<complex<Tnumber>> scratch(size);
 86 |     std::copy(in, in + size, wide_in.begin()); // convert the input to the working precision
 87 |     const int direction = inversion ? -1 : +1;
 88 |     reference_fft_pass<Tnumber>(c_pi<Tnumber, 2, 1>, size, 0, 1, direction, pair_ptr(wide_in.data()),
 89 |                                 pair_ptr(wide_out.data()), pair_ptr(scratch.data()));
 90 |     std::copy(wide_out.begin(), wide_out.end(), out);
 91 | }
 92 | 
 93 | template <typename Tnumber = long double, typename T>
 94 | void reference_dft(complex<T>* out, const complex<T>* in, size_t size, bool inversion = false)
 95 | { // Direct O(size^2) DFT accumulated in Tnumber; power-of-two sizes are delegated to reference_fft.
 96 |     using std::sin;
 97 |     using std::cos;
 98 |     if (is_poweroftwo(size))
 99 |     {
100 |         return reference_fft<Tnumber>(out, in, size, inversion);
101 |     }
102 |     constexpr Tnumber pi2 = c_pi<Tnumber, 2>;
103 |     if (size < 2)
104 |         return;
105 |     std::vector<complex<T>> datain;
106 |     if (out == in) // in-place call: read from a private copy because every output bin needs all inputs
107 |     {
108 |         datain.resize(size);
109 |         std::copy_n(in, size, datain.begin());
110 |         in = datain.data();
111 |     }
112 |     { // bin 0 is the plain sum of the inputs
113 |         Tnumber sumr = 0;
114 |         Tnumber sumi = 0;
115 |         for (size_t j = 0; j < size; j++)
116 |         {
117 |             sumr += static_cast<Tnumber>(in[j].real());
118 |             sumi += static_cast<Tnumber>(in[j].imag());
119 |         }
120 |         out[0] = { static_cast<T>(sumr), static_cast<T>(sumi) };
121 |     }
122 |     for (size_t i = 1; i < size; i++)
123 |     {
124 |         Tnumber sumr = static_cast<Tnumber>(in[0].real()); // the j == 0 term has a unit twiddle
125 |         Tnumber sumi = static_cast<Tnumber>(in[0].imag());
126 | 
127 |         for (size_t j = 1; j < size; j++)
128 |         {
129 |             const Tnumber x = pi2 * ((i * j) % size) / size; // phase reduced modulo size before scaling
130 |             Tnumber twr     = cos(x);
131 |             Tnumber twi     = sin(x);
132 |             if (inversion)
133 |                 twi = -twi;
134 | 
135 |             sumr += twr * static_cast<Tnumber>(in[j].real()) + twi * static_cast<Tnumber>(in[j].imag());
136 |             sumi += twr * static_cast<Tnumber>(in[j].imag()) - twi * static_cast<Tnumber>(in[j].real());
137 |         }
138 |         out[i] = { static_cast<T>(sumr), static_cast<T>(sumi) }; // store once per bin, after the sum is complete
139 |     }
140 | }
141 | }
142 | 


--------------------------------------------------------------------------------
/include/kfr/dispatch/cpuid.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "../base/types.hpp"
 26 | #include <cstring>
 27 | 
 28 | namespace kfr
 29 | {
 30 | 
 31 | struct cpu_features // CPUID results: leaf limits, one-bit feature flags, vendor and model strings
 32 | {
 33 |     u32 max; // EAX of CPUID leaf 0 (as filled by detect_cpu)
 34 |     u32 exmax; // EAX of CPUID leaf 0x80000000 (as filled by detect_cpu)
 35 |     u32 isIntel : 1; // vendor string equals "GenuineIntel"
 36 |     u32 isAMD : 1; // vendor string equals "AuthenticAMD"
 37 |     u32 has3DNOW : 1;
 38 |     u32 has3DNOWEXT : 1;
 39 |     u32 hasABM : 1;
 40 |     u32 hasADX : 1;
 41 |     u32 hasAES : 1;
 42 |     u32 hasAVX : 1;
 43 |     u32 hasAVX2 : 1;
 44 |     u32 hasAVXOSSUPPORT : 1; // derived in detect_cpu from hasAVX, hasOSXSAVE and XCR0
 45 |     u32 hasAVX512OSSUPPORT : 1; // derived in detect_cpu from hasAVX512F, hasOSXSAVE and XCR0
 46 |     u32 hasAVX512CD : 1;
 47 |     u32 hasAVX512ER : 1;
 48 |     u32 hasAVX512F : 1;
 49 |     u32 hasAVX512DQ : 1;
 50 |     u32 hasAVX512PF : 1;
 51 |     u32 hasAVX512BW : 1;
 52 |     u32 hasBMI1 : 1;
 53 |     u32 hasBMI2 : 1;
 54 |     u32 hasCLFSH : 1;
 55 |     u32 hasCMOV : 1;
 56 |     u32 hasCMPXCHG16B : 1;
 57 |     u32 hasCX8 : 1;
 58 |     u32 hasERMS : 1;
 59 |     u32 hasF16C : 1;
 60 |     u32 hasFMA : 1;
 61 |     u32 hasFSGSBASE : 1;
 62 |     u32 hasFXSR : 1;
 63 |     u32 hasHLE : 1;
 64 |     u32 hasINVPCID : 1;
 65 |     u32 hasLAHF : 1;
 66 |     u32 hasLZCNT : 1;
 67 |     u32 hasMMX : 1;
 68 |     u32 hasMMXEXT : 1;
 69 |     u32 hasMONITOR : 1;
 70 |     u32 hasMOVBE : 1;
 71 |     u32 hasMSR : 1;
 72 |     u32 hasOSXSAVE : 1;
 73 |     u32 hasPCLMULQDQ : 1;
 74 |     u32 hasPOPCNT : 1;
 75 |     u32 hasPREFETCHWT1 : 1;
 76 |     u32 hasRDRAND : 1;
 77 |     u32 hasRDSEED : 1;
 78 |     u32 hasRDTSCP : 1;
 79 |     u32 hasRTM : 1;
 80 |     u32 hasSEP : 1;
 81 |     u32 hasSHA : 1;
 82 |     u32 hasSSE : 1;
 83 |     u32 hasSSE2 : 1;
 84 |     u32 hasSSE3 : 1;
 85 |     u32 hasSSE41 : 1;
 86 |     u32 hasSSE42 : 1;
 87 |     u32 hasSSE4a : 1;
 88 |     u32 hasSSSE3 : 1;
 89 |     u32 hasSYSCALL : 1;
 90 |     u32 hasTBM : 1;
 91 |     u32 hasXOP : 1;
 92 |     u32 hasXSAVE : 1;
 93 |     u32 padding1 : 6; // 58 flag bits above + 6 padding bits = 64 bits
 94 |     char vendor[17]; // 12 bytes written by detect_cpu (EBX, EDX, ECX of leaf 0); the rest stays zero
 95 |     char model[49]; // 48 bytes written by detect_cpu (leaves 0x80000002..4); the last byte stays zero
 96 |     char padding2[2];
 97 | };
 98 | 
 99 | namespace internal
100 | {
101 | 
102 | struct cpu_data // raw output of one CPUID call
103 | {
104 |     u32 data[4]; // filled by cpuid() in the order EAX, EBX, ECX, EDX
105 | };
106 | 
107 | #if defined KFR_COMPILER_GNU || defined KFR_COMPILER_CLANG // inline-asm helpers exist only for these compilers
108 | KFR_INLINE u32 get_cpuid(u32 func, u32 subfunc, u32* eax, u32* ebx, u32* ecx, u32* edx)
109 | {
110 |     __asm__("cpuid" : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) : "0"(func), "2"(subfunc)); // func in EAX, subfunc in ECX
111 |     return 1; // always 1; the only caller in this file ignores it
112 | }
113 | KFR_INLINE void cpuid(u32* ptr, u32 func, u32 subfunc = 0) // stores EAX, EBX, ECX, EDX into ptr[0..3]
114 | {
115 |     get_cpuid(func, subfunc, &ptr[0], &ptr[1], &ptr[2], &ptr[3]);
116 | }
117 | KFR_INLINE u32 get_xcr0() // low 32 bits of extended control register 0
118 | {
119 |     u32 xcr0;
120 |     __asm__("xgetbv" : "=a"(xcr0) : "c"(0) : "%edx"); // ECX = 0 selects XCR0; EDX is listed as clobbered and discarded
121 |     return xcr0;
122 | }
123 | #endif
124 | 
125 | template <size_t = 0> // dummy template parameter; the function takes no arguments
126 | cpu_t detect_cpu()
127 | { // Queries CPUID (and XCR0 when permitted) and returns the highest cpu_t level that is both compiled in and usable.
128 |     cpu_features c;
129 |     memset(&c, 0, sizeof(c));
130 |     cpu_data data0;
131 |     cpu_data exdata0;
132 | 
133 |     u32 f_1_ECX(0);
134 |     u32 f_1_EDX(0);
135 |     u32 f_7_EBX(0);
136 |     u32 f_7_ECX(0);
137 |     u32 f_81_ECX(0);
138 |     u32 f_81_EDX(0);
139 |     // Leaf 0 and leaf 0x80000000 report the highest supported standard / extended leaf in EAX.
140 |     cpuid(data0.data, 0);
141 |     c.max = static_cast<u32>(data0.data[0]);
142 |     cpuid(exdata0.data, 0x80000000);
143 |     c.exmax = static_cast<u32>(exdata0.data[0]);
144 |     // Vendor string is laid out as EBX, EDX, ECX; the zeroed remainder of c.vendor terminates it.
145 |     *ptr_cast<u32>(c.vendor)     = static_cast<u32>(data0.data[1]);
146 |     *ptr_cast<u32>(c.vendor + 4) = static_cast<u32>(data0.data[3]);
147 |     *ptr_cast<u32>(c.vendor + 8) = static_cast<u32>(data0.data[2]);
148 | 
149 |     c.isIntel = strncmp(c.vendor, "GenuineIntel", sizeof(c.vendor)) == 0 ? 1 : 0;
150 |     c.isAMD   = strncmp(c.vendor, "AuthenticAMD", sizeof(c.vendor)) == 0 ? 1 : 0;
151 |     // Feature words stay 0 when the corresponding leaf is not available.
152 |     if (c.max >= 1)
153 |     {
154 |         cpu_data data1;
155 |         cpuid(data1.data, 1);
156 |         f_1_ECX = static_cast<u32>(data1.data[2]);
157 |         f_1_EDX = static_cast<u32>(data1.data[3]);
158 |     }
159 | 
160 |     if (c.max >= 7)
161 |     {
162 |         cpu_data data7;
163 |         cpuid(data7.data, 7);
164 |         f_7_EBX = static_cast<u32>(data7.data[1]);
165 |         f_7_ECX = static_cast<u32>(data7.data[2]);
166 |     }
167 | 
168 |     if (c.exmax >= 0x80000001)
169 |     {
170 |         cpu_data data81;
171 |         cpuid(data81.data, 0x80000001);
172 |         f_81_ECX = static_cast<u32>(data81.data[2]);
173 |         f_81_EDX = static_cast<u32>(data81.data[3]);
174 |     }
175 |     // Model string: 3 leaves x 16 bytes = 48 bytes, leaving the last byte of c.model zero.
176 |     if (c.exmax >= 0x80000004)
177 |     {
178 |         cpu_data data82;
179 |         cpu_data data83;
180 |         cpu_data data84;
181 |         cpuid(data82.data, 0x80000002);
182 |         cpuid(data83.data, 0x80000003);
183 |         cpuid(data84.data, 0x80000004);
184 |         memcpy(c.model, data82.data, sizeof(cpu_data));
185 |         memcpy(c.model + 16, data83.data, sizeof(cpu_data));
186 |         memcpy(c.model + 32, data84.data, sizeof(cpu_data));
187 |     }
188 |     // Each flag is one bit of a feature word: (word >> bit) & 1, optionally gated on the vendor.
189 |     c.hasSSE3        = f_1_ECX >> 0 & 1;
190 |     c.hasPCLMULQDQ   = f_1_ECX >> 1 & 1;
191 |     c.hasMONITOR     = f_1_ECX >> 3 & 1;
192 |     c.hasSSSE3       = f_1_ECX >> 9 & 1;
193 |     c.hasFMA         = f_1_ECX >> 12 & 1;
194 |     c.hasCMPXCHG16B  = f_1_ECX >> 13 & 1;
195 |     c.hasSSE41       = f_1_ECX >> 19 & 1;
196 |     c.hasSSE42       = f_1_ECX >> 20 & 1;
197 |     c.hasMOVBE       = f_1_ECX >> 22 & 1;
198 |     c.hasPOPCNT      = f_1_ECX >> 23 & 1;
199 |     c.hasAES         = f_1_ECX >> 25 & 1;
200 |     c.hasXSAVE       = f_1_ECX >> 26 & 1;
201 |     c.hasOSXSAVE     = f_1_ECX >> 27 & 1;
202 |     c.hasAVX         = f_1_ECX >> 28 & 1;
203 |     c.hasF16C        = f_1_ECX >> 29 & 1;
204 |     c.hasRDRAND      = f_1_ECX >> 30 & 1;
205 |     c.hasMSR         = f_1_EDX >> 5 & 1;
206 |     c.hasCX8         = f_1_EDX >> 8 & 1;
207 |     c.hasSEP         = f_1_EDX >> 11 & 1;
208 |     c.hasCMOV        = f_1_EDX >> 15 & 1;
209 |     c.hasCLFSH       = f_1_EDX >> 19 & 1;
210 |     c.hasMMX         = f_1_EDX >> 23 & 1;
211 |     c.hasFXSR        = f_1_EDX >> 24 & 1;
212 |     c.hasSSE         = f_1_EDX >> 25 & 1;
213 |     c.hasSSE2        = f_1_EDX >> 26 & 1;
214 |     c.hasFSGSBASE    = f_7_EBX >> 0 & 1;
215 |     c.hasBMI1        = f_7_EBX >> 3 & 1;
216 |     c.hasHLE         = c.isIntel && f_7_EBX >> 4 & 1;
217 |     c.hasAVX2        = f_7_EBX >> 5 & 1;
218 |     c.hasBMI2        = f_7_EBX >> 8 & 1;
219 |     c.hasERMS        = f_7_EBX >> 9 & 1;
220 |     c.hasINVPCID     = f_7_EBX >> 10 & 1;
221 |     c.hasRTM         = c.isIntel && f_7_EBX >> 11 & 1;
222 |     c.hasAVX512F     = f_7_EBX >> 16 & 1;
223 |     c.hasAVX512DQ    = f_7_EBX >> 17 & 1;
224 |     c.hasRDSEED      = f_7_EBX >> 18 & 1;
225 |     c.hasADX         = f_7_EBX >> 19 & 1;
226 |     c.hasAVX512PF    = f_7_EBX >> 26 & 1;
227 |     c.hasAVX512ER    = f_7_EBX >> 27 & 1;
228 |     c.hasAVX512CD    = f_7_EBX >> 28 & 1;
229 |     c.hasSHA         = f_7_EBX >> 29 & 1;
230 |     c.hasAVX512BW    = f_7_EBX >> 30 & 1;
231 |     c.hasPREFETCHWT1 = f_7_ECX >> 0 & 1;
232 |     c.hasLAHF        = f_81_ECX >> 0 & 1;
233 |     c.hasLZCNT       = c.isIntel && f_81_ECX >> 5 & 1;
234 |     c.hasABM         = c.isAMD && f_81_ECX >> 5 & 1;
235 |     c.hasSSE4a       = c.isAMD && f_81_ECX >> 6 & 1;
236 |     c.hasXOP         = c.isAMD && f_81_ECX >> 11 & 1;
237 |     c.hasTBM         = c.isAMD && f_81_ECX >> 21 & 1;
238 |     c.hasSYSCALL     = c.isIntel && f_81_EDX >> 11 & 1;
239 |     c.hasMMXEXT      = c.isAMD && f_81_EDX >> 22 & 1;
240 |     c.hasRDTSCP      = c.isIntel && f_81_EDX >> 27 & 1;
241 |     c.has3DNOWEXT    = c.isAMD && f_81_EDX >> 30 & 1;
242 |     c.has3DNOW       = c.isAMD && f_81_EDX >> 31 & 1;
243 |     // XGETBV raises #UD unless the OS has enabled XSAVE (OSXSAVE), so only read XCR0 when that bit is set.
244 |     const u32 xcr0 = c.hasOSXSAVE ? get_xcr0() : 0;
245 |     // AVX needs XCR0 bits 1-2 (SSE + AVX state); AVX-512 additionally needs bits 5-7, i.e. mask 0xE6.
246 |     c.hasAVXOSSUPPORT    = c.hasAVX && c.hasOSXSAVE && (xcr0 & 0x06) == 0x06;
247 |     c.hasAVX512OSSUPPORT = c.hasAVX512F && c.hasOSXSAVE && (xcr0 & 0xE6) == 0xE6;
248 |     // Highest level first; each check is compiled in only when the matching KFR_AVAIL_* macro is defined.
249 | #ifdef KFR_AVAIL_AVX512
250 |     if (c.hasAVX512F && c.hasAVX512BW && c.hasAVX512DQ && c.hasAVX512OSSUPPORT)
251 |         return cpu_t::avx3;
252 | #endif
253 | #ifdef KFR_AVAIL_AVX2
254 |     if (c.hasAVX2 && c.hasAVXOSSUPPORT)
255 |         return cpu_t::avx2;
256 | #endif
257 | #ifdef KFR_AVAIL_AVX
258 |     if (c.hasAVX && c.hasAVXOSSUPPORT)
259 |         return cpu_t::avx1;
260 | #endif
261 | #ifdef KFR_AVAIL_SSE41
262 |     if (c.hasSSE41)
263 |         return cpu_t::sse41;
264 | #endif
265 | #ifdef KFR_AVAIL_SSSE3
266 |     if (c.hasSSSE3)
267 |         return cpu_t::ssse3;
268 | #endif
269 | #ifdef KFR_AVAIL_SSE3
270 |     if (c.hasSSE3)
271 |         return cpu_t::sse3;
272 | #endif
273 | #ifdef KFR_AVAIL_SSE2
274 |     if (c.hasSSE2)
275 |         return cpu_t::sse2;
276 | #endif
277 |     return cpu_t::lowest;
278 | }
279 | }
280 | }
281 | 


--------------------------------------------------------------------------------
/include/kfr/dispatch/cpuid_auto.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 3 |  * This file is part of KFR
 4 |  *
 5 |  * KFR is free software: you can redistribute it and/or modify
 6 |  * it under the terms of the GNU General Public License as published by
 7 |  * the Free Software Foundation, either version 3 of the License, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * KFR is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  * GNU General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public License
16 |  * along with KFR.
17 |  *
18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
20 |  * disclosing the source code of your own applications.
21 |  * See http://www.kfrlib.com for details.
22 |  */
23 | #pragma once
24 | 
25 | #include "cpuid.hpp"
26 | 
27 | namespace kfr
28 | {
29 | namespace internal
30 | {
31 | 
32 | KFR_INLINE cpu_t& cpu_v()
33 | {
34 |     static cpu_t v1 = cpu_t::native;
35 |     return v1;
36 | }
37 | 
38 | KFR_INLINE char init_cpu_v()
39 | {
40 |     cpu_v() = detect_cpu<0>();
41 |     return 0;
42 | }
43 | 
44 | KFR_INLINE char init_dummyvar()
45 | {
46 |     static char dummy = init_cpu_v();
47 |     return dummy;
48 | }
49 | 
50 | static char dummyvar = init_dummyvar();
51 | }
52 | KFR_INLINE cpu_t get_cpu() { return internal::cpu_v(); }
53 | }
54 | 


--------------------------------------------------------------------------------
/include/kfr/dispatch/runtimedispatch.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "../base/dispatch.hpp"
 26 | #include "../base/types.hpp"
 27 | #include "cpuid_auto.hpp"
 28 | 
 29 | namespace kfr
 30 | {
 31 | 
 32 | namespace internal
 33 | {
 34 | 
 35 | template <typename Fn, typename... Args>
 36 | KFR_CPU_INTRIN(sse2)
 37 | auto with_cpu_impl(ccpu_t<cpu_t::sse2>, Fn&& fn, Args&&... args)
 38 | {
 39 |     return fn(std::forward<Args>(args)...);
 40 | }
 41 | 
 42 | template <typename Fn, typename... Args>
 43 | KFR_CPU_INTRIN(sse3)
 44 | auto with_cpu_impl(ccpu_t<cpu_t::sse3>, Fn&& fn, Args&&... args)
 45 | {
 46 |     return fn(std::forward<Args>(args)...);
 47 | }
 48 | 
 49 | template <typename Fn, typename... Args>
 50 | KFR_CPU_INTRIN(ssse3)
 51 | auto with_cpu_impl(ccpu_t<cpu_t::ssse3>, Fn&& fn, Args&&... args)
 52 | {
 53 |     return fn(std::forward<Args>(args)...);
 54 | }
 55 | 
 56 | template <typename Fn, typename... Args>
 57 | KFR_CPU_INTRIN(sse41)
 58 | auto with_cpu_impl(ccpu_t<cpu_t::sse41>, Fn&& fn, Args&&... args)
 59 | {
 60 |     return fn(std::forward<Args>(args)...);
 61 | }
 62 | 
 63 | template <typename Fn, typename... Args>
 64 | KFR_CPU_INTRIN(sse42)
 65 | auto with_cpu_impl(ccpu_t<cpu_t::sse42>, Fn&& fn, Args&&... args)
 66 | {
 67 |     return fn(std::forward<Args>(args)...);
 68 | }
 69 | 
 70 | template <typename Fn, typename... Args>
 71 | KFR_CPU_INTRIN(avx)
 72 | auto with_cpu_impl(ccpu_t<cpu_t::avx>, Fn&& fn, Args&&... args)
 73 | {
 74 |     return fn(std::forward<Args>(args)...);
 75 | }
 76 | 
 77 | template <typename Fn, typename... Args>
 78 | KFR_CPU_INTRIN(avx2)
 79 | auto with_cpu_impl(ccpu_t<cpu_t::avx2>, Fn&& fn, Args&&... args)
 80 | {
 81 |     return fn(std::forward<Args>(args)...);
 82 | }
 83 | }
 84 | 
 85 | template <cpu_t cpu, typename Fn, typename... Args>
 86 | KFR_INTRIN auto with_cpu(ccpu_t<cpu>, Fn&& fn, Args&&... args)
 87 | {
 88 |     return internal::with_cpu_impl(ccpu<cpu>, std::forward<Fn>(fn), std::forward<Args>(args)...);
 89 | }
 90 | 
 91 | template <cpu_t cpu, typename Fn>
 92 | struct fn_with_cpu
 93 | {
 94 |     template <typename... Args>
 95 |     KFR_INTRIN auto operator()(Args&&... args) -> decltype(std::declval<Fn>()(std::forward<Args>(args)...))
 96 |     {
 97 |         return internal::with_cpu_impl(ccpu<cpu>, std::forward<Fn>(fn), std::forward<Args>(args)...);
 98 |     }
 99 |     Fn fn;
100 | };
101 | 
102 | template <cpu_t cpu, typename Fn>
103 | KFR_INTRIN fn_with_cpu<cpu, Fn> make_with_cpu(ccpu_t<cpu>, Fn&& fn)
104 | {
105 |     return { std::forward<Fn>(fn) };
106 | }
107 | 
108 | namespace internal
109 | {
110 | 
111 | template <typename Fn, cpu_t, cpu_t...>
112 | struct runtime_dispatcher;
113 | 
114 | template <typename Fn, cpu_t oldest>
115 | struct runtime_dispatcher<Fn, oldest>
116 | {
117 |     using targetFn = retarget<Fn, oldest>;
118 | 
119 |     template <typename... Args>
120 |     KFR_INLINE static result_of<targetFn(Args&&...)> call(Fn&& fn, cpu_t, Args&&... args)
121 |     {
122 |         return cpu_caller<oldest>::retarget_call(std::forward<Fn>(fn), std::forward<Args>(args)...);
123 |     }
124 | };
125 | 
126 | template <typename Fn, cpu_t newest, cpu_t next, cpu_t... cpus>
127 | struct runtime_dispatcher<Fn, newest, next, cpus...>
128 | {
129 |     using nextdispatcher = runtime_dispatcher<Fn, next, cpus...>;
130 | 
131 |     using targetFn = retarget<Fn, newest>;
132 | 
133 |     template <typename... Args,
134 |               KFR_ENABLE_IF(is_callable<targetFn, Args&&...>::value&& is_enabled<targetFn>::value)>
135 |     KFR_SINTRIN auto call(Fn&& fn, cpu_t set, Args&&... args)
136 |         -> decltype(nextdispatcher::call(std::forward<Fn>(fn), set, std::forward<Args>(args)...))
137 |     {
138 |         return set >= newest
139 |                    ? cpu_caller<newest>::retarget_call(std::forward<Fn>(fn), std::forward<Args>(args)...)
140 |                    : nextdispatcher::call(std::forward<Fn>(fn), set, std::forward<Args>(args)...);
141 |     }
142 |     template <typename... Args,
143 |               KFR_ENABLE_IF(!(is_callable<targetFn, Args&&...>::value && is_enabled<targetFn>::value))>
144 |     KFR_SINTRIN auto call(Fn&& fn, cpu_t set, Args&&... args)
145 |         -> decltype(nextdispatcher::call(std::forward<Fn>(fn), set, std::forward<Args>(args)...))
146 |     {
147 |         return nextdispatcher::call(std::forward<Fn>(fn), set, std::forward<Args>(args)...);
148 |     }
149 | };
150 | 
151 | template <typename Fn, cpu_t newest, cpu_t... cpus, typename... Args>
152 | KFR_INLINE auto runtimedispatch(cvals_t<cpu_t, newest, cpus...>, Fn&& fn, Args&&... args)
153 |     -> decltype(internal::runtime_dispatcher<Fn, newest, cpus...>::call(std::forward<Fn>(fn), get_cpu(),
154 |                                                                         std::forward<Args>(args)...))
155 | {
156 |     return internal::runtime_dispatcher<Fn, newest, cpus...>::call(std::forward<Fn>(fn), get_cpu(),
157 |                                                                    std::forward<Args>(args)...);
158 | }
159 | 
160 | template <cpu_t c, typename Fn, typename... Args, KFR_ENABLE_IF(c == cpu_t::runtime)>
161 | KFR_INLINE auto dispatch(Fn&& fn, Args&&... args) -> decltype(fn(std::forward<Args>(args)...))
162 | {
163 |     return runtimedispatch(std::forward<Fn>(fn), std::forward<Args>(args)...);
164 | }
165 | }
166 | 
167 | template <typename Fn, typename cpulist = decltype(cpu_all), typename... Args>
168 | KFR_INLINE auto runtimedispatch(Fn&& fn, Args&&... args)
169 |     -> decltype(internal::runtimedispatch<Fn>(cpulist(), std::forward<Fn>(fn), std::forward<Args>(args)...))
170 | {
171 |     return internal::runtimedispatch(cpulist(), std::forward<Fn>(fn), std::forward<Args>(args)...);
172 | }
173 | }
174 | 


--------------------------------------------------------------------------------
/include/kfr/expressions/conversion.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 3 |  * This file is part of KFR
 4 |  *
 5 |  * KFR is free software: you can redistribute it and/or modify
 6 |  * it under the terms of the GNU General Public License as published by
 7 |  * the Free Software Foundation, either version 3 of the License, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * KFR is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  * GNU General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public License
16 |  * along with KFR.
17 |  *
18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
20 |  * disclosing the source code of your own applications.
21 |  * See http://www.kfrlib.com for details.
22 |  */
23 | 
24 | #pragma once
25 | 
26 | #include "../base/function.hpp"
27 | #include "../base/operators.hpp"
28 | #include "../base/vec.hpp"
29 | #include "../expressions/basic.hpp"
30 | 
31 | namespace kfr
32 | {
33 | namespace internal
34 | {
35 | template <typename From, typename E>
36 | struct expression_convert : expression<E>
37 | {
38 |     template <cpu_t newcpu>
39 |     using retarget_this = expression_convert<From, retarget<E, newcpu>>;
40 | 
41 |     KFR_INLINE expression_convert(E&& expr) noexcept : expression<E>(std::forward<E>(expr)) {}
42 | 
43 |     template <typename T, size_t N>
44 |     KFR_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N>)
45 |     {
46 |         return this->argument_first(index, vec_t<From, N>());
47 |     }
48 | };
49 | }
50 | 
51 | template <typename From, typename E>
52 | KFR_INLINE internal::expression_convert<From, decay<E>> convert(E&& expr)
53 | {
54 |     return internal::expression_convert<From, decay<E>>(std::forward<E>(expr));
55 | }
56 | KFR_FN(convert)
57 | }
58 | 


--------------------------------------------------------------------------------
/include/kfr/expressions/generators.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "../base/function.hpp"
 26 | #include "../base/log_exp.hpp"
 27 | #include "../base/select.hpp"
 28 | #include "../base/sin_cos.hpp"
 29 | #include "../base/vec.hpp"
 30 | 
 31 | #pragma clang diagnostic push
 32 | #if CID_HAS_WARNING("-Winaccessible-base")
 33 | #pragma clang diagnostic ignored "-Winaccessible-base"
 34 | #endif
 35 | 
 36 | namespace kfr
 37 | {
 38 | 
 39 | namespace internal
 40 | {
 41 | 
 42 | template <cpu_t cpu = cpu_t::native>
 43 | struct in_generators : in_log_exp<cpu>, in_select<cpu>, in_sin_cos<cpu>
 44 | {
 45 | private:
 46 |     using in_log_exp<cpu>::exp;
 47 |     using in_log_exp<cpu>::exp2;
 48 |     using in_select<cpu>::select;
 49 |     using in_sin_cos<cpu>::cossin;
 50 | 
 51 | public:
 52 |     template <typename T, size_t width_, typename Class>
 53 |     struct generator
 54 |     {
 55 |         constexpr static size_t width = width_;
 56 |         using type                    = T;
 57 | 
 58 |         template <typename U, size_t N>
 59 |         KFR_INLINE vec<U, N> operator()(cinput_t, size_t, vec_t<U, N> t) const
 60 |         {
 61 |             return cast<U>(generate(t));
 62 |         }
 63 | 
 64 |         void resync(T start) const { ptr_cast<Class>(this)->sync(start); }
 65 | 
 66 |     protected:
 67 |         void call_next() const { ptr_cast<Class>(this)->next(); }
 68 |         template <size_t N>
 69 |         void call_shift(csize_t<N>) const
 70 |         {
 71 |             ptr_cast<Class>(this)->shift(csize<N>);
 72 |         }
 73 | 
 74 |         template <size_t N>
 75 |         void shift(csize_t<N>) const
 76 |         {
 77 |             const vec<T, width> oldvalue = value;
 78 |             call_next();
 79 |             value = slice<N, width>(oldvalue, value);
 80 |         }
 81 | 
 82 |         template <size_t N, KFR_ENABLE_IF(N == width)>
 83 |         KFR_INLINE vec<T, N> generate(vec_t<T, N>) const
 84 |         {
 85 |             const vec<T, N> result = value;
 86 |             call_next();
 87 |             return result;
 88 |         }
 89 | 
 90 |         template <size_t N, KFR_ENABLE_IF(N < width)>
 91 |         KFR_INLINE vec<T, N> generate(vec_t<T, N>) const
 92 |         {
 93 |             const vec<T, N> result = narrow<N>(value);
 94 |             shift(csize<N>);
 95 |             return result;
 96 |         }
 97 | 
 98 |         template <size_t N, KFR_ENABLE_IF(N > width)>
 99 |         KFR_INLINE vec<T, N> generate(vec_t<T, N> x) const
100 |         {
101 |             const auto lo = generate(low(x));
102 |             const auto hi = generate(high(x));
103 |             return concat(lo, hi);
104 |         }
105 | 
106 |         mutable vec<T, width> value;
107 |     };
108 | 
109 |     template <typename T, size_t width = get_vector_width<T, cpu>(1, 2)>
110 |     struct generator_linear : generator<T, width, generator_linear<T, width>>
111 |     {
112 |         template <cpu_t newcpu>
113 |         using retarget_this = typename in_generators<newcpu>::template generator_linear<T>;
114 | 
115 |         constexpr generator_linear(T start, T step) noexcept : step(step), vstep(step* width)
116 |         {
117 |             this->resync(start);
118 |         }
119 | 
120 |         KFR_INLINE void sync(T start) const noexcept { this->value = start + enumerate<T, width>() * step; }
121 | 
122 |         KFR_INLINE void next() const noexcept { this->value += vstep; }
123 | 
124 |     protected:
125 |         T step;
126 |         T vstep;
127 |     };
128 | 
129 |     template <typename T, size_t width = get_vector_width<T, cpu>(1, 2)>
130 |     struct generator_exp : generator<T, width, generator_exp<T, width>>
131 |     {
132 |         template <cpu_t newcpu>
133 |         using retarget_this = typename in_generators<newcpu>::template generator_exp<T>;
134 | 
135 |         generator_exp(T start, T step) noexcept : step(step), vstep(exp(make_vector(step* width))[0] - 1)
136 |         {
137 |             this->resync(start);
138 |         }
139 | 
140 |         KFR_INLINE void sync(T start) const noexcept
141 |         {
142 |             this->value = exp(start + enumerate<T, width>() * step);
143 |         }
144 | 
145 |         KFR_INLINE void next() const noexcept { this->value += this->value * vstep; }
146 | 
147 |     protected:
148 |         T step;
149 |         T vstep;
150 |     };
151 | 
152 |     template <typename T, size_t width = get_vector_width<T, cpu>(1, 2)>
153 |     struct generator_exp2 : generator<T, width, generator_exp2<T, width>>
154 |     {
155 |         template <cpu_t newcpu>
156 |         using retarget_this = typename in_generators<newcpu>::template generator_exp2<T>;
157 | 
158 |         generator_exp2(T start, T step) noexcept : step(step), vstep(exp2(make_vector(step* width))[0] - 1)
159 |         {
160 |             this->resync(start);
161 |         }
162 | 
163 |         KFR_INLINE void sync(T start) const noexcept
164 |         {
165 |             this->value = exp2(start + enumerate<T, width>() * step);
166 |         }
167 | 
168 |         KFR_INLINE void next() const noexcept { this->value += this->value * vstep; }
169 | 
170 |     protected:
171 |         T step;
172 |         T vstep;
173 |     };
174 | 
175 |     template <typename T, size_t width = get_vector_width<T, cpu>(1, 2)>
176 |     struct generator_cossin : generator<T, width, generator_cossin<T, width>>
177 |     {
178 |         template <cpu_t newcpu>
179 |         using retarget_this = typename in_generators<newcpu>::template generator_cossin<T>;
180 | 
181 |         generator_cossin(T start, T step)
182 |             : step(step), alpha(2 * sqr(sin(width / 2 * step / 2))), beta(-sin(width / 2 * step))
183 |         {
184 |             this->resync(start);
185 |         }
186 |         KFR_INLINE void sync(T start) const noexcept { this->value = init_cossin(step, start); }
187 | 
188 |         KFR_INLINE void next() const noexcept
189 |         {
190 |             this->value = this->value - subadd(alpha * this->value, beta * swap<2>(this->value));
191 |         }
192 | 
193 |     protected:
194 |         T step;
195 |         T alpha;
196 |         T beta;
197 |         KFR_NOINLINE static vec<T, width> init_cossin(T w, T phase)
198 |         {
199 |             return cossin(dup(phase + enumerate<T, width / 2>() * w));
200 |         }
201 |     };
202 | 
203 |     template <typename T, size_t width = get_vector_width<T, cpu>(2, 4)>
204 |     struct generator_sin : generator<T, width, generator_sin<T, width>>
205 |     {
206 |         template <cpu_t newcpu>
207 |         using retarget_this = typename in_generators<newcpu>::template generator_sin<T>;
208 | 
209 |         generator_sin(T start, T step)
210 |             : step(step), alpha(2 * sqr(sin(width * step / 2))), beta(sin(width * step))
211 |         {
212 |             this->resync(start);
213 |         }
214 |         KFR_INLINE void sync(T start) const noexcept
215 |         {
216 |             const vec<T, width* 2> cs = splitpairs(cossin(dup(start + enumerate<T, width>() * step)));
217 |             this->cos_value = low(cs);
218 |             this->value     = high(cs);
219 |         }
220 | 
221 |         KFR_INLINE void next() const noexcept
222 |         {
223 |             const vec<T, width> c = this->cos_value;
224 |             const vec<T, width> s = this->value;
225 | 
226 |             const vec<T, width> cc = alpha * c + beta * s;
227 |             const vec<T, width> ss = alpha * s - beta * c;
228 | 
229 |             this->cos_value = c - cc;
230 |             this->value     = s - ss;
231 |         }
232 | 
233 |         template <size_t N>
234 |         void shift(csize_t<N>) const noexcept
235 |         {
236 |             const vec<T, width> oldvalue    = this->value;
237 |             const vec<T, width> oldcosvalue = this->cos_value;
238 |             next();
239 |             this->value     = slice<N, width>(oldvalue, this->value);
240 |             this->cos_value = slice<N, width>(oldcosvalue, this->cos_value);
241 |         }
242 | 
243 |     protected:
244 |         T step;
245 |         T alpha;
246 |         T beta;
247 |         mutable vec<T, width> cos_value;
248 |     };
249 | };
250 | }
251 | 
252 | template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>>
253 | KFR_SINTRIN internal::in_generators<>::generator_linear<TF> gen_linear(T1 start, T2 step)
254 | {
255 |     return internal::in_generators<>::generator_linear<TF>(start, step);
256 | }
257 | template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>>
258 | KFR_SINTRIN internal::in_generators<>::generator_exp<TF> gen_exp(T1 start, T2 step)
259 | {
260 |     return internal::in_generators<>::generator_exp<TF>(start, step);
261 | }
262 | template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>>
263 | KFR_SINTRIN internal::in_generators<>::generator_exp2<TF> gen_exp2(T1 start, T2 step)
264 | {
265 |     return internal::in_generators<>::generator_exp2<TF>(start, step);
266 | }
267 | template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>>
268 | KFR_SINTRIN internal::in_generators<>::generator_sin<TF> gen_cossin(T1 start, T2 step)
269 | {
270 |     return internal::in_generators<>::generator_cossin<TF>(start, step);
271 | }
272 | template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>>
273 | KFR_SINTRIN internal::in_generators<>::generator_sin<TF> gen_sin(T1 start, T2 step)
274 | {
275 |     return internal::in_generators<>::generator_sin<TF>(start, step);
276 | }
277 | }
278 | 
279 | #pragma clang diagnostic pop
280 | 


--------------------------------------------------------------------------------
/include/kfr/expressions/operators.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 3 |  * This file is part of KFR
 4 |  *
 5 |  * KFR is free software: you can redistribute it and/or modify
 6 |  * it under the terms of the GNU General Public License as published by
 7 |  * the Free Software Foundation, either version 3 of the License, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * KFR is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  * GNU General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public License
16 |  * along with KFR.
17 |  *
18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
20 |  * disclosing the source code of your own applications.
21 |  * See http://www.kfrlib.com for details.
22 |  */
23 | #pragma once
24 | 
25 | #include "../base/function.hpp"
26 | #include "../base/operators.hpp"
27 | #include "../base/vec.hpp"
28 | 
29 | namespace kfr
30 | {
31 | 
32 | #define KFR_EXPR_UNARY(fn, op)                                                                               \
33 |     template <typename A1, KFR_ENABLE_IF(is_input_expression<A1>::value)>                                    \
34 |     KFR_INLINE auto operator op(A1&& a1)->decltype(bind_expression(fn(), std::forward<A1>(a1)))              \
35 |     {                                                                                                        \
36 |         return bind_expression(fn(), std::forward<A1>(a1));                                                  \
37 |     }
38 | 
39 | #define KFR_EXPR_BINARY(fn, op)                                                                              \
40 |     template <typename A1, typename A2, KFR_ENABLE_IF(is_input_expressions<A1, A2>::value)>                  \
41 |     KFR_INLINE auto operator op(A1&& a1, A2&& a2)                                                            \
42 |         ->decltype(bind_expression(fn(), std::forward<A1>(a1), std::forward<A2>(a2)))                        \
43 |     {                                                                                                        \
44 |         return bind_expression(fn(), std::forward<A1>(a1), std::forward<A2>(a2));                            \
45 |     }
46 | 
47 | KFR_EXPR_UNARY(fn_neg, -)
48 | KFR_EXPR_UNARY(fn_bitwisenot, ~)
49 | 
50 | KFR_EXPR_BINARY(fn_add, +)
51 | KFR_EXPR_BINARY(fn_sub, -)
52 | KFR_EXPR_BINARY(fn_mul, *)
53 | KFR_EXPR_BINARY(fn_div, /)
54 | KFR_EXPR_BINARY(fn_bitwiseand, &)
55 | KFR_EXPR_BINARY(fn_bitwiseor, |)
56 | KFR_EXPR_BINARY(fn_bitwisexor, ^)
57 | KFR_EXPR_BINARY(fn_shl, <<)
58 | KFR_EXPR_BINARY(fn_shr, >>)
59 | 
60 | KFR_EXPR_BINARY(fn_equal, ==)
61 | KFR_EXPR_BINARY(fn_notequal, !=)
62 | KFR_EXPR_BINARY(fn_less, <)
63 | KFR_EXPR_BINARY(fn_greater, >)
64 | KFR_EXPR_BINARY(fn_lessorequal, <=)
65 | KFR_EXPR_BINARY(fn_greaterorequal, >=)
66 | }
67 | 


--------------------------------------------------------------------------------
/include/kfr/expressions/pointer.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "../base/vec.hpp"
 26 | #include "basic.hpp"
 27 | #include <memory>
 28 | 
 29 | namespace kfr
 30 | {
 31 | 
 32 | constexpr size_t maximum_expression_width() { return bitness_const(16, 32); }
 33 | 
 34 | template <typename T, size_t maxwidth = maximum_expression_width()>
 35 | using expression_vtable = carray<void*, 2 + ilog2(maxwidth) + 1>;
 36 | 
 37 | struct dummy_content
 38 | {
 39 | };
 40 | 
 41 | struct expression_resource
 42 | {
 43 |     virtual ~expression_resource() {}
 44 |     virtual void* instance() { return nullptr; }
 45 | };
 46 | template <typename E>
 47 | struct alignas(E) expression_resource_impl : expression_resource
 48 | {
 49 |     expression_resource_impl(E&& e) noexcept : e(std::move(e)) {}
 50 |     virtual ~expression_resource_impl() {}
 51 |     virtual void* instance() override final { return &e; }
 52 | private:
 53 |     E e;
 54 | };
 55 | 
 56 | template <typename E>
 57 | std::shared_ptr<expression_resource> make_resource(E&& e)
 58 | {
 59 |     using T = expression_resource_impl<decay<E>>;
 60 |     return std::static_pointer_cast<expression_resource>(
 61 |         std::allocate_shared<T>(allocator<T>(), std::move(e)));
 62 | }
 63 | 
 64 | template <typename T, size_t maxwidth = maximum_expression_width()>
 65 | struct expression_pointer : input_expression
 66 | {
 67 |     using value_type = T;
 68 | 
 69 |     static_assert(is_poweroftwo(maxwidth), "N must be a power of two");
 70 |     expression_pointer() noexcept : instance(nullptr), vtable(nullptr) {}
 71 |     expression_pointer(void* instance, const expression_vtable<T, maxwidth>* vtable,
 72 |                        std::shared_ptr<expression_resource> resource = nullptr)
 73 |         : instance(instance), vtable(vtable), resource(std::move(resource))
 74 |     {
 75 |     }
 76 |     template <typename U, size_t N>
 77 |     KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
 78 |     {
 79 |         using func_t = simd<T, N> (*)(void*, size_t);
 80 | 
 81 |         static_assert(is_poweroftwo(N), "N must be a power of two");
 82 |         constexpr size_t findex = ilog2(N);
 83 |         static_assert(N <= maxwidth, "N is greater than maxwidth");
 84 |         func_t func = reinterpret_cast<func_t>(vtable->get(csize<2 + findex>));
 85 |         vec<U, N> result = cast<U>(func(instance, index));
 86 |         return result;
 87 |     }
 88 |     KFR_INLINE void begin_block(size_t size) const
 89 |     {
 90 |         using func_t = void (*)(void*, size_t);
 91 |         func_t func  = reinterpret_cast<func_t>(vtable->get(csize<0>));
 92 |         func(instance, size);
 93 |     }
 94 |     KFR_INLINE void end_block(size_t size) const
 95 |     {
 96 |         using func_t = void (*)(void*, size_t);
 97 |         func_t func  = reinterpret_cast<func_t>(vtable->get(csize<1>));
 98 |         func(instance, size);
 99 |     }
100 | 
101 | private:
102 |     void* instance;
103 |     const expression_vtable<T, maxwidth>* vtable;
104 |     std::shared_ptr<expression_resource> resource;
105 | };
106 | 
107 | namespace internal
108 | {
109 | template <typename T, size_t N, typename Fn, typename Ret = simd<T, N>,
110 |           typename NonMemFn = Ret (*)(Fn*, size_t, vec_t<T, N>)>
111 | KFR_INLINE NonMemFn make_expression_func()
112 | {
113 |     return [](Fn* fn, size_t index, vec_t<T, N> x) { return *(fn->operator()(cinput, index, x)); };
114 | }
115 | 
116 | template <typename Fn, typename NonMemFn = void (*)(Fn*, size_t)>
117 | KFR_INLINE NonMemFn make_expression_begin_block()
118 | {
119 |     return [](Fn* fn, size_t size) { return fn->begin_block(size); };
120 | }
121 | template <typename Fn, typename NonMemFn = void (*)(Fn*, size_t)>
122 | KFR_INLINE NonMemFn make_expression_end_block()
123 | { // Wraps Fn::end_block in a captureless lambda so it can be returned as a plain function pointer.
124 |     return [](Fn* self, size_t block_size) { return self->end_block(block_size); };
125 | }
126 | 
127 | // Builds the dispatch table for E: slot 0 = begin_block, slot 1 = end_block, slot 2 + k = evaluator for width 1 << k.
128 | template <typename T, size_t maxwidth, typename E>
129 | expression_vtable<T, maxwidth> make_expression_vtable_impl()
130 | {
131 |     expression_vtable<T, maxwidth> result;
132 |     constexpr size_t size = result.size() - 2;
133 |     // Call the factories: storing the factories' own addresses would make begin_block()/end_block() invoke the factory.
134 |     result.get(csize<0>) = reinterpret_cast<void*>(internal::make_expression_begin_block<decay<E>>());
135 |     result.get(csize<1>) = reinterpret_cast<void*>(internal::make_expression_end_block<decay<E>>());
136 |     cforeach(csizeseq<size>, [&](auto u) {
137 |         constexpr size_t N = 1 << val_of(u);
138 |         result.get(csize<2 + val_of(u)>) =
139 |             reinterpret_cast<void*>(internal::make_expression_func<T, N, decay<E>>());
140 |     });
141 |     return result;
142 | }
143 | 
144 | template <typename T, size_t maxwidth, typename E>
145 | KFR_INLINE expression_vtable<T, maxwidth>* make_expression_vtable()
146 | { // Returns the address of a function-local static table, so each <T, maxwidth, E> gets one shared instance.
147 |     static_assert(is_input_expression<E>::value, "E must be an expression");
148 |     static auto table = internal::make_expression_vtable_impl<T, maxwidth, E>();
149 |     return &table;
150 | }
151 | }
152 | 
153 | template <typename E, typename T = value_type_of<E>, size_t maxwidth = maximum_expression_width()>
154 | KFR_INLINE expression_pointer<T, maxwidth> to_pointer(E& expr)
155 | { // Lvalue overload: only the address of `expr` and its vtable are passed on; no resource is created here.
156 |     static_assert(is_input_expression<E>::value, "E must be an expression");
157 |     auto* table = internal::make_expression_vtable<T, maxwidth, E>();
158 |     return expression_pointer<T, maxwidth>(std::addressof(expr), table);
159 | }
160 | 
161 | template <typename E, typename T = value_type_of<E>, size_t maxwidth = maximum_expression_width()>
162 | KFR_INLINE expression_pointer<T, maxwidth> to_pointer(E&& expr)
163 | { // Rvalue overload: `expr` is moved into a shared resource that is handed to the returned pointer.
164 |     static_assert(is_input_expression<E>::value, "E must be an expression");
165 |     std::shared_ptr<expression_resource> ptr = make_resource(std::move(expr));
166 |     const auto inst = ptr->instance(); // read before the call: argument evaluation order is unspecified and ptr is moved
167 |     return expression_pointer<T, maxwidth>(inst, internal::make_expression_vtable<T, maxwidth, E>(), std::move(ptr));
168 | }
169 | }
170 | 


--------------------------------------------------------------------------------
/include/kfr/io/file.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "../base/function.hpp"
 26 | #include "../base/univector.hpp"
 27 | #include "../base/vec.hpp"
 28 | #include <cstdio>
 29 | #include <string>
 30 | 
 31 | namespace kfr
 32 | {
 33 | 
 34 | namespace internal
 35 | {
 36 | struct expression_file_base // Sole owner of a stdio FILE*: move-only, closes the handle (if any) on destruction.
 37 | {
 38 |     expression_file_base()                            = delete;
 39 |     expression_file_base(const expression_file_base&) = delete;
 40 |     expression_file_base(expression_file_base&& other) noexcept : file(other.file) { other.file = nullptr; }
 41 |     expression_file_base(FILE* file) : file(file) {}
 42 |     ~expression_file_base() { if (file != nullptr) fclose(file); } // fclose on a null stream is undefined behaviour
 43 |     bool ok() const { return file != nullptr; }
 44 |     FILE* file; // nullptr if the open failed or ownership was moved away
 45 | };
 46 | 
 47 | // Output expression that writes every value it receives to the file as raw bytes, at the current file position.
 48 | struct expression_sequential_file_writer : expression_file_base, output_expression
 49 | {
 50 |     using expression_file_base::expression_file_base;
 51 |     // Expression sink: the element index argument is ignored.
 52 |     template <typename U, size_t N>
 53 |     void operator()(coutput_t, size_t, vec<U, N> value) { write(value); }
 54 |     // Writes the sizeof(U) bytes of `value`; the fwrite result is not checked.
 55 |     template <typename U>
 56 |     void write(const U& value)
 57 |     {
 58 |         fwrite(std::addressof(value), 1, sizeof(U), file);
 59 |     }
 60 | };
 61 | 
 62 | // Input expression that reads consecutive raw values from the file; the requested index is ignored.
 63 | struct expression_sequential_file_reader : expression_file_base, input_expression
 64 | {
 65 |     using expression_file_base::expression_file_base;
 66 |     // Returns the next N values; the vector starts as qnan, so bytes fread does not deliver keep that value.
 67 |     template <typename U, size_t N>
 68 |     vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const
 69 |     {
 70 |         vec<U, N> result = qnan;
 71 |         read(result);
 72 |         return result;
 73 |     }
 74 |     // Fills `value` with sizeof(U) bytes from the file; the fread result is not checked.
 75 |     template <typename U>
 76 |     void read(U& value) const { fread(std::addressof(value), 1, sizeof(U), file); }
 77 | };
 78 | 
 79 | template <typename T>
 80 | struct expression_file_writer : expression_file_base, output_expression
 81 | { // Random-access writer: values are converted to T and written at byte offset index * sizeof(T).
 82 |     using expression_file_base::expression_file_base;
 83 |     template <typename U, size_t N>
 84 |     void operator()(coutput_t, size_t index, vec<U, N> value)
 85 |     {
 86 |         if (index != position) // not contiguous with the previous write: reposition the stream
 87 |             fseeko(file, static_cast<off_t>(index * sizeof(T)), SEEK_SET);
 88 |         const vec<T, N> converted = cast<T>(value);
 89 |         fwrite(converted.data(), sizeof(T), converted.size(), file);
 90 |         position = N + index;
 91 |     }
 92 |     size_t position = 0; // element index expected by the next contiguous write
 93 | };
 94 | 
 95 | template <typename T>
 96 | struct expression_file_reader : expression_file_base, input_expression
 97 | { // Random-access reader: element `index` is loaded as a T from byte offset index * sizeof(T).
 98 |     using expression_file_base::expression_file_base;
 99 |     template <typename U, size_t N>
100 |     vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
101 |     {
102 |         if (position != index) // not contiguous with the previous read: reposition the stream
103 |             fseeko(file, static_cast<off_t>(index * sizeof(T)), SEEK_SET);
104 |         vec<T, N> input = qnan; // elements fread does not fill keep this value
105 |         fread(input.data(), sizeof(T), input.size(), file);
106 |         position = index + N;
107 |         return cast<U>(input);
108 |     }
109 |     mutable size_t position = 0; // mutable: it is updated from the const operator() of this struct
110 | };
111 | }
112 | 
113 | inline internal::expression_sequential_file_reader sequential_file_reader(const std::string& file_name)
114 | { // Opens `file_name` in "rb" mode; a failed open is visible as ok() == false on the returned expression.
115 |     return internal::expression_sequential_file_reader(fopen(file_name.c_str(), "rb"));
116 | }
117 | inline internal::expression_sequential_file_writer sequential_file_writer(const std::string& file_name)
118 | { // Opens `file_name` in "wb" mode; a failed open is visible as ok() == false on the returned expression.
119 |     return internal::expression_sequential_file_writer(fopen(file_name.c_str(), "wb"));
120 | }
121 | 
122 | template <typename T = u8>
123 | internal::expression_file_reader<T> file_reader(const std::string& file_name)
124 | { // Opens `file_name` in "rb" mode for indexed reads of T elements; check ok() on the result for a failed open.
125 |     return internal::expression_file_reader<T>(fopen(file_name.c_str(), "rb"));
126 | }
127 | template <typename T = u8>
128 | internal::expression_file_writer<T> file_writer(const std::string& file_name)
129 | { // Opens `file_name` in "wb" mode for indexed writes of T elements; check ok() on the result for a failed open.
130 |     return internal::expression_file_writer<T>(fopen(file_name.c_str(), "wb"));
131 | }
132 | }
133 | 


--------------------------------------------------------------------------------
/include/kfr/io/python_plot.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | #include "../base/vec.hpp"
 25 | #include "../cometa/string.hpp"
 26 | #include <cstdlib>
 27 | 
 28 | #ifdef KFR_OS_WIN
 29 | #include <direct.h>
 30 | #define cross_getcwd _getcwd
 31 | #else
 32 | #include <unistd.h>
 33 | #define cross_getcwd getcwd
 34 | #endif
 35 | 
 36 | namespace kfr
 37 | {
 38 | namespace internal
 39 | {
 40 | 
 41 | // Writes `code` to <cwd>/<name>.py and runs it through the shell command `python "<file>"`.
 42 | inline void python(const std::string& name, const std::string& code)
 43 | {
 44 | #ifdef KFR_OS_WIN
 45 |     const char* slash = "\\";
 46 | #else
 47 |     const char* slash = "/";
 48 | #endif
 49 |     char curdir[1024];
 50 |     std::string filename = name + ".py";
 51 |     // Prefix the cwd when it is available; on failure curdir holds no valid string, so keep the relative path.
 52 |     if (cross_getcwd(curdir, arraysize(curdir)) != nullptr)
 53 |         filename = std::string(curdir) + slash + filename;
 54 |     FILE* f = fopen(filename.c_str(), "w");
 55 |     if (f == nullptr)
 56 |         return; // nothing to run: the script file could not be created
 57 |     fwrite(code.c_str(), 1, code.size(), f);
 58 |     fclose(f);
 59 |     std::system(("python \"" + filename + "\"").c_str());
 60 | }
 61 | }
 62 | 
 63 | static std::string concat_args() { return std::string(); } // recursion end: no arguments, empty string
 64 | 
 65 | template <typename... Ts>
 66 | static std::string concat_args(const std::string& left, const Ts&... rest)
 67 | { // Joins the non-empty arguments with ", " (empty ones are skipped).
 68 |     const std::string tail = concat_args(rest...);
 69 |     return (left.empty() || tail.empty()) ? left + tail : left + ", " + tail;
 70 | }
 71 | 
 72 | // Generates and runs a dspplot script that passes the path `wavfile` (as a Python raw string) and `options` to plot().
 73 | static void plot_show(const std::string& name, const std::string& wavfile, const std::string& options = "")
 74 | {
 75 |     print(name, "...");
 76 |     const std::string plot_args = concat_args("r'" + wavfile + "'", options);
 77 |     std::string script = "#!/usr/bin/env python\n"
 78 |                          "import dspplot\n\n";
 79 |     script += "dspplot.plot(" + plot_args + ")\n";
 80 | 
 81 |     internal::python(name, script);
 82 |     print("done\n");
 83 | }
 84 | 
 85 | static void plot_show(const std::string& name, const char* x, const std::string& options = "")
 86 | { // Forwards a C-string path to the std::string overload of plot_show.
 87 |     plot_show(name, std::string(x), options);
 88 | }
 89 | 
 90 | // Generates and runs a dspplot script that plots the values of `x` (anything make_array_ref accepts).
 91 | template <typename T>
 92 | void plot_show(const std::string& name, T&& x, const std::string& options = "")
 93 | {
 94 |     print(name, "...");
 95 |     auto samples = make_array_ref(std::forward<T>(x));
 96 |     std::string script = "#!/usr/bin/env python\n"
 97 |                          "import dspplot\n\n"
 98 |                          "data = [\n";
 99 |     for (size_t k = 0; k != samples.size(); ++k) // one formatted value per script line
100 |         script += as_string(fmt<'g', 20, 17>(samples[k]), ",\n");
101 |     script += "]\n";
102 | 
103 |     script += "dspplot.plot(" + concat_args("data", options) + ")\n";
104 | 
105 |     internal::python(name, script);
106 |     print("done\n");
107 | }
108 | 
109 | template <typename T>
110 | void plot_save(const std::string& name, T&& x, const std::string& options = "")
111 | { // Calls plot_show with file='../svg/<name>.svg' appended to `options`.
112 |     plot_show(name, std::forward<T>(x), concat_args(options, "file='../svg/" + name + ".svg'"));
113 | }
114 | 
115 | // Generates and runs a dspplot.perfplot script: `data` becomes a list of lists, `labels` a list of quoted strings.
116 | template <typename T1, typename T2>
117 | void perfplot_show(const std::string& name, T1&& data, T2&& labels, const std::string& options = "")
118 | {
119 |     print(name, "...");
120 |     auto series_list = make_array_ref(std::forward<T1>(data));
121 |     auto label_list  = make_array_ref(std::forward<T2>(labels));
122 |     std::string script = "#!/usr/bin/env python\n"
123 |                          "import dspplot\n\n"
124 |                          "data = [\n";
125 |     for (size_t row = 0; row < series_list.size(); ++row)
126 |     {
127 |         auto series = make_array_ref(series_list[row]);
128 |         script += "[\n";
129 |         for (size_t col = 0; col < series.size(); ++col)
130 |             script += as_string("    ", fmt<'g', 20, 17>(series[col]), ",\n");
131 |         script += "],";
132 |     }
133 |     script += "]\n";
134 | 
135 |     script += "labels = [\n";
136 |     for (size_t n = 0; n < label_list.size(); ++n)
137 |     {
138 |         const std::string label = label_list[n];
139 |         script += "    '" + label + "',";
140 |     }
141 |     script += "]\n";
142 | 
143 |     script += "dspplot.perfplot(" + concat_args("data, labels", options) + ")\n";
144 | 
145 |     internal::python(name, script);
146 |     print("done\n");
147 | }
148 | 
149 | template <typename T1, typename T2>
150 | void perfplot_save(const std::string& name, T1&& data, T2&& labels, const std::string& options = "")
151 | { // Calls perfplot_show with file='../perf/<name>.svg' appended to `options`.
152 |     const std::string all_options = concat_args(options, "file='../perf/" + name + ".svg'");
153 |     perfplot_show(name, std::forward<T1>(data), std::forward<T2>(labels), all_options);
154 | }
155 | }
156 | 


--------------------------------------------------------------------------------
/include/kfr/io/tostring.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "../base/complex.hpp"
 26 | #include "../base/univector.hpp"
 27 | #include "../base/vec.hpp"
 28 | 
 29 | namespace cometa
 30 | {
 31 | 
 32 | template <typename T>
 33 | inline std::string repr(const kfr::complex<T>& v);
 34 | 
 35 | template <typename T, int N>
 36 | inline std::string repr(kfr::simd<T, N> v);
 37 | 
 38 | template <typename T, size_t N>
 39 | inline std::string repr(kfr::vec<T, N> v);
 40 | 
 41 | template <typename T, size_t Tag>
 42 | inline std::string repr(const kfr::univector<T, Tag>& v);
 43 | }
 44 | #include "../cometa/string.hpp"
 45 | #include <cmath>
 46 | 
 47 | namespace cometa
 48 | {
 49 | 
 50 | namespace details
 51 | {
 52 | 
 53 | constexpr size_t number_width           = 9;
 54 | constexpr size_t number_precision       = 6;
 55 | constexpr size_t number_precision_short = 2;
 56 | constexpr size_t number_columns         = 8;
 57 | 
 58 | template <typename T>
 59 | std::string fmtvalue(std::true_type, const T& x)
 60 | { // Formats x with number_precision; falls back to number_precision_short when that is wider than number_width.
 61 |     std::string full = as_string(fmt<'g', number_width, number_precision>(x));
 62 |     if (full.size() <= number_width)
 63 |         return full;
 64 |     return as_string(fmt<'g', number_width, number_precision_short>(x));
 65 | }
 66 | 
 67 | // Renders a complex value as "<re>+<im>j" or "<re>-<im>j"; the "<im>j" part goes through padleft(number_width, ...).
 68 | template <typename T>
 69 | std::string fmtvalue(std::true_type, const kfr::complex<T>& x)
 70 | {
 71 |     std::string re_text = as_string(fmt<'g', number_width, number_precision>(x.real()));
 72 |     if (re_text.size() > number_width)
 73 |         re_text = as_string(fmt<'g', number_width, number_precision_short>(x.real()));
 74 |     std::string im_text = as_string(fmt<'g', -1, number_precision>(std::abs(x.imag())));
 75 |     if (im_text.size() > number_width)
 76 |         im_text = as_string(fmt<'g', -1, number_precision_short>(std::abs(x.imag())));
 77 |     const char* sign = x.imag() < T(0) ? "-" : "+";
 78 |     return re_text + sign + padleft(number_width, im_text + "j");
 79 | }
 80 | 
 81 | template <typename T>
 82 | std::string fmtvalue(std::false_type, const T& x)
 83 | { // Overload selected by std::false_type: formats repr(x) through fmtwidth<number_width>.
 84 |     return as_string(fmtwidth<number_width>(repr(x)));
 85 | }
 86 | }
 87 | 
 88 | template <typename T>
 89 | inline std::string repr(const kfr::complex<T>& v)
 90 | { // Produces "<real> + <imag>j" using as_string on each part.
 91 |     return as_string(v.real()) + " + " + as_string(v.imag()) + "j";
 92 | }
 93 | 
 94 | // Formats N values as text, details::number_columns per row: a space between values, a newline between rows.
 95 | template <typename T>
 96 | inline std::string repr(const T* source, size_t N)
 97 | {
 98 |     std::string text;
 99 |     for (size_t pos = 0; pos != N; ++pos)
100 |     {
101 |         if (pos != 0)
102 |         {
103 |             const bool row_start = (pos % details::number_columns) == 0;
104 |             text += row_start ? "\n" : " ";
105 |         }
106 |         // std::is_floating_point<T> picks the numeric fmtvalue overload or the repr-based one.
107 |         text += as_string(details::fmtvalue(std::is_floating_point<T>(), source[pos]));
108 |     }
109 |     return text;
110 | }
111 | 
112 | // Formats N complex values, details::number_columns / 2 per row: a space between values, a newline between rows.
113 | template <typename T>
114 | inline std::string repr(const kfr::complex<T>* source, size_t N)
115 | {
116 |     const size_t per_row = details::number_columns / 2;
117 |     std::string text;
118 |     for (size_t pos = 0; pos != N; ++pos)
119 |     {
120 |         if (pos != 0)
121 |         {
122 |             const bool row_start = (pos % per_row) == 0;
123 |             text += row_start ? "\n" : " ";
124 |         }
125 |         text += as_string(details::fmtvalue(std::true_type{}, source[pos]));
126 |     }
127 |     return text;
128 | }
129 | 
130 | template <typename T, int N>
131 | inline std::string repr(kfr::simd<T, N> v)
132 | { // Converts the raw simd value with tovec and formats it through the vec overload.
133 |     return repr(tovec(v));
134 | }
135 | 
136 | template <typename T, size_t N>
137 | inline std::string repr(kfr::vec<T, N> v)
138 | { // Formats the elements of v through the (pointer, count) overload.
139 |     return repr(v.data(), v.size());
140 | }
141 | 
142 | template <typename T, size_t Tag>
143 | inline std::string repr(const kfr::univector<T, Tag>& v)
144 | { // Formats the elements of v through the (pointer, count) overload.
145 |     return repr(v.data(), v.size());
146 | }
147 | }
148 | 


--------------------------------------------------------------------------------
/include/kfr/math.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 3 |  * This file is part of KFR
 4 |  *
 5 |  * KFR is free software: you can redistribute it and/or modify
 6 |  * it under the terms of the GNU General Public License as published by
 7 |  * the Free Software Foundation, either version 3 of the License, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * KFR is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  * GNU General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public License
16 |  * along with KFR.
17 |  *
18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
20 |  * disclosing the source code of your own applications.
21 |  * See http://www.kfrlib.com for details.
22 |  */
23 | #pragma once
24 | 
25 | #include "base/vec.hpp"
26 | 
27 | #include "base/abs.hpp"
28 | #include "base/asin_acos.hpp"
29 | #include "base/atan.hpp"
30 | #include "base/complex.hpp"
31 | #include "base/constants.hpp"
32 | #include "base/digitreverse.hpp"
33 | #include "base/gamma.hpp"
34 | #include "base/log_exp.hpp"
35 | #include "base/logical.hpp"
36 | #include "base/min_max.hpp"
37 | #include "base/operators.hpp"
38 | #include "base/read_write.hpp"
39 | #include "base/round.hpp"
40 | #include "base/saturation.hpp"
41 | #include "base/select.hpp"
42 | #include "base/shuffle.hpp"
43 | #include "base/sin_cos.hpp"
44 | #include "base/sqrt.hpp"
45 | #include "base/tan.hpp"
46 | #include "kfr/base/hyperbolic.hpp"
47 | 
48 | namespace kfr
49 | {
50 | using namespace native;
51 | }
52 | 


--------------------------------------------------------------------------------
/include/kfr/misc/compiletime.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 3 |  * This file is part of KFR
 4 |  *
 5 |  * KFR is free software: you can redistribute it and/or modify
 6 |  * it under the terms of the GNU General Public License as published by
 7 |  * the Free Software Foundation, either version 3 of the License, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * KFR is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  * GNU General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public License
16 |  * along with KFR.
17 |  *
18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
20 |  * disclosing the source code of your own applications.
21 |  * See http://www.kfrlib.com for details.
22 |  */
23 | #pragma once
24 | #include "../base/constants.hpp"
25 | #include "../base/operators.hpp"
26 | #include "../base/types.hpp"
27 | 
28 | namespace kfr
29 | {
30 | 
31 | namespace compiletime // constexpr scalar helpers: select, trunc, abs, mulsign, sin, cos
32 | {
33 | 
34 | template <typename T>
35 | constexpr inline T select(bool c, T x, T y) // x when c is true, otherwise y
36 | {
37 |     return c ? x : y;
38 | }
39 | template <typename T>
40 | constexpr inline T trunc(T x) // drops the fractional part by a round trip through long long
41 | {
42 |     return static_cast<T>(static_cast<long long>(x));
43 | }
44 | template <typename T>
45 | constexpr inline T abs(T x) // negates x when it compares below T()
46 | {
47 |     return x < T() ? -x : x;
48 | }
49 | template <typename T>
50 | constexpr inline T mulsign(T x, T y) // negates x when y compares below T()
51 | {
52 |     return y < T() ? -x : x;
53 | }
54 | template <typename T>
55 | constexpr inline T sin(T x) // polynomial sine approximation using select, trunc, abs and mulsign
56 | {
57 |     x              = x - trunc(x / c_pi<T, 2>) * c_pi<T, 2>; // remainder of x w.r.t. c_pi<T, 2> (presumably 2*pi -- confirm in constants.hpp)
58 |     constexpr T c2 = -0.16665853559970855712890625; // near -1/3!
59 |     constexpr T c4 = +8.31427983939647674560546875e-3; // near +1/5!
60 |     constexpr T c6 = -1.85423981747590005397796630859375e-4; // near -1/7!
61 | 
62 |     x -= c_pi<T>;
63 |     T y = abs(x);
64 |     y   = select(y > c_pi<T, 1, 2>, c_pi<T> - y, y); // fold |x| above c_pi<T, 1, 2> back as c_pi<T> - |x|
65 |     y   = mulsign(y, -x); // negate y when x is positive (-x < 0)
66 | 
67 |     const T y2 = y * y;
68 |     T formula  = c6;
69 |     const T y3 = y2 * y;
70 |     formula    = fmadd(formula, y2, c4); // Horner steps, assuming fmadd(a, b, c) computes a * b + c
71 |     formula    = fmadd(formula, y2, c2);
72 |     formula    = formula * y3 + y; // odd polynomial: y + y^3 * (c2 + c4 * y^2 + c6 * y^4)
73 |     return formula;
74 | }
75 | template <typename T>
76 | constexpr inline T cos(T x) // cosine computed as sin(x + c_pi<T, 1, 2>)
77 | {
78 |     return sin(x + c_pi<T, 1, 2>);
79 | }
80 | }
81 | }
82 | 


--------------------------------------------------------------------------------
/include/kfr/misc/random.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | #include "../base/function.hpp"
 25 | #include "../base/operators.hpp"
 26 | #include "../base/shuffle.hpp"
 27 | #include "../base/vec.hpp"
 28 | 
 29 | namespace kfr
 30 | {
 31 | 
 32 | using random_state = u32x4;
 33 | 
 34 | struct seed_from_rdtsc_t // empty tag type: selects the random_bit_generator constructor seeded from the cycle counter
 35 | {
 36 | };
 37 | 
 38 | constexpr seed_from_rdtsc_t seed_from_rdtsc{};
 39 | 
 40 | struct random_bit_generator // generator with a u32x4 state; every call to operator() advances and returns that state
 41 | {
 42 |     random_bit_generator(seed_from_rdtsc_t) noexcept // seeds from two __builtin_readcyclecounter() reads
 43 |         : state(bitcast<u32>(make_vector(__builtin_readcyclecounter(),
 44 |                                          (__builtin_readcyclecounter() << 11) ^ 0x710686d615e2257bull)))
 45 |     {
 46 |         (void)operator()(); // advance once so the raw seed is never returned as output
 47 |     }
 48 |     constexpr random_bit_generator(u32 x0, u32 x1, u32 x2, u32 x3) noexcept : state(x0, x1, x2, x3) // explicit 4 x 32-bit seed
 49 |     {
 50 |         (void)operator()(); // NOTE(review): operator() is not declared constexpr -- confirm these constructors are usable in constant expressions
 51 |     }
 52 |     constexpr random_bit_generator(u64 x0, u64 x1) noexcept : state(bitcast<u32>(make_vector(x0, x1))) // explicit 2 x 64-bit seed
 53 |     {
 54 |         (void)operator()();
 55 |     }
 56 | 
 57 |     inline random_state operator()() // advances the state and returns the new value
 58 |     {
 59 |         constexpr static random_state mul{ 214013u, 17405u, 214013u, 69069u };
 60 |         constexpr static random_state add{ 2531011u, 10395331u, 13737667u, 1u };
 61 |         state = bitcast<u32>(rotateright<3>(bitcast<u8>(fmadd(state, mul, add)))); // multiply-add per u32 lane (assuming fmadd(a, b, c) is a * b + c), then rotateright<3> on the u8 view
 62 |         return state;
 63 |     }
 64 | 
 65 | protected:
 66 |     random_state state;
 67 | };
 68 | 
 69 | template <size_t N, KFR_ENABLE_IF(N <= sizeof(random_state))>
 70 | inline vec<u8, N> random_bits(random_bit_generator& gen) // N bytes from one generator step (N no larger than the state)
 71 | {
 72 |     return narrow<N>(bitcast<u8>(gen()));
 73 | }
 74 | template <size_t N, KFR_ENABLE_IF(N > sizeof(random_state))>
 75 | inline vec<u8, N> random_bits(random_bit_generator& gen) // N larger than the state: split into two recursive requests and concatenate
 76 | {
 77 |     constexpr size_t N2 = prev_poweroftwo(N - 1);
 78 |     return concat(random_bits<N2>(gen), random_bits<N - N2>(gen)); // NOTE(review): both calls are arguments of one call, so which advances gen first is unspecified
 79 | }
 80 | 
 81 | template <typename T, size_t N, KFR_ENABLE_IF(std::is_integral<T>::value)>
 82 | inline vec<T, N> random_uniform(random_bit_generator& gen) // integral T: N * sizeof(T) random bytes reinterpreted as N values of T
 83 | {
 84 |     return bitcast<T>(random_bits<N * sizeof(T)>(gen));
 85 | }
 86 | 
 87 | template <typename T, size_t N, KFR_ENABLE_IF(std::is_same<T, f32>::value)>
 88 | inline vec<f32, N> randommantissa(random_bit_generator& gen) // f32 in [1, 2): random mantissa bits under the exponent of 1.0f
 89 | {
 90 |     return bitcast<f32>((random_uniform<u32, N>(gen) & 0x7FFFFFu) | 0x3f800000u) + 0.0f; // 0x7FFFFF keeps the 23 mantissa bits, 0x3f800000 is the bit pattern of 1.0f
 91 | }
 92 | 
 93 | template <typename T, size_t N, KFR_ENABLE_IF(std::is_same<T, f64>::value)>
 94 | inline vec<f64, N> randommantissa(random_bit_generator& gen) // f64 in [1, 2): random mantissa bits under the exponent of 1.0
 95 | {
 96 |     return bitcast<f64>((random_uniform<u64, N>(gen) & 0x000FFFFFFFFFFFFFull) | 0x3FF0000000000000ull) + 0.0; // mask keeps the 52 mantissa bits, 0x3FF0000000000000 is the bit pattern of 1.0
 97 | }
 98 | 
 99 | template <typename T, size_t N>
100 | inline enable_if_f<vec<T, N>> random_uniform(random_bit_generator& gen) // floating-point T: shifts randommantissa's [1, 2) down to [0, 1)
101 | {
102 |     return randommantissa<T, N>(gen) - 1.f;
103 | }
104 | 
105 | template <size_t N, typename T>
106 | inline enable_if_f<vec<T, N>> random_range(random_bit_generator& gen, T min, T max) // floating-point T: passes a uniform value with min and max to mix
107 | {
108 |     return mix(random_uniform<T, N>(gen), min, max); // presumably a linear blend between min and max -- confirm mix's definition
109 | }
110 | 
111 | template <size_t N, typename T>
112 | inline enable_if_not_f<vec<T, N>> random_range(random_bit_generator& gen, T min, T max) // non-floating T: scales uniform bits by (max - min) in a wider integer type
113 | {
114 |     using big_type = findinttype<sqr(std::numeric_limits<T>::min()), sqr(std::numeric_limits<T>::max())>; // integer type chosen to hold the squared limits of T
115 | 
116 |     vec<T, N> u                = random_uniform<T, N>(gen);
117 |     const vec<big_type, N> tmp = cast<big_type>(u);
118 |     return cast<T>((tmp * (max - min) + min) >> typebits<T>::bits); // NOTE(review): `+ min` is applied before the shift, so the offset is shifted too -- confirm this is intended
119 | }
120 | 
121 | namespace internal // input expressions that draw their values from a random_bit_generator
122 | {
123 | template <typename T>
124 | struct expression_random_uniform : input_expression // yields random_uniform<T, N> values cast to the requested type
125 | {
126 |     using value_type = T;
127 |     constexpr expression_random_uniform(const random_bit_generator& gen) noexcept : gen(gen) {} // stores its own copy of the generator
128 |     template <typename U, size_t N>
129 |     vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const // the index argument is ignored
130 |     {
131 |         return cast<U>(random_uniform<T, N>(gen));
132 |     }
133 |     mutable random_bit_generator gen; // mutable: advanced from the const operator()
134 | };
135 | 
136 | template <typename T>
137 | struct expression_random_range : input_expression // yields random_range<N, T>(gen, min, max) values cast to the requested type
138 | {
139 |     using value_type = T;
140 |     constexpr expression_random_range(const random_bit_generator& gen, T min, T max) noexcept : gen(gen),
141 |                                                                                                 min(min),
142 |                                                                                                 max(max)
143 |     {
144 |     }
145 | 
146 |     template <typename U, size_t N>
147 |     vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const // the index argument is ignored
148 |     {
149 |         return cast<U>(random_range<N, T>(gen, min, max));
150 |     }
151 |     mutable random_bit_generator gen; // mutable: advanced from the const operator()
152 |     const T min;
153 |     const T max;
154 | };
155 | }
156 | 
157 | template <typename T>
158 | inline internal::expression_random_uniform<T> gen_random_uniform(const random_bit_generator& gen) // uniform random expression using a copy of `gen`
159 | {
160 |     return internal::expression_random_uniform<T>(gen);
161 | }
162 | 
163 | template <typename T>
164 | inline internal::expression_random_range<T> gen_random_range(const random_bit_generator& gen, T min, T max) // ranged random expression using a copy of `gen`
165 | {
166 |     return internal::expression_random_range<T>(gen, min, max);
167 | }
168 | 
169 | template <typename T>
170 | inline internal::expression_random_uniform<T> gen_random_uniform() // uniform random expression with a generator seeded via seed_from_rdtsc
171 | {
172 |     return internal::expression_random_uniform<T>(random_bit_generator(seed_from_rdtsc));
173 | }
174 | 
175 | template <typename T>
176 | inline internal::expression_random_range<T> gen_random_range(T min, T max) // ranged random expression with a generator seeded via seed_from_rdtsc
177 | {
178 |     return internal::expression_random_range<T>(random_bit_generator(seed_from_rdtsc), min, max);
179 | }
180 | }
181 | 


--------------------------------------------------------------------------------
/include/kfr/misc/small_buffer.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
  3 |  * This file is part of KFR
  4 |  *
  5 |  * KFR is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * KFR is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with KFR.
 17 |  *
 18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
 19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
 20 |  * disclosing the source code of your own applications.
 21 |  * See http://www.kfrlib.com for details.
 22 |  */
 23 | #pragma once
 24 | 
 25 | #include "../base/memory.hpp"
 26 | #include <algorithm>
 27 | #include <cstdint>
 28 | 
 29 | namespace kfr
 30 | {
 31 | 
 32 | template <typename T, std::size_t Capacity = 16>
 33 | struct small_buffer
 34 | {
 35 | public:
 36 |     small_buffer() noexcept : m_size(0), m_data(m_preallocated) {}
 37 | 
 38 |     small_buffer(std::size_t size) : small_buffer() { resize(size); }
 39 | 
 40 |     friend void swap(small_buffer<T, Capacity>& first, small_buffer<T, Capacity>& second) noexcept
 41 |     {
 42 |         using std::swap;
 43 | 
 44 |         swap(first.m_size, second.m_size);
 45 |         swap(first.m_data, second.m_data);
 46 |         swap(first.m_preallocated, second.m_preallocated);
 47 |         first.m_data  = first.m_size <= Capacity ? first.m_preallocated : first.m_data;
 48 |         second.m_data = second.m_size <= Capacity ? second.m_preallocated : second.m_data;
 49 |     }
 50 |     small_buffer(small_buffer<T, Capacity>&& other) : small_buffer() { swap(other, *this); }
 51 | 
 52 |     small_buffer(const small_buffer<T, Capacity>& other) : small_buffer() { assign(other); }
 53 |     small_buffer<T, Capacity>& operator=(small_buffer<T, Capacity> other)
 54 |     {
 55 |         swap(other, *this);
 56 |         return *this;
 57 |     }
 58 | 
 59 |     ~small_buffer() { clear(); }
 60 | 
 61 |     void assign(const small_buffer<T, Capacity>& other)
 62 |     {
 63 |         resize(other.m_size);
 64 |         std::copy_n(other.m_data, m_size, m_data);
 65 |     }
 66 | 
 67 |     void resize(std::size_t newsize)
 68 |     {
 69 |         T* m_newdata;
 70 |         if (newsize <= Capacity)
 71 |         {
 72 |             m_newdata = m_preallocated;
 73 |         }
 74 |         else
 75 |         {
 76 |             m_newdata = aligned_allocate<T>(newsize);
 77 |         }
 78 |         std::copy_n(std::make_move_iterator(m_data), std::min(newsize, m_size), m_newdata);
 79 |         if (m_data != m_preallocated)
 80 |             aligned_deallocate(m_data);
 81 |         m_data = m_newdata;
 82 |         m_size = newsize;
 83 |     }
 84 |     bool empty() const { return !size(); }
 85 |     std::size_t size() const { return m_size; }
 86 |     const T* begin() const { return m_data; }
 87 |     const T* end() const { return m_data + m_size; }
 88 |     const T* cbegin() const { return m_data; }
 89 |     const T* cend() const { return m_data + m_size; }
 90 |     T* begin() { return m_data; }
 91 |     T* end() { return m_data + m_size; }
 92 |     void clear() { resize(0); }
 93 |     const T& front() const { return m_data[0]; }
 94 |     const T& back() const { return m_data[m_size - 1]; }
 95 |     T& front() { return m_data[0]; }
 96 |     T& back() { return m_data[m_size - 1]; }
 97 |     void pop_back() { resize(m_size - 1); }
 98 |     T* data() { return m_data; }
 99 |     const T* data() const { return m_data; }
100 |     T& operator[](std::size_t i) { return m_data[i]; }
101 |     const T& operator[](std::size_t i) const { return m_data[i]; }
102 |     void push_back(const T& value)
103 |     {
104 |         resize(m_size + 1);
105 |         m_data[m_size - 1] = value;
106 |     }
107 | 
108 | protected:
109 |     T m_preallocated[Capacity];
110 |     std::size_t m_size;
111 |     T* m_data;
112 | };
113 | }
114 | 


--------------------------------------------------------------------------------
/include/kfr/misc/sort.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 3 |  * This file is part of KFR
 4 |  *
 5 |  * KFR is free software: you can redistribute it and/or modify
 6 |  * it under the terms of the GNU General Public License as published by
 7 |  * the Free Software Foundation, either version 3 of the License, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * KFR is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  * GNU General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public License
16 |  * along with KFR.
17 |  *
18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
20 |  * disclosing the source code of your own applications.
21 |  * See http://www.kfrlib.com for details.
22 |  */
23 | #pragma once
24 | 
25 | #include "../base/min_max.hpp"
26 | #include "../base/shuffle.hpp"
27 | #include "../base/vec.hpp"
28 | 
29 | namespace kfr
30 | {
/**
 * Sort the elements in the vector in ascending order
 *
 * The vector is split into its low and high halves, which are then combined by Nhalf rounds of
 * element-wise min/max, rotate and blend steps before the halves are merged back with
 * interleavehalfs.
 *
 * NOTE(review): Nhalf is N / 2 using integer division, so this presumably expects an even N -
 * confirm against the definitions of low/high.
 *
 * @param x input vector
 * @return sorted vector
 * @code
 * CHECK(sort(make_vector(1000, 1, 2, -10)) == make_vector(-10, 1, 2, 1000));
 * @endcode
 */
template <typename T, size_t N>
KFR_INLINE vec<T, N> sort(vec<T, N> x)
{
    using namespace kfr::native;
    constexpr size_t Nhalf = N / 2;
    vec<T, Nhalf> e = low(x);
    vec<T, Nhalf> o = high(x);
    // Blend selector: a single 1 followed by a csizeseq of Nhalf - 1 entries
    // (presumably all zeros, given start 0 and step 0 - confirm against csizeseq).
    constexpr auto blend0 = cconcat(csizes<1>, csizeseq<Nhalf - 1, 0, 0>);
    for (size_t i = 0; i < Nhalf; i++)
    {
        vec<T, Nhalf> t;
        // Stage 1: element-wise compare-exchange; e receives the minima, o the maxima.
        // o is additionally rotated by one lane so stage 2 compares different pairs.
        t = min(e, o);
        o = max(e, o);
        o = rotateright<1>(o);
        e = t;
        // Stage 2: compare-exchange against the rotated o; this time e receives the maxima
        // and o the minima.
        t = max(e, o);
        o = min(e, o);
        e = t;
        // Stage 3: exchange lanes between e and o as selected by blend0, then undo the
        // rotation applied to o in stage 1.
        t = blend(e, o, blend0);
        o = blend(o, e, blend0);
        o = rotateleft<1>(o);
        e = t;
    }
    return interleavehalfs(concat(e, o));
}
64 | 
/**
 * Sort the elements in the vector in descending order
 *
 * Same structure as sort, with every min and max exchanged: the vector is split into its low
 * and high halves, which are combined by Nhalf rounds of element-wise max/min, rotate and blend
 * steps before the halves are merged back with interleavehalfs.
 *
 * NOTE(review): Nhalf is N / 2 using integer division, so this presumably expects an even N -
 * confirm against the definitions of low/high.
 *
 * @param x input vector
 * @return sorted vector
 * @code
 * CHECK(sortdesc(make_vector(1000, 1, 2, -10)) == make_vector(1000, 2, 1, -10));
 * @endcode
 */
template <typename T, size_t N>
KFR_INLINE vec<T, N> sortdesc(vec<T, N> x)
{
    using namespace kfr::native;
    constexpr size_t Nhalf = N / 2;
    vec<T, Nhalf> e = low(x);
    vec<T, Nhalf> o = high(x);
    // Blend selector: a single 1 followed by a csizeseq of Nhalf - 1 entries
    // (presumably all zeros, given start 0 and step 0 - confirm against csizeseq).
    constexpr auto blend0 = cconcat(csizes<1>, csizeseq<Nhalf - 1, 0, 0>);
    for (size_t i = 0; i < Nhalf; i++)
    {
        vec<T, Nhalf> t;
        // Stage 1: element-wise compare-exchange; e receives the maxima, o the minima.
        // o is additionally rotated by one lane so stage 2 compares different pairs.
        t = max(e, o);
        o = min(e, o);
        o = rotateright<1>(o);
        e = t;
        // Stage 2: compare-exchange against the rotated o; this time e receives the minima
        // and o the maxima.
        t = min(e, o);
        o = max(e, o);
        e = t;
        // Stage 3: exchange lanes between e and o as selected by blend0, then undo the
        // rotation applied to o in stage 1.
        t = blend(e, o, blend0);
        o = blend(o, e, blend0);
        o = rotateleft<1>(o);
        e = t;
    }
    return interleavehalfs(concat(e, o));
}
98 | }
99 | 


--------------------------------------------------------------------------------
/include/kfr/vec.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 3 |  * This file is part of KFR
 4 |  *
 5 |  * KFR is free software: you can redistribute it and/or modify
 6 |  * it under the terms of the GNU General Public License as published by
 7 |  * the Free Software Foundation, either version 3 of the License, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * KFR is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  * GNU General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public License
16 |  * along with KFR.
17 |  *
18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
20 |  * disclosing the source code of your own applications.
21 |  * See http://www.kfrlib.com for details.
22 |  */
23 | #pragma once
24 | 
25 | #include "base/vec.hpp"
26 | 


--------------------------------------------------------------------------------
/include/kfr/version.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 3 |  * This file is part of KFR
 4 |  *
 5 |  * KFR is free software: you can redistribute it and/or modify
 6 |  * it under the terms of the GNU General Public License as published by
 7 |  * the Free Software Foundation, either version 3 of the License, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * KFR is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  * GNU General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public License
16 |  * along with KFR.
17 |  *
18 |  * If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
19 |  * Buying a commercial license is mandatory as soon as you develop commercial activities without
20 |  * disclosing the source code of your own applications.
21 |  * See http://www.kfrlib.com for details.
22 |  */
23 | #pragma once
24 | 
25 | #include "base/types.hpp"
26 | #include <string>
27 | 
28 | namespace kfr
29 | {
30 | static std::string library_version()
31 | {
32 |     return "KFR " + std::string(version_string) + bitness_const(" x86 ", " x86-64 ") +
33 |            CID_STRINGIFY(KFR_ARCH_NAME);
34 | }
35 | }
36 | 


--------------------------------------------------------------------------------
/sources.cmake:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 2 | # This file is part of KFR
 3 | # 
 4 | # KFR is free software: you can redistribute it and/or modify
 5 | # it under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | # 
 9 | # KFR is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | # GNU General Public License for more details.
13 | # 
14 | # You should have received a copy of the GNU General Public License
15 | # along with KFR.
16 | 
17 | 
# KFR_SRC: paths, rooted at PROJECT_SOURCE_DIR, of the KFR headers under include/kfr plus the
# bundled testo headers under tests/testo.
# NOTE(review): the list is maintained by hand and only names headers; presumably it exists so
# that the headers appear in IDE project views when attached to a target - confirm, and keep it
# in sync when headers are added or removed.
set(
    KFR_SRC
    ${PROJECT_SOURCE_DIR}/include/kfr/base/abs.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/asin_acos.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/atan.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/complex.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/constants.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/digitreverse.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/dispatch.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/expression.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/function.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/gamma.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/log_exp.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/logical.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/memory.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/min_max.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/operators.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/read_write.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/round.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/saturation.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/select.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/shuffle.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/sin_cos.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/sinh_cosh.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/sqrt.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/tan.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/types.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/univector.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/vec.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/data/bitrev.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/data/sincos.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/dft/bitrev.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/dft/fft.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/dft/ft.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/dft/reference_dft.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/dispatch/cpuid.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/dispatch/runtimedispatch.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/expressions/basic.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/expressions/conversion.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/expressions/generators.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/expressions/operators.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/expressions/pointer.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/expressions/reduce.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/io/audiofile.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/io/file.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/io/python_plot.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/io/tostring.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/math.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/misc/compiletime.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/misc/random.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/misc/small_buffer.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/misc/sort.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/vec.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/version.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/base/kfr.h
    ${PROJECT_SOURCE_DIR}/include/kfr/base/intrinsics.h
    ${PROJECT_SOURCE_DIR}/include/kfr/cometa.hpp
    ${PROJECT_SOURCE_DIR}/include/kfr/cometa/string.hpp

    ${PROJECT_SOURCE_DIR}/tests/testo/testo.hpp
    ${PROJECT_SOURCE_DIR}/tests/testo/print_colored.hpp
)
80 | 


--------------------------------------------------------------------------------
/syntax-check.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from __future__ import print_function
 3 | 
 4 | import fnmatch
 5 | import subprocess
 6 | import os
 7 | import sys
 8 | 
 9 | path = os.path.dirname(os.path.realpath(__file__))
10 | 
11 | filenames = []
12 | for root, dirnames, files in os.walk(os.path.join(path, 'include')):
13 |     for filename in fnmatch.filter(files, '*.hpp'):
14 |         filenames.append(os.path.join(root, filename))
15 |         
16 | 
17 | target = ""
18 | if sys.platform.startswith('win32'):
19 | 	target = "--target=x86_64-w64-windows-gnu"
20 |     
21 | fails = 0
22 | for filename in filenames:
23 |     print(filename, '...')
24 |     c = subprocess.call(["clang", "-fsyntax-only", filename, "-std=c++14", "-I"+os.path.join(path, "include"), "-Wno-pragma-once-outside-header", target])
25 |     if c != 0:
26 |         fails+=1
27 |         
28 | exit(fails)
29 | 


--------------------------------------------------------------------------------
/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2016 D Levin (http://www.kfrlib.com)
 2 | # This file is part of KFR
 3 | # 
 4 | # KFR is free software: you can redistribute it and/or modify
 5 | # it under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | # 
 9 | # KFR is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | # GNU General Public License for more details.
13 | # 
14 | # You should have received a copy of the GNU General Public License
15 | # along with KFR.
16 | 
17 | 
cmake_minimum_required(VERSION 3.0)

# Compiler and linker settings applied to the targets defined below.
if (NOT MSVC)
    add_compile_options(-fno-exceptions -fno-rtti -ftemplate-backtrace-limit=0 -march=native)
    link_libraries(stdc++ pthread m)
else ()
    add_compile_options(/arch:AVX)
endif ()

include_directories(../include)

# KFR_SRC (a list of header paths) is attached to the target in addition to the test source.
add_executable(dft_test dft_test.cpp ${KFR_SRC})

enable_testing()

# Name the executable by its target rather than by a hard-coded path: with the NAME/COMMAND
# signature CMake substitutes the real location of the built executable, which also works with
# multi-config generators that place binaries in per-configuration subdirectories.
add_test(NAME dft_test
        COMMAND dft_test)


--------------------------------------------------------------------------------
/tests/dft_test.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * KFR (http://kfrlib.com)
 3 |  * Copyright (C) 2016  D Levin
 4 |  * See LICENSE.txt for details
 5 |  */
 6 | 
 7 | // library_version()
 8 | #include <kfr/version.hpp>
 9 | 
10 | #include <tuple>
11 | 
12 | #include "testo/testo.hpp"
13 | #include <kfr/cometa/string.hpp>
14 | #include <kfr/dft/fft.hpp>
15 | #include <kfr/dft/reference_dft.hpp>
16 | #include <kfr/expressions/basic.hpp>
17 | #include <kfr/expressions/operators.hpp>
18 | #include <kfr/expressions/reduce.hpp>
19 | #include <kfr/io/tostring.hpp>
20 | #include <kfr/math.hpp>
21 | #include <kfr/misc/random.hpp>
22 | #include <kfr/version.hpp>
23 | 
24 | using namespace kfr;
25 | 
// Compares dft_plan::execute against reference_dft for every combination of element type
// (float, double), direction flag (false, true) and log2(size) value from make_range(1, 21).
TEST(fft_accuracy)
{
    // Turn on the show_progress flag of the currently active test.
    testo::active_test()->show_progress = true;
    // Generator constructed from four fixed constants (presumably seed words, which would make
    // the input data the same on every run - confirm against random_bit_generator). It is
    // shared by reference across all parameter combinations.
    random_bit_generator gen(2247448713, 915890490, 864203735, 2982561);

    testo::matrix(named("type")       = ctypes<float, double>, //
                  named("inverse")    = std::make_tuple(false, true), //
                  named("log2(size)") = make_range(1, 21), //
                  [&gen](auto type, bool inverse, size_t log2size) {
                      using float_type  = type_of<decltype(type)>;
                      const size_t size = 1 << log2size;

                      // size * 2 random scalars drawn with bounds -1.0 and +1.0, stored as
                      // complex values (presumably `size` complex numbers - confirm against
                      // typed<> and univector).
                      univector<complex<float_type>> in =
                          typed<float_type>(gen_random_range(gen, -1.0, +1.0), size * 2);
                      univector<complex<float_type>> out    = in;
                      univector<complex<float_type>> refout = out;
                      const dft_plan<float_type> dft(size);
                      // Scratch buffer sized as requested by the plan.
                      univector<u8> temp(dft.temp_size);

                      // Reference result from `in`; the plan is executed with `out` as both
                      // source and destination.
                      reference_dft(refout.data(), in.data(), size, inverse);
                      dft.execute(out, out, temp, inverse);

                      // Pass criterion: RMS of |refout - out| below
                      // machine epsilon * 100 * log2(size).
                      const float_type rms_diff = rms(cabs(refout - out));
                      const double ops          = log2size * 100;
                      const double epsilon      = std::numeric_limits<float_type>::epsilon();
                      CHECK(rms_diff < epsilon * ops);
                  });
}
54 | 
55 | int main(int argc, char** argv)
56 | {
57 |     println(library_version());
58 | 
59 |     return testo::run_all("", true);
60 | }
61 | 


--------------------------------------------------------------------------------
/tests/testo/print_colored.hpp:
--------------------------------------------------------------------------------
  1 | ﻿#pragma once
#include <cstdint>
#include <cstdio>
  3 | 
  4 | #if defined(_WIN32)
  5 | #include <windows.h>
  6 | #endif
  7 | 
  8 | namespace print_colored
  9 | {
 10 | 
 11 | enum text_color : uint32_t
 12 | {
 13 |     Black         = 0x00,
 14 |     DarkBlue      = 0x01,
 15 |     DarkGreen     = 0x02,
 16 |     DarkCyan      = 0x03,
 17 |     DarkRed       = 0x04,
 18 |     DarkMagenta   = 0x05,
 19 |     DarkYellow    = 0x06,
 20 |     LightGrey     = 0x07,
 21 |     Gray          = 0x08,
 22 |     Blue          = 0x09,
 23 |     Green         = 0x0A,
 24 |     Cyan          = 0x0B,
 25 |     Red           = 0x0C,
 26 |     Magenta       = 0x0D,
 27 |     Yellow        = 0x0E,
 28 |     White         = 0x0F,
 29 |     BgBlack       = 0x00,
 30 |     BgDarkBlue    = 0x10,
 31 |     BgDarkGreen   = 0x20,
 32 |     BgDarkCyan    = 0x30,
 33 |     BgDarkRed     = 0x40,
 34 |     BgDarkMagenta = 0x50,
 35 |     BgDarkYellow  = 0x60,
 36 |     BgLightGrey   = 0x70,
 37 |     BgGray        = 0x80,
 38 |     BgBlue        = 0x90,
 39 |     BgGreen       = 0xA0,
 40 |     BgCyan        = 0xB0,
 41 |     BgRed         = 0xC0,
 42 |     BgMagenta     = 0xD0,
 43 |     BgYellow      = 0xE0,
 44 |     BgWhite       = 0xF0,
 45 | 
 46 |     Normal = BgBlack | LightGrey
 47 | };
 48 | 
 49 | enum console_buffer
 50 | {
 51 |     ConsoleStdOutput,
 52 |     ConsoleStdError
 53 | };
 54 | 
 55 | #if defined(_WIN32)
 56 | typedef HANDLE console_handle_t;
 57 | 
 58 | inline console_handle_t console_handle(console_buffer console = ConsoleStdOutput)
 59 | {
 60 |     static HANDLE con_out = ::GetStdHandle(STD_OUTPUT_HANDLE);
 61 |     static HANDLE con_err = ::GetStdHandle(STD_ERROR_HANDLE);
 62 |     return console == ConsoleStdOutput ? con_out : con_err;
 63 | }
 64 | 
 65 | #endif
 66 | 
 67 | struct console_color
 68 | {
 69 | public:
 70 |     console_color(text_color c, console_buffer console = ConsoleStdOutput)
 71 |         : m_old(get(console)), m_console(console)
 72 |     {
 73 |         set(c, m_console);
 74 |     }
 75 | 
 76 |     ~console_color() { set(m_old, m_console); }
 77 | 
 78 | private:
 79 |     text_color get(console_buffer console = ConsoleStdOutput)
 80 |     {
 81 | #ifdef _WIN32
 82 |         CONSOLE_SCREEN_BUFFER_INFO info;
 83 |         ::GetConsoleScreenBufferInfo(console_handle(console), &info);
 84 |         return static_cast<text_color>(info.wAttributes & 0xFF);
 85 | #else
 86 |         return static_color();
 87 | #endif
 88 |     }
 89 | 
 90 |     void set(text_color new_color, console_buffer console = ConsoleStdOutput)
 91 |     {
 92 | #ifdef _WIN32
 93 |         ::SetConsoleTextAttribute(console_handle(console), static_cast<WORD>(new_color));
 94 | #else
 95 |         if (new_color != Normal)
 96 |         {
 97 |             uint8_t t    = new_color & 0xF;
 98 |             uint8_t b    = (new_color & 0xF0) >> 4;
 99 |             uint8_t tnum = 30 + ((t & 1) << 2 | (t & 2) | (t & 4) >> 2);
100 |             uint8_t bnum = 40 + ((b & 1) << 2 | (b & 2) | (b & 4) >> 2);
101 |             if (t & 8)
102 |                 tnum += 60;
103 |             if (b & 8)
104 |                 bnum += 60;
105 |             printf("\x1B[%d;%dm", tnum, bnum);
106 |         }
107 |         else
108 |         {
109 |             printf("\x1B[0m");
110 |         }
111 |         static_color() = new_color;
112 | #endif
113 |     }
114 | 
115 |     text_color m_old;
116 |     console_buffer m_console;
117 | #ifndef _WIN32
118 |     static text_color& static_color()
119 |     {
120 |         static text_color color = Normal;
121 |         return color;
122 |     }
123 | #endif
124 | };
125 | 
126 | template <text_color color, console_buffer console = ConsoleStdOutput>
127 | struct colored_text_tpl : public console_color
128 | {
129 | public:
130 |     colored_text_tpl() : console_color(color, console) {}
131 | 
132 | private:
133 | };
134 | 
135 | typedef colored_text_tpl<DarkBlue> darkblue_text;
136 | typedef colored_text_tpl<DarkGreen> darkgreen_text;
137 | typedef colored_text_tpl<DarkCyan> darkcyan_text;
138 | typedef colored_text_tpl<DarkRed> darkred_text;
139 | typedef colored_text_tpl<DarkMagenta> darkmagenta_text;
140 | typedef colored_text_tpl<DarkYellow> darkyellow_text;
141 | typedef colored_text_tpl<LightGrey> lightgrey_text;
142 | typedef colored_text_tpl<Gray> gray_text;
143 | typedef colored_text_tpl<Blue> blue_text;
144 | typedef colored_text_tpl<Green> green_text;
145 | typedef colored_text_tpl<Cyan> cyan_text;
146 | typedef colored_text_tpl<Red> red_text;
147 | typedef colored_text_tpl<Magenta> magenta_text;
148 | typedef colored_text_tpl<Yellow> yellow_text;
149 | typedef colored_text_tpl<White> white_text;
150 | }
151 | 


--------------------------------------------------------------------------------