├── license.txt ├── PhaseHalfPiFpu.h ├── Upsampler2xFpu.h ├── Downsampler2xFpu.h ├── PhaseHalfPiF64Fpu.h ├── Upsampler2xF64Fpu.h ├── Downsampler2xF64Fpu.h ├── fnc.h ├── StageData3dnow.h ├── StageDataSse.h ├── StageDataF64Avx512.h ├── StageDataAvx.h ├── StageDataAvx512.h ├── StageDataF64Avx.h ├── StageDataF64Sse2.h ├── README.md ├── fnc_neon.h ├── fnc.hpp ├── StageDataNeonV2.h ├── StageDataNeonV4.h ├── StageDataNeonV2F64.h ├── fnc_neon.hpp ├── def.h ├── StageProc3dnow.h ├── StageProcSseV2.h ├── StageProcF64Sse2.h ├── StageProcSseV4.h ├── StageProc4Sse.h ├── StageProc4Neon.h ├── StageProc8Avx.h ├── StageProcSseV4.hpp ├── StageProc16Avx512.h ├── StageProc8F64Avx512.h ├── StageProc2F64Neon.h ├── StageProcNeonV2.h ├── StageProc4F64Avx.h ├── StageProcSseV2.hpp ├── StageProc2F64Sse2.h ├── StageProcNeonV2.hpp ├── StageProcF64Sse2.hpp ├── StageProcNeonV4.h ├── StageProcNeonV4.hpp ├── Upsampler2xF64Sse2.h ├── StageProc3dnow.hpp ├── Upsampler2xFpuTpl.h ├── Upsampler2x4Sse.h ├── Upsampler2xSse.h ├── PhaseHalfPiF64Sse2.h ├── Upsampler2x4Neon.h ├── Upsampler2xNeon.h ├── Upsampler2x3dnow.h ├── Upsampler2x8Avx.h ├── Downsampler2xF64Sse2.h ├── Upsampler2x16Avx512.h ├── Upsampler2x8F64Avx512.h ├── Upsampler2x4F64Avx.h ├── Upsampler2x2F64Neon.h ├── Downsampler2xFpuTpl.h ├── Upsampler2x2F64Sse2.h ├── Downsampler2xSse.h ├── PhaseHalfPi4Sse.h ├── PhaseHalfPi8Avx.h ├── PhaseHalfPiFpuTpl.h ├── Upsampler2xNeonOld.h ├── PhaseHalfPi3dnow.h ├── PhaseHalfPi4F64Avx.h ├── PhaseHalfPi2F64Sse2.h ├── PhaseHalfPi16Avx512.h ├── Downsampler2x3dnow.h ├── Upsampler2xSseOld.h ├── PhaseHalfPi8F64Avx512.h ├── PhaseHalfPi4Neon.h ├── Downsampler2xNeon.h ├── PhaseHalfPiSse.h └── PhaseHalfPiNeon.h /license.txt: -------------------------------------------------------------------------------- 1 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 2 | Version 2, December 2004 3 | 4 | Copyright (C) 2004 Sam Hocevar 5 | 6 | Everyone is permitted to copy and distribute verbatim or modified 7 | copies of this license 
document, and changing it is allowed as long 8 | as the name is changed. 9 | 10 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 11 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 12 | 13 | 0. You just DO WHAT THE FUCK YOU WANT TO. 14 | -------------------------------------------------------------------------------- /PhaseHalfPiFpu.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | PhaseHalfPiFpu.h 4 | Author: Laurent de Soras, 2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://www.wtfpl.net/ for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #pragma once 19 | #if ! 
defined (hiir_PhaseHalfPiFpu_HEADER_INCLUDED) 20 | #define hiir_PhaseHalfPiFpu_HEADER_INCLUDED 21 | 22 | 23 | 24 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 25 | 26 | #include "hiir/PhaseHalfPiFpuTpl.h" 27 | 28 | 29 | 30 | namespace hiir 31 | { 32 | 33 | 34 | 35 | template 36 | using PhaseHalfPiFpu = PhaseHalfPiFpuTpl ; 37 | 38 | 39 | 40 | } // namespace hiir 41 | 42 | 43 | 44 | //#include "hiir/PhaseHalfPiFpu.hpp" 45 | 46 | 47 | 48 | #endif // hiir_PhaseHalfPiFpu_HEADER_INCLUDED 49 | 50 | 51 | 52 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 53 | -------------------------------------------------------------------------------- /Upsampler2xFpu.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2xFpu.h 4 | Author: Laurent de Soras, 2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://www.wtfpl.net/ for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #pragma once 19 | #if ! 
defined (hiir_Upsampler2xFpu_HEADER_INCLUDED) 20 | #define hiir_Upsampler2xFpu_HEADER_INCLUDED 21 | 22 | 23 | 24 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 25 | 26 | #include "hiir/Upsampler2xFpuTpl.h" 27 | 28 | 29 | 30 | namespace hiir 31 | { 32 | 33 | 34 | 35 | template 36 | using Upsampler2xFpu = Upsampler2xFpuTpl ; 37 | 38 | 39 | 40 | } // namespace hiir 41 | 42 | 43 | 44 | //#include "hiir/Upsampler2xFpu.hpp" 45 | 46 | 47 | 48 | #endif // hiir_Upsampler2xFpu_HEADER_INCLUDED 49 | 50 | 51 | 52 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 53 | -------------------------------------------------------------------------------- /Downsampler2xFpu.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Downsampler2xFpu.h 4 | Author: Laurent de Soras, 2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://www.wtfpl.net/ for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #pragma once 19 | #if ! 
defined (hiir_Downsampler2xFpu_HEADER_INCLUDED) 20 | #define hiir_Downsampler2xFpu_HEADER_INCLUDED 21 | 22 | 23 | 24 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 25 | 26 | #include "hiir/Downsampler2xFpuTpl.h" 27 | 28 | 29 | 30 | namespace hiir 31 | { 32 | 33 | 34 | 35 | template 36 | using Downsampler2xFpu = Downsampler2xFpuTpl ; 37 | 38 | 39 | 40 | } // namespace hiir 41 | 42 | 43 | 44 | //#include "hiir/Downsampler2xFpu.hpp" 45 | 46 | 47 | 48 | #endif // hiir_Downsampler2xFpu_HEADER_INCLUDED 49 | 50 | 51 | 52 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 53 | -------------------------------------------------------------------------------- /PhaseHalfPiF64Fpu.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | PhaseHalfPiF64Fpu.h 4 | Author: Laurent de Soras, 2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://www.wtfpl.net/ for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #pragma once 19 | #if ! 
defined (hiir_PhaseHalfPiF64Fpu_HEADER_INCLUDED) 20 | #define hiir_PhaseHalfPiF64Fpu_HEADER_INCLUDED 21 | 22 | 23 | 24 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 25 | 26 | #include "hiir/PhaseHalfPiFpuTpl.h" 27 | 28 | 29 | 30 | namespace hiir 31 | { 32 | 33 | 34 | 35 | template 36 | using PhaseHalfPiF64Fpu = PhaseHalfPiFpuTpl ; 37 | 38 | 39 | 40 | } // namespace hiir 41 | 42 | 43 | 44 | //#include "hiir/PhaseHalfPiF64Fpu.hpp" 45 | 46 | 47 | 48 | #endif // hiir_PhaseHalfPiF64Fpu_HEADER_INCLUDED 49 | 50 | 51 | 52 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 53 | -------------------------------------------------------------------------------- /Upsampler2xF64Fpu.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2xF64Fpu.h 4 | Author: Laurent de Soras, 2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://www.wtfpl.net/ for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #pragma once 19 | #if ! 
defined (hiir_Upsampler2xF64Fpu_HEADER_INCLUDED) 20 | #define hiir_Upsampler2xF64Fpu_HEADER_INCLUDED 21 | 22 | 23 | 24 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 25 | 26 | #include "hiir/Upsampler2xFpuTpl.h" 27 | 28 | 29 | 30 | namespace hiir 31 | { 32 | 33 | 34 | 35 | template 36 | using Upsampler2xF64Fpu = Upsampler2xFpuTpl ; 37 | 38 | 39 | 40 | } // namespace hiir 41 | 42 | 43 | 44 | //#include "hiir/Upsampler2xF64Fpu.hpp" 45 | 46 | 47 | 48 | #endif // hiir_Upsampler2xF64Fpu_HEADER_INCLUDED 49 | 50 | 51 | 52 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 53 | -------------------------------------------------------------------------------- /Downsampler2xF64Fpu.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Downsampler2xF64Fpu.h 4 | Author: Laurent de Soras, 2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://www.wtfpl.net/ for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #pragma once 19 | #if ! 
defined (hiir_Downsampler2xF64Fpu_HEADER_INCLUDED) 20 | #define hiir_Downsampler2xF64Fpu_HEADER_INCLUDED 21 | 22 | 23 | 24 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 25 | 26 | #include "hiir/Downsampler2xFpuTpl.h" 27 | 28 | 29 | 30 | namespace hiir 31 | { 32 | 33 | 34 | 35 | template 36 | using Downsampler2xF64Fpu = Downsampler2xFpuTpl ; 37 | 38 | 39 | 40 | } // namespace hiir 41 | 42 | 43 | 44 | //#include "hiir/Downsampler2xF64Fpu.hpp" 45 | 46 | 47 | 48 | #endif // hiir_Downsampler2xF64Fpu_HEADER_INCLUDED 49 | 50 | 51 | 52 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 53 | -------------------------------------------------------------------------------- /fnc.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | fnc.h 4 | Author: Laurent de Soras, 2005 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://sam.zoy.org/wtfpl/COPYING for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #if ! defined (hiir_fnc_HEADER_INCLUDED) 19 | #define hiir_fnc_HEADER_INCLUDED 20 | 21 | #if defined (_MSC_VER) 22 | #pragma once 23 | #pragma warning (4 : 4250) // "Inherits via dominance." 
24 | #endif 25 | 26 | 27 | 28 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 29 | 30 | 31 | 32 | namespace hiir 33 | { 34 | 35 | 36 | 37 | inline int round_int (double x); 38 | inline int ceil_int (double x); 39 | 40 | template 41 | T ipowp (T x, long n); 42 | 43 | 44 | 45 | } // namespace hiir 46 | 47 | 48 | 49 | #include "hiir/fnc.hpp" 50 | 51 | 52 | 53 | #endif // hiir_fnc_HEADER_INCLUDED 54 | 55 | 56 | 57 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 58 | -------------------------------------------------------------------------------- /StageData3dnow.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageData3dnow.h 4 | Author: Laurent de Soras, 2005 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://sam.zoy.org/wtfpl/COPYING for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #if ! defined (hiir_StageData3dnow_HEADER_INCLUDED) 19 | #define hiir_StageData3dnow_HEADER_INCLUDED 20 | 21 | #if defined (_MSC_VER) 22 | #pragma once 23 | #pragma warning (4 : 4250) // "Inherits via dominance." 
24 | #endif 25 | 26 | 27 | 28 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 29 | 30 | #include 31 | 32 | 33 | 34 | namespace hiir 35 | { 36 | 37 | 38 | 39 | class StageData3dnow 40 | { 41 | 42 | public: 43 | __m64 _coefs; // Coefficients are inverted, by pair: a_{2n+1}, a_{2n} 44 | __m64 _mem; // Output of the stage (y) 45 | 46 | }; // class StageData3dnow 47 | 48 | 49 | 50 | } // namespace hiir 51 | 52 | 53 | 54 | #endif // hiir_StageData3dnow_HEADER_INCLUDED 55 | 56 | 57 | 58 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 59 | -------------------------------------------------------------------------------- /StageDataSse.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageDataSse.h 4 | Author: Laurent de Soras, 2005 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://sam.zoy.org/wtfpl/COPYING for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #if ! defined (hiir_StageDataSse_HEADER_INCLUDED) 19 | #define hiir_StageDataSse_HEADER_INCLUDED 20 | 21 | #if defined (_MSC_VER) 22 | #pragma once 23 | #pragma warning (4 : 4250) // "Inherits via dominance." 
24 | #endif 25 | 26 | 27 | 28 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 29 | 30 | #include 31 | 32 | 33 | 34 | namespace hiir 35 | { 36 | 37 | 38 | 39 | class StageDataSse 40 | { 41 | 42 | public: 43 | 44 | union 45 | { 46 | __m128 _coef4; // Just to ensure alignement 47 | float _coef [4]; // a_{4n+1}, a_{4n}, a_{4n+3}, a_{4n+2} 48 | }; 49 | union 50 | { 51 | __m128 _mem4; 52 | float _mem [4]; // y of the stage 53 | }; 54 | 55 | }; // class StageDataSse 56 | 57 | 58 | 59 | } // namespace hiir 60 | 61 | 62 | 63 | #endif // hiir_StageDataSse_HEADER_INCLUDED 64 | 65 | 66 | 67 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 68 | -------------------------------------------------------------------------------- /StageDataF64Avx512.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageDataF64Avx512.h 4 | Author: Laurent de Soras, 2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://sam.zoy.org/wtfpl/COPYING for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #if ! defined (hiir_StageDataF64Avx512_HEADER_INCLUDED) 19 | #define hiir_StageDataF64Avx512_HEADER_INCLUDED 20 | 21 | #if defined (_MSC_VER) 22 | #pragma once 23 | #pragma warning (4 : 4250) // "Inherits via dominance." 
24 | #endif 25 | 26 | 27 | 28 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 29 | 30 | #include 31 | 32 | 33 | 34 | namespace hiir 35 | { 36 | 37 | 38 | 39 | class StageDataF64Avx512 40 | { 41 | 42 | public: 43 | 44 | union 45 | { 46 | __m512d _coef8; // Just to ensure alignement 47 | double _coef [8]; 48 | }; 49 | union 50 | { 51 | __m512d _mem8; 52 | double _mem [8]; // y of the stage 53 | }; 54 | 55 | }; // class StageDataF64Avx512 56 | 57 | 58 | 59 | } // namespace hiir 60 | 61 | 62 | 63 | #endif // hiir_StageDataF64Avx512_HEADER_INCLUDED 64 | 65 | 66 | 67 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 68 | -------------------------------------------------------------------------------- /StageDataAvx.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageDataAvx.h 4 | Port of StageDataSse.h from SSE to AVX by Dario Mambro 5 | StageDataSse.h by Laurent de Soras 6 | 7 | --- Legal stuff --- 8 | 9 | This program is free software. It comes without any warranty, to 10 | the extent permitted by applicable law. You can redistribute it 11 | and/or modify it under the terms of the Do What The Fuck You Want 12 | To Public License, Version 2, as published by Sam Hocevar. See 13 | http://sam.zoy.org/wtfpl/COPYING for more details. 14 | 15 | *Tab=3***********************************************************************/ 16 | 17 | 18 | 19 | #if !defined(hiir_StageDataAvx_HEADER_INCLUDED) 20 | #define hiir_StageDataAvx_HEADER_INCLUDED 21 | 22 | #if defined(_MSC_VER) 23 | #pragma once 24 | #pragma warning(4 : 4250) // "Inherits via dominance." 
25 | #endif 26 | 27 | 28 | 29 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 30 | 31 | #include 32 | 33 | 34 | 35 | namespace hiir 36 | { 37 | 38 | 39 | 40 | class StageDataAvx 41 | { 42 | 43 | public: 44 | 45 | union 46 | { 47 | __m256 _coef8; // Just to ensure alignement 48 | float _coef [8]; 49 | }; 50 | union 51 | { 52 | __m256 _mem8; 53 | float _mem [8]; // y of the stage 54 | }; 55 | 56 | }; // class StageDataAvx 57 | 58 | } // namespace hiir 59 | 60 | 61 | 62 | #endif // hiir_StageDataAvx_HEADER_INCLUDED 63 | 64 | 65 | 66 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 67 | -------------------------------------------------------------------------------- /StageDataAvx512.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageDataAvx512.h 4 | Port of StageDataSse.h from SSE to AVX-512 by Dario Mambro 5 | StageDataSse.h by Laurent de Soras 6 | 7 | --- Legal stuff --- 8 | 9 | This program is free software. It comes without any warranty, to 10 | the extent permitted by applicable law. You can redistribute it 11 | and/or modify it under the terms of the Do What The Fuck You Want 12 | To Public License, Version 2, as published by Sam Hocevar. See 13 | http://sam.zoy.org/wtfpl/COPYING for more details. 14 | 15 | *Tab=3***********************************************************************/ 16 | 17 | 18 | 19 | #if !defined(hiir_StageDataAvx512_HEADER_INCLUDED) 20 | #define hiir_StageDataAvx512_HEADER_INCLUDED 21 | 22 | #if defined(_MSC_VER) 23 | #pragma once 24 | #pragma warning(4 : 4250) // "Inherits via dominance." 
25 | #endif 26 | 27 | 28 | 29 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 30 | 31 | #include 32 | 33 | 34 | 35 | namespace hiir 36 | { 37 | 38 | 39 | 40 | class StageDataAvx512 41 | { 42 | 43 | public: 44 | 45 | union 46 | { 47 | __m512 _coef16; // Just to ensure alignement 48 | float _coef [16]; 49 | }; 50 | union 51 | { 52 | __m512 _mem16; 53 | float _mem [16]; // y of the stage 54 | }; 55 | 56 | }; // class StageDataAvx512 57 | 58 | } // namespace hiir 59 | 60 | 61 | 62 | #endif // hiir_StageDataAvx512_HEADER_INCLUDED 63 | 64 | 65 | 66 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 67 | -------------------------------------------------------------------------------- /StageDataF64Avx.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageDataF64Avx.h 4 | Port of StageDataSse.h from float to double by Dario Mambro 5 | StageDataSse.h by Laurent de Soras, 2005 6 | 7 | --- Legal stuff --- 8 | 9 | This program is free software. It comes without any warranty, to 10 | the extent permitted by applicable law. You can redistribute it 11 | and/or modify it under the terms of the Do What The Fuck You Want 12 | To Public License, Version 2, as published by Sam Hocevar. See 13 | http://sam.zoy.org/wtfpl/COPYING for more details. 14 | 15 | *Tab=3***********************************************************************/ 16 | 17 | 18 | 19 | #if ! defined (hiir_StageDataF64Avx_HEADER_INCLUDED) 20 | #define hiir_StageDataF64Avx_HEADER_INCLUDED 21 | 22 | #if defined (_MSC_VER) 23 | #pragma once 24 | #pragma warning (4 : 4250) // "Inherits via dominance." 
25 | #endif 26 | 27 | 28 | 29 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 30 | 31 | #include 32 | 33 | 34 | 35 | namespace hiir 36 | { 37 | 38 | 39 | 40 | class StageDataF64Avx 41 | { 42 | 43 | public: 44 | 45 | union 46 | { 47 | __m256d _coef4; // Just to ensure alignement 48 | double _coef [4]; 49 | }; 50 | union 51 | { 52 | __m256d _mem4; 53 | double _mem [4]; // y of the stage 54 | }; 55 | 56 | }; // class StageDataF64Avx 57 | 58 | 59 | 60 | } // namespace hiir 61 | 62 | 63 | 64 | #endif // hiir_StageDataF64Avx_HEADER_INCLUDED 65 | 66 | 67 | 68 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 69 | -------------------------------------------------------------------------------- /StageDataF64Sse2.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageDataF64Sse2.h 4 | Port of StageDataSse.h from float to double by Dario Mambro 5 | StageDataSse.h by Laurent de Soras, 2005 6 | 7 | --- Legal stuff --- 8 | 9 | This program is free software. It comes without any warranty, to 10 | the extent permitted by applicable law. You can redistribute it 11 | and/or modify it under the terms of the Do What The Fuck You Want 12 | To Public License, Version 2, as published by Sam Hocevar. See 13 | http://sam.zoy.org/wtfpl/COPYING for more details. 14 | 15 | *Tab=3***********************************************************************/ 16 | 17 | 18 | 19 | #if ! defined (hiir_StageDataF64Sse2_HEADER_INCLUDED) 20 | #define hiir_StageDataF64Sse2_HEADER_INCLUDED 21 | 22 | #if defined (_MSC_VER) 23 | #pragma once 24 | #pragma warning (4 : 4250) // "Inherits via dominance." 
25 | #endif 26 | 27 | 28 | 29 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 30 | 31 | #include 32 | 33 | 34 | 35 | namespace hiir 36 | { 37 | 38 | 39 | 40 | class StageDataF64Sse2 41 | { 42 | 43 | public: 44 | 45 | union 46 | { 47 | __m128d _coef2; // Just to ensure alignement 48 | double _coef [2]; 49 | }; 50 | union 51 | { 52 | __m128d _mem2; 53 | double _mem [2]; // y of the stage 54 | }; 55 | 56 | }; // class StageDataF64Sse2 57 | 58 | 59 | 60 | } // namespace hiir 61 | 62 | 63 | 64 | #endif // hiir_StageDataF64Sse2_HEADER_INCLUDED 65 | 66 | 67 | 68 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HIIR 2 | 3 | This used to be a fork of the [HIIR library by Laurent de Soras](http://ldesoras.free.fr/prod.html), *"an oversampling and Hilbert transform library in C++"*, with some new files that add support for double precision floating point numbers and AVX instructions, making the library able to work with: 4 | 5 | - 8 interleaved channels of single precision floating point data (using AVX instructions). 6 | - 4 interleaved channels of double precision floating point data (using AVX instructions). 7 | - 2 interleaved channels of double precision floating point data (using SSE2 instructions). 8 | 9 | The original HIIR library is already able to work with: 10 | 11 | - 4 interleaved channels of single precision floating point data (using SSE or Neon instructions), see `Upsampler2x4Sse.h` and `Upsampler2x4Neon.h`. 12 | 13 | As of March 2020, this functionality has been merged into the official release of HIIR, version 1.30, which also supports: 14 | 15 | - 16 interleaved channels of single precision floating point data (using AVX512 instructions). 
16 | - 8 interleaved channels of double precision floating point data (using AVX512 instructions). 17 | 18 | This repository has been updated to version 1.30, but is header-only, and does not contain the `test` folder. 19 | 20 | As of April 2021, I added support for double precision floating point data using Neon on ARM AArch64, making the library able to work with: 21 | 22 | - 2 interleaved channels of double precision floating point data (using Neon instructions). 23 | 24 | For usage instructions see the original library readme: `readme.txt`. 25 | 26 | For a cross-platform wrapper around this library (and more), see [oversimple](https://github.com/unevens/oversimple). -------------------------------------------------------------------------------- /fnc_neon.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | fnc_neon.h 4 | Author: Laurent de Soras, 2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://www.wtfpl.net/ for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #pragma once 19 | #if ! 
defined (hiir_fnc_neon_HEADER_INCLUDED) 20 | #define hiir_fnc_neon_HEADER_INCLUDED 21 | 22 | 23 | 24 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 25 | 26 | #include "hiir/def.h" 27 | 28 | #include 29 | 30 | 31 | 32 | namespace hiir 33 | { 34 | 35 | 36 | 37 | hiir_FORCEINLINE float32x4_t load4a (const float *ptr); 38 | hiir_FORCEINLINE float32x4_t load4u (const float *ptr); 39 | hiir_FORCEINLINE float32x2_t load2a (const float *ptr); 40 | hiir_FORCEINLINE float32x2_t load2u (const float *ptr); 41 | hiir_FORCEINLINE void storea (float *ptr, float32x4_t x); 42 | hiir_FORCEINLINE void storeu (float *ptr, float32x4_t x); 43 | hiir_FORCEINLINE void storea (float *ptr, float32x2_t x); 44 | hiir_FORCEINLINE void storeu (float *ptr, float32x2_t x); 45 | 46 | 47 | 48 | } // namespace hiir 49 | 50 | 51 | 52 | #include "hiir/fnc_neon.hpp" 53 | 54 | 55 | 56 | #endif // hiir_fnc_neon_HEADER_INCLUDED 57 | 58 | 59 | 60 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 61 | 62 | -------------------------------------------------------------------------------- /fnc.hpp: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | fnc.hpp 4 | Author: Laurent de Soras, 2005 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://sam.zoy.org/wtfpl/COPYING for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #if defined (hiir_fnc_CURRENT_CODEHEADER) 19 | #error Recursive inclusion of fnc code header. 20 | #endif 21 | #define hiir_fnc_CURRENT_CODEHEADER 22 | 23 | #if ! 
defined (hiir_fnc_CODEHEADER_INCLUDED) 24 | #define hiir_fnc_CODEHEADER_INCLUDED 25 | 26 | 27 | 28 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 29 | 30 | #include 31 | #include 32 | 33 | 34 | 35 | namespace hiir 36 | { 37 | 38 | 39 | 40 | int round_int (double x) 41 | { 42 | return int (floor (x + 0.5)); 43 | } 44 | 45 | 46 | 47 | int ceil_int (double x) 48 | { 49 | return int (ceil (x)); 50 | } 51 | 52 | 53 | 54 | template 55 | T ipowp (T x, long n) 56 | { 57 | assert (n >= 0); 58 | 59 | T z (1); 60 | while (n != 0) 61 | { 62 | if ((n & 1) != 0) 63 | { 64 | z *= x; 65 | } 66 | n >>= 1; 67 | x *= x; 68 | } 69 | 70 | return z; 71 | } 72 | 73 | 74 | 75 | } // namespace hiir 76 | 77 | 78 | 79 | #endif // hiir_fnc_CODEHEADER_INCLUDED 80 | 81 | #undef hiir_fnc_CURRENT_CODEHEADER 82 | 83 | 84 | 85 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 86 | -------------------------------------------------------------------------------- /StageDataNeonV2.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageDataNeonV2.h 4 | Author: Laurent de Soras, 2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://sam.zoy.org/wtfpl/COPYING for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #pragma once 19 | #if ! 
defined (hiir_StageDataNeonV2_HEADER_INCLUDED) 20 | #define hiir_StageDataNeonV2_HEADER_INCLUDED 21 | 22 | #if defined (_MSC_VER) 23 | #pragma warning (4 : 4250) 24 | #endif 25 | 26 | 27 | 28 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 29 | 30 | #include 31 | 32 | 33 | 34 | namespace hiir 35 | { 36 | 37 | 38 | 39 | class StageDataNeonV2 40 | { 41 | 42 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 43 | 44 | public: 45 | 46 | union 47 | { 48 | __attribute__ ((aligned (16))) float32x2_t 49 | _coef2; 50 | __attribute__ ((aligned (16))) float 51 | _coef [2]; 52 | }; 53 | union 54 | { 55 | __attribute__ ((aligned (16))) float32x2_t 56 | _mem2; 57 | __attribute__ ((aligned (16))) float 58 | _mem [2]; // y of the stage 59 | }; 60 | 61 | 62 | 63 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 64 | 65 | protected: 66 | 67 | 68 | 69 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 70 | 71 | private: 72 | 73 | 74 | 75 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 76 | 77 | private: 78 | 79 | }; // class StageDataNeonV2 80 | 81 | 82 | 83 | } // namespace hiir 84 | 85 | 86 | 87 | //#include "hiir/StageDataNeonV2.hpp" 88 | 89 | 90 | 91 | #endif // hiir_StageDataNeonV2_HEADER_INCLUDED 92 | 93 | 94 | 95 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 96 | -------------------------------------------------------------------------------- /StageDataNeonV4.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageDataNeonV4.h 4 | Author: Laurent de Soras, 2016 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. 
You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://sam.zoy.org/wtfpl/COPYING for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #pragma once 19 | #if ! defined (hiir_StageDataNeonV4_HEADER_INCLUDED) 20 | #define hiir_StageDataNeonV4_HEADER_INCLUDED 21 | 22 | #if defined (_MSC_VER) 23 | #pragma warning (4 : 4250) 24 | #endif 25 | 26 | 27 | 28 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 29 | 30 | #include 31 | 32 | 33 | 34 | namespace hiir 35 | { 36 | 37 | 38 | 39 | class StageDataNeonV4 40 | { 41 | 42 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 43 | 44 | public: 45 | 46 | union 47 | { 48 | __attribute__ ((aligned (16))) float32x4_t 49 | _coef4; 50 | __attribute__ ((aligned (16))) float 51 | _coef [4]; // a_{4n+1}, a_{4n}, a_{4n+3}, a_{4n+2} 52 | }; 53 | union 54 | { 55 | __attribute__ ((aligned (16))) float32x4_t 56 | _mem4; 57 | __attribute__ ((aligned (16))) float 58 | _mem [4]; // y of the stage 59 | }; 60 | 61 | 62 | 63 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 64 | 65 | protected: 66 | 67 | 68 | 69 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 70 | 71 | private: 72 | 73 | 74 | 75 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 76 | 77 | private: 78 | 79 | }; // class StageDataNeonV4 80 | 81 | 82 | 83 | } // namespace hiir 84 | 85 | 86 | 87 | //#include "hiir/StageDataNeonV4.hpp" 88 | 89 | 90 | 91 | #endif // hiir_StageDataNeonV4_HEADER_INCLUDED 92 | 93 | 94 | 95 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 96 | -------------------------------------------------------------------------------- /StageDataNeonV2F64.h: 
-------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageDataNeonV2F64.h 4 | Author: Laurent de Soras, 2020 5 | Ported to double by Dario Mambro, 2020 6 | 7 | --- Legal stuff --- 8 | 9 | This program is free software. It comes without any warranty, to 10 | the extent permitted by applicable law. You can redistribute it 11 | and/or modify it under the terms of the Do What The Fuck You Want 12 | To Public License, Version 2, as published by Sam Hocevar. See 13 | http://sam.zoy.org/wtfpl/COPYING for more details. 14 | 15 | *Tab=3***********************************************************************/ 16 | 17 | 18 | 19 | #pragma once 20 | #if ! defined (hiir_StageDataNeonV2F64_HEADER_INCLUDED) 21 | #define hiir_StageDataNeonV2F64_HEADER_INCLUDED 22 | 23 | #if defined (_MSC_VER) 24 | #pragma warning (4 : 4250) 25 | #endif 26 | 27 | 28 | 29 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 30 | 31 | #include 32 | 33 | 34 | 35 | namespace hiir 36 | { 37 | 38 | 39 | 40 | class StageDataNeonV2F64 41 | { 42 | 43 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 44 | 45 | public: 46 | 47 | union 48 | { 49 | __attribute__ ((aligned (32))) float64x2_t 50 | _coef2; 51 | __attribute__ ((aligned (32))) double 52 | _coef [2]; 53 | }; 54 | union 55 | { 56 | __attribute__ ((aligned (32))) float64x2_t 57 | _mem2; 58 | __attribute__ ((aligned (32))) double 59 | _mem [2]; // y of the stage 60 | }; 61 | 62 | 63 | 64 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 65 | 66 | protected: 67 | 68 | 69 | 70 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 71 | 72 | private: 73 | 74 | 75 | 76 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 77 | 78 | private: 79 | 80 | }; // class StageDataNeonV2F64 81 | 82 | 83 | 84 | } 
// namespace hiir 85 | 86 | 87 | 88 | //#include "hiir/StageDataNeonV2F64.hpp" 89 | 90 | 91 | 92 | #endif // hiir_StageDataNeonV2F64_HEADER_INCLUDED 93 | 94 | 95 | 96 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 97 | -------------------------------------------------------------------------------- /fnc_neon.hpp: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | fnc_neon.hpp 4 | Author: Laurent de Soras, 2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://www.wtfpl.net/ for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #if ! 
defined (hiir_fnc_neon_CODEHEADER_INCLUDED) 19 | #define hiir_fnc_neon_CODEHEADER_INCLUDED 20 | 21 | 22 | 23 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 24 | 25 | #include 26 | 27 | #include 28 | 29 | 30 | 31 | namespace hiir 32 | { 33 | 34 | 35 | 36 | float32x4_t load4a (const float *ptr) 37 | { 38 | return vld1q_f32 (ptr); 39 | } 40 | 41 | 42 | 43 | float32x4_t load4u (const float *ptr) 44 | { 45 | return vreinterpretq_f32_u8 (vld1q_u8 ( 46 | reinterpret_cast (ptr) 47 | )); 48 | } 49 | 50 | 51 | 52 | float32x2_t load2a (const float *ptr) 53 | { 54 | return vld1_f32 (ptr); 55 | } 56 | 57 | 58 | 59 | float32x2_t load2u (const float *ptr) 60 | { 61 | return vreinterpret_f32_u8 (vld1_u8 ( 62 | reinterpret_cast (ptr) 63 | )); 64 | } 65 | 66 | 67 | 68 | void storea (float *ptr, float32x4_t x) 69 | { 70 | vst1q_f32 (ptr, x); 71 | } 72 | 73 | 74 | 75 | void storeu (float *ptr, float32x4_t x) 76 | { 77 | vst1q_u8 (reinterpret_cast (ptr), vreinterpretq_u8_f32 (x)); 78 | } 79 | 80 | 81 | 82 | void storea (float *ptr, float32x2_t x) 83 | { 84 | vst1_f32 (ptr, x); 85 | } 86 | 87 | 88 | 89 | void storeu (float *ptr, float32x2_t x) 90 | { 91 | vst1_u8 (reinterpret_cast (ptr), vreinterpret_u8_f32 (x)); 92 | } 93 | 94 | 95 | 96 | } // namespace hiir 97 | 98 | 99 | 100 | #endif // hiir_fnc_neon_CODEHEADER_INCLUDED 101 | 102 | 103 | 104 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 105 | 106 | -------------------------------------------------------------------------------- /def.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | def.h 4 | Author: Laurent de Soras, 2005 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. 
You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://sam.zoy.org/wtfpl/COPYING for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #if ! defined (hiir_def_HEADER_INCLUDED) 19 | #define hiir_def_HEADER_INCLUDED 20 | 21 | #if defined (_MSC_VER) 22 | #pragma once 23 | #pragma warning (4 : 4250) // "Inherits via dominance." 24 | #endif 25 | 26 | 27 | 28 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 29 | 30 | 31 | 32 | namespace hiir 33 | { 34 | 35 | 36 | 37 | // Architecture class 38 | #define hiir_ARCHI_X86 (1) 39 | #define hiir_ARCHI_ARM (2) 40 | #define hiir_ARCHI_PPC (3) 41 | #define hiir_ARCHI_OTHER (666) 42 | 43 | #if defined (__i386__) || defined (_M_IX86) || defined (_X86_) || defined (_M_X64) || defined (__x86_64__) || defined (__INTEL__) 44 | #define hiir_ARCHI hiir_ARCHI_X86 45 | #elif defined (__arm__) || defined (__arm) || defined (__arm64__) || defined (__arm64) || defined (_M_ARM) || defined (__aarch64__) 46 | #define hiir_ARCHI hiir_ARCHI_ARM 47 | #elif defined (__POWERPC__) || defined (__powerpc) || defined (_powerpc) 48 | #define hiir_ARCHI hiir_ARCHI_PPC 49 | #else 50 | #define hiir_ARCHI hiir_ARCHI_OTHER 51 | #endif 52 | 53 | // Inlining 54 | #if defined (_MSC_VER) 55 | #define hiir_FORCEINLINE __forceinline 56 | #elif defined (__GNUC__) 57 | #define hiir_FORCEINLINE inline __attribute__((always_inline)) 58 | #else 59 | #define hiir_FORCEINLINE inline 60 | #endif 61 | 62 | // Alignment 63 | #if defined (_MSC_VER) 64 | #define hiir_TYPEDEF_ALIGN( alignsize, srctype, dsttype) \ 65 | typedef __declspec (align (alignsize)) srctype dsttype 66 | #elif defined (__GNUC__) 67 | #define hiir_TYPEDEF_ALIGN( alignsize, srctype, dsttype) \ 68 | typedef srctype dsttype __attribute__ ((aligned (alignsize))) 69 | #else 70 | #error 
Undefined for this compiler 71 | #endif 72 | 73 | 74 | 75 | const double PI = 3.1415926535897932384626433832795; 76 | 77 | 78 | 79 | } // namespace hiir 80 | 81 | 82 | 83 | #endif // hiir_def_HEADER_INCLUDED 84 | 85 | 86 | 87 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 88 | -------------------------------------------------------------------------------- /StageProc3dnow.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProc3dnow.h 4 | Author: Laurent de Soras, 2005 5 | 6 | Template parameters: 7 | - CUR: Number of remaining coefficients to process, >= 0 8 | 9 | --- Legal stuff --- 10 | 11 | This program is free software. It comes without any warranty, to 12 | the extent permitted by applicable law. You can redistribute it 13 | and/or modify it under the terms of the Do What The Fuck You Want 14 | To Public License, Version 2, as published by Sam Hocevar. See 15 | http://sam.zoy.org/wtfpl/COPYING for more details. 16 | 17 | *Tab=3***********************************************************************/ 18 | 19 | 20 | 21 | #if ! defined (hiir_StageProc3dnow_HEADER_INCLUDED) 22 | #define hiir_StageProc3dnow_HEADER_INCLUDED 23 | 24 | #if defined (_MSC_VER) 25 | #pragma once 26 | #pragma warning (4 : 4250) // "Inherits via dominance." 
27 | #endif 28 | 29 | 30 | 31 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 32 | 33 | #include "hiir/def.h" 34 | 35 | 36 | 37 | namespace hiir 38 | { 39 | 40 | 41 | 42 | template 43 | class StageProc3dnow 44 | { 45 | 46 | static_assert ((CUR >= 0), "CUR must be >= 0"); 47 | 48 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 49 | 50 | public: 51 | 52 | static hiir_FORCEINLINE void 53 | process_sample_pos (); 54 | static hiir_FORCEINLINE void 55 | process_sample_neg (); 56 | 57 | 58 | 59 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 60 | 61 | protected: 62 | 63 | 64 | 65 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 66 | 67 | private: 68 | 69 | 70 | 71 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 72 | 73 | private: 74 | 75 | StageProc3dnow () = delete; 76 | StageProc3dnow (const StageProc3dnow &other) = delete; 77 | StageProc3dnow (StageProc3dnow &&other) = delete; 78 | ~StageProc3dnow () = delete; 79 | StageProc3dnow & 80 | operator = (const StageProc3dnow &other) = delete; 81 | StageProc3dnow & 82 | operator = (StageProc3dnow &&other) = delete; 83 | bool operator == (const StageProc3dnow &other) const = delete; 84 | bool operator != (const StageProc3dnow &other) const = delete; 85 | 86 | }; // class StageProc3dnow 87 | 88 | 89 | 90 | } // namespace hiir 91 | 92 | 93 | 94 | #include "hiir/StageProc3dnow.hpp" 95 | 96 | 97 | 98 | #endif // hiir_StageProc3dnow_HEADER_INCLUDED 99 | 100 | 101 | 102 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 103 | -------------------------------------------------------------------------------- /StageProcSseV2.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProcSseV2.h 4 | Author: Laurent de Soras, 
2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://www.wtfpl.net/ for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #pragma once 19 | #if ! defined (hiir_StageProcSseV2_HEADER_INCLUDED) 20 | #define hiir_StageProcSseV2_HEADER_INCLUDED 21 | 22 | 23 | 24 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 25 | 26 | #include "hiir/def.h" 27 | 28 | #include 29 | 30 | 31 | 32 | namespace hiir 33 | { 34 | 35 | 36 | 37 | class StageDataSse; 38 | 39 | template 40 | class StageProcSseV2 41 | { 42 | 43 | static_assert ((CUR >= 0), "CUR must be >= 0"); 44 | 45 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 46 | 47 | public: 48 | 49 | static hiir_FORCEINLINE void 50 | process_sample_pos (__m128 &x, StageDataSse *stage_arr); 51 | static hiir_FORCEINLINE void 52 | process_sample_neg (__m128 &x, StageDataSse *stage_arr); 53 | 54 | static hiir_FORCEINLINE void 55 | process_sample_pos_rec (__m128 &x, StageDataSse *stage_arr); 56 | static hiir_FORCEINLINE void 57 | process_sample_neg_rec (__m128 &x, StageDataSse *stage_arr); 58 | 59 | 60 | 61 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 62 | 63 | protected: 64 | 65 | 66 | 67 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 68 | 69 | private: 70 | 71 | 72 | 73 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 74 | 75 | private: 76 | 77 | StageProcSseV2 () = delete; 78 | StageProcSseV2 (const StageProcSseV2 &other) = delete; 79 | StageProcSseV2 (StageProcSseV2 &&other) = delete; 80 | ~StageProcSseV2 () = delete; 81 | StageProcSseV2 
& 82 | operator = (const StageProcSseV2 &other) = delete; 83 | StageProcSseV2 & 84 | operator = (StageProcSseV2 &&other) = delete; 85 | bool operator == (const StageProcSseV2 &other) const = delete; 86 | bool operator != (const StageProcSseV2 &other) const = delete; 87 | 88 | }; // class StageProcSseV2 89 | 90 | 91 | 92 | } // namespace hiir 93 | 94 | 95 | 96 | #include "hiir/StageProcSseV2.hpp" 97 | 98 | 99 | 100 | #endif // hiir_StageProcSseV2_HEADER_INCLUDED 101 | 102 | 103 | 104 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 105 | -------------------------------------------------------------------------------- /StageProcF64Sse2.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProcF64Sse2.h 4 | Author: Laurent de Soras, 2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://www.wtfpl.net/ for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #pragma once 19 | #if ! 
defined (hiir_StageProcF64Sse2_HEADER_INCLUDED) 20 | #define hiir_StageProcF64Sse2_HEADER_INCLUDED 21 | 22 | 23 | 24 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 25 | 26 | #include "hiir/def.h" 27 | 28 | #include 29 | 30 | 31 | 32 | namespace hiir 33 | { 34 | 35 | 36 | 37 | class StageDataF64Sse2; 38 | 39 | template 40 | class StageProcF64Sse2 41 | { 42 | 43 | static_assert ((CUR >= 0), "CUR must be >= 0"); 44 | 45 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 46 | 47 | public: 48 | 49 | static hiir_FORCEINLINE void 50 | process_sample_pos (__m128d &x, StageDataF64Sse2 *stage_arr); 51 | static hiir_FORCEINLINE void 52 | process_sample_neg (__m128d &x, StageDataF64Sse2 *stage_arr); 53 | 54 | static hiir_FORCEINLINE void 55 | process_sample_pos_rec (__m128d &x, StageDataF64Sse2 *stage_arr); 56 | static hiir_FORCEINLINE void 57 | process_sample_neg_rec (__m128d &x, StageDataF64Sse2 *stage_arr); 58 | 59 | 60 | 61 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 62 | 63 | protected: 64 | 65 | 66 | 67 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 68 | 69 | private: 70 | 71 | 72 | 73 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 74 | 75 | private: 76 | 77 | StageProcF64Sse2 () = delete; 78 | StageProcF64Sse2 (const StageProcF64Sse2 &other) = delete; 79 | StageProcF64Sse2 (StageProcF64Sse2 &&other) = delete; 80 | ~StageProcF64Sse2 () = delete; 81 | StageProcF64Sse2 & 82 | operator = (const StageProcF64Sse2 &other) = delete; 83 | StageProcF64Sse2 & 84 | operator = (StageProcF64Sse2 &&other) = delete; 85 | bool operator == (const StageProcF64Sse2 &other) const = delete; 86 | bool operator != (const StageProcF64Sse2 &other) const = delete; 87 | 88 | }; // class StageProcF64Sse2 89 | 90 | 91 | 92 | } // namespace hiir 93 | 94 | 95 | 96 | #include "hiir/StageProcF64Sse2.hpp" 97 | 98 | 99 | 100 | #endif // 
hiir_StageProcF64Sse2_HEADER_INCLUDED 101 | 102 | 103 | 104 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 105 | -------------------------------------------------------------------------------- /StageProcSseV4.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProcSseV4.h 4 | Author: Laurent de Soras, 2005 5 | 6 | Template parameters: 7 | - CUR: index of the coefficient to process, >= 0 8 | 9 | --- Legal stuff --- 10 | 11 | This program is free software. It comes without any warranty, to 12 | the extent permitted by applicable law. You can redistribute it 13 | and/or modify it under the terms of the Do What The Fuck You Want 14 | To Public License, Version 2, as published by Sam Hocevar. See 15 | http://sam.zoy.org/wtfpl/COPYING for more details. 16 | 17 | *Tab=3***********************************************************************/ 18 | 19 | 20 | 21 | #if ! defined (hiir_StageProcSseV4_HEADER_INCLUDED) 22 | #define hiir_StageProcSseV4_HEADER_INCLUDED 23 | 24 | #if defined (_MSC_VER) 25 | #pragma once 26 | #pragma warning (4 : 4250) // "Inherits via dominance." 
27 | #endif 28 | 29 | 30 | 31 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 32 | 33 | #include "hiir/def.h" 34 | 35 | #include 36 | 37 | 38 | 39 | namespace hiir 40 | { 41 | 42 | 43 | 44 | class StageDataSse; 45 | 46 | template 47 | class StageProcSseV4 48 | { 49 | 50 | static_assert ((CUR >= 0), "CUR must be >= 0"); 51 | 52 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 53 | 54 | public: 55 | 56 | static hiir_FORCEINLINE void 57 | process_sample_pos (StageDataSse *stage_ptr, __m128 &y, __m128 &mem); 58 | static hiir_FORCEINLINE void 59 | process_sample_neg (StageDataSse *stage_ptr, __m128 &y, __m128 &mem); 60 | 61 | 62 | 63 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 64 | 65 | protected: 66 | 67 | 68 | 69 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 70 | 71 | private: 72 | 73 | enum { PREV = CUR - 1 }; 74 | 75 | 76 | 77 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 78 | 79 | private: 80 | 81 | StageProcSseV4 () = delete; 82 | StageProcSseV4 (const StageProcSseV4 &other) = delete; 83 | StageProcSseV4 (StageProcSseV4 &&other) = delete; 84 | ~StageProcSseV4 () = delete; 85 | StageProcSseV4 & 86 | operator = (const StageProcSseV4 &other) = delete; 87 | StageProcSseV4 & 88 | operator = (StageProcSseV4 &&other) = delete; 89 | bool operator == (const StageProcSseV4 &other) = delete; 90 | bool operator != (const StageProcSseV4 &other) = delete; 91 | 92 | }; // class StageProcSseV4 93 | 94 | 95 | 96 | } // namespace hiir 97 | 98 | 99 | 100 | #include "hiir/StageProcSseV4.hpp" 101 | 102 | 103 | 104 | #endif // hiir_StageProcSseV4_HEADER_INCLUDED 105 | 106 | 107 | 108 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 109 | -------------------------------------------------------------------------------- /StageProc4Sse.h: 
-------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProc4Sse.h 4 | Author: Laurent de Soras, 2015 5 | 6 | Template parameters: 7 | - REMAINING: Number of remaining coefficients to process, >= 0 8 | 9 | --- Legal stuff --- 10 | 11 | This program is free software. It comes without any warranty, to 12 | the extent permitted by applicable law. You can redistribute it 13 | and/or modify it under the terms of the Do What The Fuck You Want 14 | To Public License, Version 2, as published by Sam Hocevar. See 15 | http://sam.zoy.org/wtfpl/COPYING for more details. 16 | 17 | *Tab=3***********************************************************************/ 18 | 19 | 20 | 21 | #if ! defined (hiir_StageProc4Sse_HEADER_INCLUDED) 22 | #define hiir_StageProc4Sse_HEADER_INCLUDED 23 | 24 | #if defined (_MSC_VER) 25 | #pragma once 26 | #pragma warning (4 : 4250) // "Inherits via dominance." 
27 | #endif 28 | 29 | 30 | 31 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 32 | 33 | #include "hiir/def.h" 34 | 35 | #include 36 | 37 | 38 | 39 | namespace hiir 40 | { 41 | 42 | 43 | 44 | class StageDataSse; 45 | 46 | template 47 | class StageProc4Sse 48 | { 49 | 50 | static_assert ((REMAINING >= 0), "REMAINING must be >= 0"); 51 | 52 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 53 | 54 | public: 55 | 56 | static hiir_FORCEINLINE void 57 | process_sample_pos (const int nbr_coefs, __m128 &spl_0, __m128 &spl_1, StageDataSse *stage_arr); 58 | static hiir_FORCEINLINE void 59 | process_sample_neg (const int nbr_coefs, __m128 &spl_0, __m128 &spl_1, StageDataSse *stage_arr); 60 | 61 | 62 | 63 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 64 | 65 | protected: 66 | 67 | 68 | 69 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 70 | 71 | private: 72 | 73 | 74 | 75 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 76 | 77 | private: 78 | 79 | StageProc4Sse () = delete; 80 | StageProc4Sse (const StageProc4Sse &other) = delete; 81 | StageProc4Sse (StageProc4Sse &&other) = delete; 82 | ~StageProc4Sse () = delete; 83 | StageProc4Sse & 84 | operator = (const StageProc4Sse &other) = delete; 85 | StageProc4Sse & 86 | operator = (StageProc4Sse &&other) = delete; 87 | bool operator == (const StageProc4Sse &other) = delete; 88 | bool operator != (const StageProc4Sse &other) = delete; 89 | 90 | }; // class StageProc4Sse 91 | 92 | 93 | 94 | } // namespace hiir 95 | 96 | 97 | 98 | #include "hiir/StageProc4Sse.hpp" 99 | 100 | 101 | 102 | #endif // hiir_StageProc4Sse_HEADER_INCLUDED 103 | 104 | 105 | 106 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 107 | -------------------------------------------------------------------------------- /StageProc4Neon.h: 
-------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProc4Neon.h 4 | Author: Laurent de Soras, 2016 5 | 6 | Template parameters: 7 | - REMAINING: Number of remaining coefficients to process, >= 0 8 | 9 | --- Legal stuff --- 10 | 11 | This program is free software. It comes without any warranty, to 12 | the extent permitted by applicable law. You can redistribute it 13 | and/or modify it under the terms of the Do What The Fuck You Want 14 | To Public License, Version 2, as published by Sam Hocevar. See 15 | http://sam.zoy.org/wtfpl/COPYING for more details. 16 | 17 | *Tab=3***********************************************************************/ 18 | 19 | 20 | 21 | #pragma once 22 | #if ! defined (hiir_StageProc4Neon_HEADER_INCLUDED) 23 | #define hiir_StageProc4Neon_HEADER_INCLUDED 24 | 25 | #if defined (_MSC_VER) 26 | #pragma warning (4 : 4250) 27 | #endif 28 | 29 | 30 | 31 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 32 | 33 | #include "hiir/def.h" 34 | 35 | 36 | 37 | namespace hiir 38 | { 39 | 40 | 41 | 42 | class StageDataNeonV4; 43 | 44 | template 45 | class StageProc4Neon 46 | { 47 | static_assert (REMAINING >= 0, "REMAINING must be >= 0."); 48 | 49 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 50 | 51 | public: 52 | 53 | static hiir_FORCEINLINE void 54 | process_sample_pos (const int nbr_coefs, float32x4_t &spl_0, float32x4_t &spl_1, StageDataNeonV4 *stage_arr); 55 | static hiir_FORCEINLINE void 56 | process_sample_neg (const int nbr_coefs, float32x4_t &spl_0, float32x4_t &spl_1, StageDataNeonV4 *stage_arr); 57 | 58 | 59 | 60 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 61 | 62 | protected: 63 | 64 | 65 | 66 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 67 | 68 | private: 69 | 70 | 71 | 72 | /*\\\ 
FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 73 | 74 | private: 75 | 76 | StageProc4Neon () = delete; 77 | StageProc4Neon (const StageProc4Neon &other) = delete; 78 | StageProc4Neon (StageProc4Neon &&other) = delete; 79 | ~StageProc4Neon () = delete; 80 | StageProc4Neon & 81 | operator = (const StageProc4Neon &other) = delete; 82 | StageProc4Neon & 83 | operator = (StageProc4Neon &&other) = delete; 84 | bool operator == (const StageProc4Neon &other) const = delete; 85 | bool operator != (const StageProc4Neon &other) const = delete; 86 | 87 | }; // class StageProc4Neon 88 | 89 | 90 | 91 | } // namespace hiir 92 | 93 | 94 | 95 | #include "hiir/StageProc4Neon.hpp" 96 | 97 | 98 | 99 | #endif // hiir_StageProc4Neon_HEADER_INCLUDED 100 | 101 | 102 | 103 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 104 | -------------------------------------------------------------------------------- /StageProc8Avx.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProc8Avx.h 4 | Port of StageProc4See.h from SSE to AVX by Dario Mambro 5 | StageProc4See.h by Laurent de Soras 6 | 7 | Template parameters: 8 | - REMAINING: Number of remaining coefficients to process, >= 0 9 | 10 | --- Legal stuff --- 11 | 12 | This program is free software. It comes without any warranty, to 13 | the extent permitted by applicable law. You can redistribute it 14 | and/or modify it under the terms of the Do What The Fuck You Want 15 | To Public License, Version 2, as published by Sam Hocevar. See 16 | http://sam.zoy.org/wtfpl/COPYING for more details. 17 | 18 | *Tab=3***********************************************************************/ 19 | 20 | 21 | 22 | #if ! 
defined (hiir_StageProc8Avx_HEADER_INCLUDED) 23 | #define hiir_StageProc8Avx_HEADER_INCLUDED 24 | 25 | #if defined (_MSC_VER) 26 | #pragma once 27 | #pragma warning (4 : 4250) // "Inherits via dominance." 28 | #endif 29 | 30 | 31 | 32 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 33 | 34 | #include "hiir/def.h" 35 | 36 | //#include 37 | #include 38 | 39 | 40 | namespace hiir 41 | { 42 | 43 | 44 | 45 | class StageDataAvx; 46 | 47 | template 48 | class StageProc8Avx 49 | { 50 | 51 | static_assert ((REMAINING >= 0), "REMAINING must be >= 0"); 52 | 53 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 54 | 55 | public: 56 | 57 | static hiir_FORCEINLINE void 58 | process_sample_pos (const int nbr_coefs, __m256 &spl_0, __m256 &spl_1, StageDataAvx *stage_arr); 59 | static hiir_FORCEINLINE void 60 | process_sample_neg (const int nbr_coefs, __m256 &spl_0, __m256 &spl_1, StageDataAvx *stage_arr); 61 | 62 | 63 | 64 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 65 | 66 | protected: 67 | 68 | 69 | 70 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 71 | 72 | private: 73 | 74 | 75 | 76 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 77 | 78 | private: 79 | 80 | StageProc8Avx () = delete; 81 | StageProc8Avx (const StageProc8Avx &other) = delete; 82 | StageProc8Avx (StageProc8Avx &&other) = delete; 83 | ~StageProc8Avx () = delete; 84 | StageProc8Avx & 85 | operator = (const StageProc8Avx &other) = delete; 86 | StageProc8Avx & 87 | operator = (StageProc8Avx &&other) = delete; 88 | bool operator == (const StageProc8Avx &other) = delete; 89 | bool operator != (const StageProc8Avx &other) = delete; 90 | 91 | }; // class StageProc8Avx 92 | 93 | 94 | 95 | } // namespace hiir 96 | 97 | 98 | 99 | #include "hiir/StageProc8Avx.hpp" 100 | 101 | 102 | 103 | #endif // hiir_StageProc8Avx_HEADER_INCLUDED 104 | 105 | 106 | 107 | /*\\\ 
EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 108 | -------------------------------------------------------------------------------- /StageProcSseV4.hpp: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProcSseV4.hpp 4 | Author: Laurent de Soras, 2005 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://sam.zoy.org/wtfpl/COPYING for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #if defined (hiir_StageProcSseV4_CURRENT_CODEHEADER) 19 | #error Recursive inclusion of StageProcSseV4 code header. 20 | #endif 21 | #define hiir_StageProcSseV4_CURRENT_CODEHEADER 22 | 23 | #if ! 
defined (hiir_StageProcSseV4_CODEHEADER_INCLUDED) 24 | #define hiir_StageProcSseV4_CODEHEADER_INCLUDED 25 | 26 | 27 | 28 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 29 | 30 | #include "hiir/StageDataSse.h" 31 | 32 | #if defined (_MSC_VER) 33 | #pragma inline_depth (255) 34 | #endif 35 | 36 | 37 | 38 | namespace hiir 39 | { 40 | 41 | 42 | 43 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 44 | 45 | 46 | 47 | template 48 | void StageProcSseV4 ::process_sample_pos (StageDataSse *stage_ptr, __m128 &y, __m128 &mem) 49 | { 50 | StageProcSseV4 ::process_sample_pos (stage_ptr, y, mem); 51 | 52 | const __m128 x = mem; 53 | _mm_store_ps (stage_ptr [PREV]._mem, y); 54 | 55 | mem = _mm_load_ps (stage_ptr [CUR]._mem); 56 | y = _mm_sub_ps (y, mem); 57 | const __m128 coef = _mm_load_ps (stage_ptr [CUR]._coef); 58 | y = _mm_mul_ps (y, coef); 59 | y = _mm_add_ps (y, x); 60 | } 61 | 62 | template <> 63 | hiir_FORCEINLINE void StageProcSseV4 <0>::process_sample_pos (StageDataSse * /* stage_ptr */, __m128 & /* y */, __m128 & /* mem */) 64 | { 65 | // Nothing, stops the recursion 66 | } 67 | 68 | 69 | 70 | template 71 | void StageProcSseV4 ::process_sample_neg (StageDataSse *stage_ptr, __m128 &y, __m128 &mem) 72 | { 73 | StageProcSseV4 ::process_sample_neg (stage_ptr, y, mem); 74 | 75 | const __m128 x = mem; 76 | _mm_store_ps (stage_ptr [PREV]._mem, y); 77 | 78 | mem = _mm_load_ps (stage_ptr [CUR]._mem); 79 | y = _mm_add_ps (y, mem); 80 | const __m128 coef = _mm_load_ps (stage_ptr [CUR]._coef); 81 | y = _mm_mul_ps (y, coef); 82 | y = _mm_sub_ps (y, x); 83 | } 84 | 85 | template <> 86 | hiir_FORCEINLINE void StageProcSseV4 <0>::process_sample_neg (StageDataSse * /* stage_ptr */, __m128 & /* y */, __m128 & /* mem */) 87 | { 88 | // Nothing, stops the recursion 89 | } 90 | 91 | 92 | 93 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 94 | 95 | 96 | 97 | /*\\\ PRIVATE 
\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 98 | 99 | 100 | 101 | } // namespace hiir 102 | 103 | 104 | 105 | #endif // hiir_StageProcSseV4_CODEHEADER_INCLUDED 106 | 107 | #undef hiir_StageProcSseV4_CURRENT_CODEHEADER 108 | 109 | 110 | 111 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 112 | -------------------------------------------------------------------------------- /StageProc16Avx512.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProc16Avx512.h 4 | Author: Laurent de Soras, 2020 5 | 6 | Template parameters: 7 | - REMAINING: Number of remaining coefficients to process, >= 0 8 | 9 | --- Legal stuff --- 10 | 11 | This program is free software. It comes without any warranty, to 12 | the extent permitted by applicable law. You can redistribute it 13 | and/or modify it under the terms of the Do What The Fuck You Want 14 | To Public License, Version 2, as published by Sam Hocevar. See 15 | http://sam.zoy.org/wtfpl/COPYING for more details. 16 | 17 | *Tab=3***********************************************************************/ 18 | 19 | 20 | 21 | #if ! defined (hiir_StageProc16Avx512_HEADER_INCLUDED) 22 | #define hiir_StageProc16Avx512_HEADER_INCLUDED 23 | 24 | #if defined (_MSC_VER) 25 | #pragma once 26 | #pragma warning (4 : 4250) // "Inherits via dominance." 
27 | #endif 28 | 29 | 30 | 31 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 32 | 33 | #include "hiir/def.h" 34 | 35 | //#include 36 | #include 37 | 38 | 39 | namespace hiir 40 | { 41 | 42 | 43 | 44 | class StageDataAvx512; 45 | /* StageProc16Avx512 only groups two static functions, process_sample_pos and process_sample_neg. Both receive nbr_coefs, the __m512 values spl_0 and spl_1 by reference, and a StageDataAvx512 pointer (stage_arr). The static_assert rejects a negative REMAINING. All constructors, the destructor, the assignment and the comparison operators are deleted, so no object of this class can be created. The function bodies come from hiir/StageProc16Avx512.hpp, included at the end of this header. */ 46 | template 47 | class StageProc16Avx512 48 | { 49 | 50 | static_assert ((REMAINING >= 0), "REMAINING must be >= 0"); 51 | 52 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 53 | 54 | public: 55 | 56 | static hiir_FORCEINLINE void 57 | process_sample_pos (const int nbr_coefs, __m512 &spl_0, __m512 &spl_1, StageDataAvx512 *stage_arr); 58 | static hiir_FORCEINLINE void 59 | process_sample_neg (const int nbr_coefs, __m512 &spl_0, __m512 &spl_1, StageDataAvx512 *stage_arr); 60 | 61 | 62 | 63 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 64 | 65 | protected: 66 | 67 | 68 | 69 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 70 | 71 | private: 72 | 73 | 74 | 75 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 76 | 77 | private: 78 | 79 | StageProc16Avx512 () = delete; 80 | StageProc16Avx512 (const StageProc16Avx512 &other) = delete; 81 | StageProc16Avx512 (StageProc16Avx512 &&other) = delete; 82 | ~StageProc16Avx512 () = delete; 83 | StageProc16Avx512 & 84 | operator = (const StageProc16Avx512 &other) = delete; 85 | StageProc16Avx512 & 86 | operator = (StageProc16Avx512 &&other) = delete; 87 | bool operator == (const StageProc16Avx512 &other) = delete; 88 | bool operator != (const StageProc16Avx512 &other) = delete; 89 | 90 | }; // class StageProc16Avx512 91 | 92 | 93 | 94 | } // namespace hiir 95 | 96 | 97 | 98 | #include "hiir/StageProc16Avx512.hpp" 99 | 100 | 101 | 102 | #endif // hiir_StageProc16Avx512_HEADER_INCLUDED 103 | 104 | 105 | 106 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 107 | 
-------------------------------------------------------------------------------- /StageProc8F64Avx512.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProc8F64Avx512.h 4 | Author: Laurent de Soras, 2020 5 | 6 | Template parameters: 7 | - REMAINING: Number of remaining coefficients to process, >= 0 8 | 9 | --- Legal stuff --- 10 | 11 | This program is free software. It comes without any warranty, to 12 | the extent permitted by applicable law. You can redistribute it 13 | and/or modify it under the terms of the Do What The Fuck You Want 14 | To Public License, Version 2, as published by Sam Hocevar. See 15 | http://sam.zoy.org/wtfpl/COPYING for more details. 16 | 17 | *Tab=3***********************************************************************/ 18 | 19 | 20 | 21 | #if ! defined (hiir_StageProc8F64Avx512_HEADER_INCLUDED) 22 | #define hiir_StageProc8F64Avx512_HEADER_INCLUDED 23 | 24 | #if defined (_MSC_VER) 25 | #pragma once 26 | #pragma warning (4 : 4250) // "Inherits via dominance." 
27 | #endif 28 | 29 | 30 | 31 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 32 | 33 | #include "hiir/def.h" 34 | 35 | #include 36 | 37 | 38 | 39 | namespace hiir 40 | { 41 | 42 | 43 | 44 | class StageDataF64Avx512; 45 | /* StageProc8F64Avx512 only groups two static functions, process_sample_pos and process_sample_neg. Both receive nbr_coefs, the __m512d values spl_0 and spl_1 by reference, and a StageDataF64Avx512 pointer (stage_arr). The static_assert rejects a negative REMAINING. All constructors, the destructor, the assignment and the comparison operators are deleted, so no object of this class can be created. The function bodies come from hiir/StageProc8F64Avx512.hpp, included at the end of this header. */ 46 | template 47 | class StageProc8F64Avx512 48 | { 49 | 50 | static_assert ((REMAINING >= 0), "REMAINING must be >= 0"); 51 | 52 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 53 | 54 | public: 55 | 56 | static hiir_FORCEINLINE void 57 | process_sample_pos (const int nbr_coefs, __m512d &spl_0, __m512d &spl_1, StageDataF64Avx512 *stage_arr); 58 | static hiir_FORCEINLINE void 59 | process_sample_neg (const int nbr_coefs, __m512d &spl_0, __m512d &spl_1, StageDataF64Avx512 *stage_arr); 60 | 61 | 62 | 63 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 64 | 65 | protected: 66 | 67 | 68 | 69 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 70 | 71 | private: 72 | 73 | 74 | 75 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 76 | 77 | private: 78 | 79 | StageProc8F64Avx512 () = delete; 80 | StageProc8F64Avx512 (const StageProc8F64Avx512 &other) = delete; 81 | StageProc8F64Avx512 (StageProc8F64Avx512 &&other) = delete; 82 | ~StageProc8F64Avx512 () = delete; 83 | StageProc8F64Avx512 & 84 | operator = (const StageProc8F64Avx512 &other) = delete; 85 | StageProc8F64Avx512 & 86 | operator = (StageProc8F64Avx512 &&other) = delete; 87 | bool operator == (const StageProc8F64Avx512 &other) = delete; 88 | bool operator != (const StageProc8F64Avx512 &other) = delete; 89 | 90 | }; // class StageProc8F64Avx512 91 | 92 | 93 | 94 | } // namespace hiir 95 | 96 | 97 | 98 | #include "hiir/StageProc8F64Avx512.hpp" 99 | 100 | 101 | 102 | #endif // hiir_StageProc8F64Avx512_HEADER_INCLUDED 103 | 104 | 105 | 106 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 107 | 
-------------------------------------------------------------------------------- /StageProc2F64Neon.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProc2F64Neon.h 4 | Author: Laurent de Soras, 2016 5 | Ported to double by Dario Mambro, 2020 6 | 7 | Template parameters: 8 | - REMAINING: Number of remaining coefficients to process, >= 0 9 | 10 | --- Legal stuff --- 11 | 12 | This program is free software. It comes without any warranty, to 13 | the extent permitted by applicable law. You can redistribute it 14 | and/or modify it under the terms of the Do What The Fuck You Want 15 | To Public License, Version 2, as published by Sam Hocevar. See 16 | http://sam.zoy.org/wtfpl/COPYING for more details. 17 | 18 | *Tab=3***********************************************************************/ 19 | 20 | 21 | 22 | #pragma once 23 | #if ! defined (hiir_StageProc2F64Neon_HEADER_INCLUDED) 24 | #define hiir_StageProc2F64Neon_HEADER_INCLUDED 25 | 26 | #if defined (_MSC_VER) 27 | #pragma warning (4 : 4250) 28 | #endif 29 | 30 | 31 | 32 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 33 | 34 | #include "hiir/def.h" 35 | 36 | 37 | 38 | namespace hiir 39 | { 40 | 41 | 42 | /* Forward declaration of the stage data type named in both member signatures below, so that this header does not depend on another header having declared it first. */ 43 | class StageDataNeonV2F64; 44 | /* StageProc2F64Neon only groups two static functions, process_sample_pos and process_sample_neg. Both receive nbr_coefs, the float64x2_t values spl_0 and spl_1 by reference, and a StageDataNeonV2F64 pointer (stage_arr). The static_assert rejects a negative REMAINING. */ 45 | template 46 | class StageProc2F64Neon 47 | { 48 | static_assert (REMAINING >= 0, "REMAINING must be >= 0."); 49 | 50 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 51 | 52 | public: 53 | 54 | static hiir_FORCEINLINE void 55 | process_sample_pos (const int nbr_coefs, float64x2_t &spl_0, float64x2_t &spl_1, StageDataNeonV2F64 *stage_arr); 56 | static hiir_FORCEINLINE void 57 | process_sample_neg (const int nbr_coefs, float64x2_t &spl_0, float64x2_t &spl_1, StageDataNeonV2F64 *stage_arr); 58 | 59 | 60 | 61 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 
62 | 63 | protected: 64 | 65 | 66 | 67 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 68 | 69 | private: 70 | 71 | 72 | 73 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 74 | 75 | private: 76 | 77 | StageProc2F64Neon () = delete; 78 | StageProc2F64Neon (const StageProc2F64Neon &other) = delete; 79 | StageProc2F64Neon (StageProc2F64Neon &&other) = delete; 80 | ~StageProc2F64Neon () = delete; 81 | StageProc2F64Neon & 82 | operator = (const StageProc2F64Neon &other) = delete; 83 | StageProc2F64Neon & 84 | operator = (StageProc2F64Neon &&other) = delete; 85 | bool operator == (const StageProc2F64Neon &other) const = delete; 86 | bool operator != (const StageProc2F64Neon &other) const = delete; 87 | 88 | }; // class StageProc2F64Neon 89 | 90 | 91 | 92 | } // namespace hiir 93 | 94 | 95 | 96 | #include "hiir/StageProc2F64Neon.hpp" 97 | 98 | 99 | 100 | #endif // hiir_StageProc2F64Neon_HEADER_INCLUDED 101 | 102 | 103 | 104 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 105 | -------------------------------------------------------------------------------- /StageProcNeonV2.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProcNeonV2.h 4 | Author: Laurent de Soras, 2016 5 | 6 | Template parameters: 7 | - CUR: index of the coefficient to process, >= 0 8 | 9 | --- Legal stuff --- 10 | 11 | This program is free software. It comes without any warranty, to 12 | the extent permitted by applicable law. You can redistribute it 13 | and/or modify it under the terms of the Do What The Fuck You Want 14 | To Public License, Version 2, as published by Sam Hocevar. See 15 | http://sam.zoy.org/wtfpl/COPYING for more details. 
16 | 17 | *Tab=3***********************************************************************/ 18 | 19 | 20 | 21 | #pragma once 22 | #if ! defined (hiir_StageProcNeonV2_HEADER_INCLUDED) 23 | #define hiir_StageProcNeonV2_HEADER_INCLUDED 24 | 25 | #if defined (_MSC_VER) 26 | #pragma warning (4 : 4250) 27 | #endif 28 | 29 | 30 | 31 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 32 | 33 | #include "hiir/def.h" 34 | 35 | 36 | 37 | namespace hiir 38 | { 39 | 40 | 41 | 42 | class StageDataNeonV2; 43 | /* StageProcNeonV2 only groups four static functions: process_sample_pos, process_sample_neg and their _rec counterparts. Each receives a float32x2_t x by reference and a StageDataNeonV2 pointer (stage_ptr). The static_assert rejects a negative CUR. All constructors, the destructor, the assignment and the comparison operators are deleted, so no object of this class can be created. The function bodies come from hiir/StageProcNeonV2.hpp, included at the end of this header. */ 44 | template 45 | class StageProcNeonV2 46 | { 47 | 48 | static_assert ((CUR >= 0), "CUR must be >= 0"); 49 | 50 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 51 | 52 | public: 53 | 54 | static hiir_FORCEINLINE void 55 | process_sample_pos (float32x2_t &x, StageDataNeonV2 *stage_ptr); 56 | static hiir_FORCEINLINE void 57 | process_sample_neg (float32x2_t &x, StageDataNeonV2 *stage_ptr); 58 | 59 | static hiir_FORCEINLINE void 60 | process_sample_pos_rec (float32x2_t &x, StageDataNeonV2 *stage_ptr); 61 | static hiir_FORCEINLINE void 62 | process_sample_neg_rec (float32x2_t &x, StageDataNeonV2 *stage_ptr); 63 | 64 | 65 | 66 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 67 | 68 | protected: 69 | 70 | 71 | 72 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 73 | 74 | private: 75 | 76 | 77 | 78 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 79 | 80 | private: 81 | 82 | StageProcNeonV2 () = delete; 83 | StageProcNeonV2 (const StageProcNeonV2 &other) = delete; 84 | StageProcNeonV2 (StageProcNeonV2 &&other) = delete; 85 | ~StageProcNeonV2 () = delete; 86 | StageProcNeonV2 & 87 | operator = (const StageProcNeonV2 &other) = delete; 88 | StageProcNeonV2 & 89 | operator = (StageProcNeonV2 &&other) = delete; 90 | bool operator == (const StageProcNeonV2 &other) const = delete; 91 | bool operator != (const StageProcNeonV2 
&other) const = delete; 92 | 93 | }; // class StageProcNeonV2 94 | 95 | 96 | 97 | } // namespace hiir 98 | 99 | 100 | 101 | #include "hiir/StageProcNeonV2.hpp" 102 | 103 | 104 | 105 | #endif // hiir_StageProcNeonV2_HEADER_INCLUDED 106 | 107 | 108 | 109 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 110 | -------------------------------------------------------------------------------- /StageProc4F64Avx.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProc4F64Avx.h 4 | Port of StageProc4Sse.h from float to double by Dario Mambro 5 | StageProc4Sse.h Laurent de Soras, 2015 6 | 7 | Template parameters: 8 | - REMAINING: Number of remaining coefficients to process, >= 0 9 | 10 | --- Legal stuff --- 11 | 12 | This program is free software. It comes without any warranty, to 13 | the extent permitted by applicable law. You can redistribute it 14 | and/or modify it under the terms of the Do What The Fuck You Want 15 | To Public License, Version 2, as published by Sam Hocevar. See 16 | http://sam.zoy.org/wtfpl/COPYING for more details. 17 | 18 | *Tab=3***********************************************************************/ 19 | 20 | 21 | 22 | #if ! defined (hiir_StageProc4F64Avx_HEADER_INCLUDED) 23 | #define hiir_StageProc4F64Avx_HEADER_INCLUDED 24 | 25 | #if defined (_MSC_VER) 26 | #pragma once 27 | #pragma warning (4 : 4250) // "Inherits via dominance." 
28 | #endif 29 | 30 | 31 | 32 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 33 | 34 | #include "hiir/def.h" 35 | 36 | #include 37 | 38 | 39 | 40 | namespace hiir 41 | { 42 | 43 | 44 | 45 | class StageDataF64Avx; 46 | /* StageProc4F64Avx only groups two static functions, process_sample_pos and process_sample_neg. Both receive nbr_coefs, the __m256d values spl_0 and spl_1 by reference, and a StageDataF64Avx pointer (stage_arr). The static_assert rejects a negative REMAINING. All constructors, the destructor, the assignment and the comparison operators are deleted, so no object of this class can be created. The function bodies come from hiir/StageProc4F64Avx.hpp, included at the end of this header. */ 47 | template 48 | class StageProc4F64Avx 49 | { 50 | 51 | static_assert ((REMAINING >= 0), "REMAINING must be >= 0"); 52 | 53 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 54 | 55 | public: 56 | 57 | static hiir_FORCEINLINE void 58 | process_sample_pos (const int nbr_coefs, __m256d &spl_0, __m256d &spl_1, StageDataF64Avx *stage_arr); 59 | static hiir_FORCEINLINE void 60 | process_sample_neg (const int nbr_coefs, __m256d &spl_0, __m256d &spl_1, StageDataF64Avx *stage_arr); 61 | 62 | 63 | 64 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 65 | 66 | protected: 67 | 68 | 69 | 70 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 71 | 72 | private: 73 | 74 | 75 | 76 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 77 | 78 | private: 79 | 80 | StageProc4F64Avx () = delete; 81 | StageProc4F64Avx (const StageProc4F64Avx &other) = delete; 82 | StageProc4F64Avx (StageProc4F64Avx &&other) = delete; 83 | ~StageProc4F64Avx () = delete; 84 | StageProc4F64Avx & 85 | operator = (const StageProc4F64Avx &other) = delete; 86 | StageProc4F64Avx & 87 | operator = (StageProc4F64Avx &&other) = delete; 88 | bool operator == (const StageProc4F64Avx &other) = delete; 89 | bool operator != (const StageProc4F64Avx &other) = delete; 90 | 91 | }; // class StageProc4F64Avx 92 | 93 | 94 | 95 | } // namespace hiir 96 | 97 | 98 | 99 | #include "hiir/StageProc4F64Avx.hpp" 100 | 101 | 102 | 103 | #endif // hiir_StageProc4F64Avx_HEADER_INCLUDED 104 | 105 | 106 | 107 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 108 | 
-------------------------------------------------------------------------------- /StageProcSseV2.hpp: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProcSseV2.hpp 4 | Author: Laurent de Soras, 2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://www.wtfpl.net/ for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #if ! defined (hiir_StageProcSseV2_CODEHEADER_INCLUDED) 19 | #define hiir_StageProcSseV2_CODEHEADER_INCLUDED 20 | 21 | 22 | 23 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 24 | 25 | 26 | 27 | namespace hiir 28 | { 29 | 30 | 31 | 32 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 33 | 34 | 35 | /* process_sample_pos: runs process_sample_pos_rec on (x, stage_arr), then stores the resulting x to stage_arr [CUR]._mem. */ 36 | template 37 | void StageProcSseV2 ::process_sample_pos (__m128 &x, StageDataSse *stage_arr) 38 | { 39 | StageProcSseV2 ::process_sample_pos_rec (x, stage_arr); 40 | _mm_store_ps (stage_arr [CUR]._mem, x); 41 | } 42 | 43 | 44 | /* process_sample_neg: runs process_sample_neg_rec on (x, stage_arr), then stores the resulting x to stage_arr [CUR]._mem. */ 45 | template 46 | void StageProcSseV2 ::process_sample_neg (__m128 &x, StageDataSse *stage_arr) 47 | { 48 | StageProcSseV2 ::process_sample_neg_rec (x, stage_arr); 49 | _mm_store_ps (stage_arr [CUR]._mem, x); 50 | } 51 | 52 | 53 | /* process_sample_pos_rec: after the nested process_sample_pos_rec call, the previous content of stage_arr [CUR - 1]._mem is kept in tmp and replaced by x; x then becomes (x - stage_arr [CUR]._mem) * stage_arr [CUR]._coef + tmp. */ 54 | template 55 | void StageProcSseV2 ::process_sample_pos_rec (__m128 &x, StageDataSse *stage_arr) 56 | { 57 | StageProcSseV2 ::process_sample_pos_rec (x, stage_arr); 58 | 59 | const auto tmp = _mm_load_ps (stage_arr [CUR - 1]._mem); 60 | _mm_store_ps (stage_arr [CUR - 1]._mem, x); 61 | 62 | x = _mm_sub_ps (x, _mm_load_ps (stage_arr [CUR]._mem )); 63 | x = _mm_mul_ps (x, 
_mm_load_ps (stage_arr [CUR]._coef)); 64 | x = _mm_add_ps (x, tmp); 65 | } 66 | /* Empty 0 specialization: end point of the process_sample_pos_rec chain. */ 67 | template <> 68 | hiir_FORCEINLINE void StageProcSseV2 <0>::process_sample_pos_rec (__m128 & /* x */, StageDataSse * /* stage_arr */) 69 | { 70 | // Nothing, stops the recursion 71 | } 72 | 73 | 74 | /* process_sample_neg_rec: after the nested process_sample_neg_rec call, the previous content of stage_arr [CUR - 1]._mem is kept in tmp and replaced by x; x then becomes (x + stage_arr [CUR]._mem) * stage_arr [CUR]._coef - tmp. */ 75 | template 76 | void StageProcSseV2 ::process_sample_neg_rec (__m128 &x, StageDataSse *stage_arr) 77 | { 78 | StageProcSseV2 ::process_sample_neg_rec (x, stage_arr); 79 | 80 | const auto tmp = _mm_load_ps (stage_arr [CUR - 1]._mem); 81 | _mm_store_ps (stage_arr [CUR - 1]._mem, x); 82 | 83 | x = _mm_add_ps (x, _mm_load_ps (stage_arr [CUR]._mem )); 84 | x = _mm_mul_ps (x, _mm_load_ps (stage_arr [CUR]._coef)); 85 | x = _mm_sub_ps (x, tmp); 86 | } 87 | 88 | template <> 89 | hiir_FORCEINLINE void StageProcSseV2 <0>::process_sample_neg_rec (__m128 & /* x */, StageDataSse * /* stage_arr */) 90 | { 91 | // Nothing, stops the recursion 92 | } 93 | 94 | 95 | 96 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 97 | 98 | 99 | 100 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 101 | 102 | 103 | 104 | } // namespace hiir 105 | 106 | 107 | 108 | #endif // hiir_StageProcSseV2_CODEHEADER_INCLUDED 109 | 110 | 111 | 112 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 113 | -------------------------------------------------------------------------------- /StageProc2F64Sse2.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProc2F64Sse2.h 4 | Port of StageProc4Sse.h from float to double by Dario Mambro 5 | StageProc4Sse.h Laurent de Soras, 2015 6 | 7 | Template parameters: 8 | - REMAINING: Number of remaining coefficients to process, >= 0 9 | 10 | --- Legal stuff --- 11 | 12 | This program is free software. 
It comes without any warranty, to 13 | the extent permitted by applicable law. You can redistribute it 14 | and/or modify it under the terms of the Do What The Fuck You Want 15 | To Public License, Version 2, as published by Sam Hocevar. See 16 | http://sam.zoy.org/wtfpl/COPYING for more details. 17 | 18 | *Tab=3***********************************************************************/ 19 | 20 | 21 | 22 | #if ! defined (hiir_StageProc2F64Sse2_HEADER_INCLUDED) 23 | #define hiir_StageProc2F64Sse2_HEADER_INCLUDED 24 | 25 | #if defined (_MSC_VER) 26 | #pragma once 27 | #pragma warning (4 : 4250) // "Inherits via dominance." 28 | #endif 29 | 30 | 31 | 32 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 33 | 34 | #include "hiir/def.h" 35 | 36 | #include 37 | 38 | 39 | 40 | namespace hiir 41 | { 42 | 43 | 44 | 45 | class StageDataF64Sse2; 46 | /* StageProc2F64Sse2 only groups two static functions, process_sample_pos and process_sample_neg. Both receive nbr_coefs, the __m128d values spl_0 and spl_1 by reference, and a StageDataF64Sse2 pointer (stage_arr). The static_assert rejects a negative REMAINING. All constructors, the destructor, the assignment and the comparison operators are deleted, so no object of this class can be created. The function bodies come from hiir/StageProc2F64Sse2.hpp, included at the end of this header. */ 47 | template 48 | class StageProc2F64Sse2 49 | { 50 | 51 | static_assert ((REMAINING >= 0), "REMAINING must be >= 0"); 52 | 53 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 54 | 55 | public: 56 | 57 | static hiir_FORCEINLINE void 58 | process_sample_pos (const int nbr_coefs, __m128d &spl_0, __m128d &spl_1, StageDataF64Sse2 *stage_arr); 59 | static hiir_FORCEINLINE void 60 | process_sample_neg (const int nbr_coefs, __m128d &spl_0, __m128d &spl_1, StageDataF64Sse2 *stage_arr); 61 | 62 | 63 | 64 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 65 | 66 | protected: 67 | 68 | 69 | 70 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 71 | 72 | private: 73 | 74 | 75 | 76 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 77 | 78 | private: 79 | 80 | StageProc2F64Sse2 () = delete; 81 | StageProc2F64Sse2 (const StageProc2F64Sse2 &other) = delete; 82 | StageProc2F64Sse2 (StageProc2F64Sse2 &&other) = delete; 83 | ~StageProc2F64Sse2 () = delete; 84 | StageProc2F64Sse2 & 85 | operator = 
(const StageProc2F64Sse2 &other) = delete; 86 | StageProc2F64Sse2 & 87 | operator = (StageProc2F64Sse2 &&other) = delete; 88 | bool operator == (const StageProc2F64Sse2 &other) = delete; 89 | bool operator != (const StageProc2F64Sse2 &other) = delete; 90 | 91 | }; // class StageProc2F64Sse2 92 | 93 | 94 | 95 | } // namespace hiir 96 | 97 | 98 | 99 | #include "hiir/StageProc2F64Sse2.hpp" 100 | 101 | 102 | 103 | #endif // hiir_StageProc2F64Sse2_HEADER_INCLUDED 104 | 105 | 106 | 107 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 108 | -------------------------------------------------------------------------------- /StageProcNeonV2.hpp: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProcNeonV2.hpp 4 | Author: Laurent de Soras, 2016 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://sam.zoy.org/wtfpl/COPYING for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #if ! 
defined (hiir_StageProcNeonV2_CODEHEADER_INCLUDED) 19 | #define hiir_StageProcNeonV2_CODEHEADER_INCLUDED 20 | 21 | 22 | 23 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 24 | 25 | #include "hiir/fnc_neon.h" 26 | #include "hiir/StageDataNeonV2.h" 27 | 28 | 29 | 30 | namespace hiir 31 | { 32 | 33 | 34 | 35 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 36 | 37 | 38 | /* process_sample_pos: runs process_sample_pos_rec on (x, stage_ptr), then writes the resulting x to stage_ptr [CUR]._mem with storea. NOTE(review): storea, load2a are not defined here; presumably they are the store/load helpers of hiir/fnc_neon.h - confirm there. */ 39 | template 40 | void StageProcNeonV2 ::process_sample_pos (float32x2_t &x, StageDataNeonV2 *stage_ptr) 41 | { 42 | StageProcNeonV2 ::process_sample_pos_rec (x, stage_ptr); 43 | storea (stage_ptr [CUR]._mem, x); 44 | } 45 | 46 | 47 | /* process_sample_neg: runs process_sample_neg_rec on (x, stage_ptr), then writes the resulting x to stage_ptr [CUR]._mem with storea. */ 48 | template 49 | void StageProcNeonV2 ::process_sample_neg (float32x2_t &x, StageDataNeonV2 *stage_ptr) 50 | { 51 | StageProcNeonV2 ::process_sample_neg_rec (x, stage_ptr); 52 | storea (stage_ptr [CUR]._mem, x); 53 | } 54 | 55 | 56 | /* process_sample_pos_rec: after the nested process_sample_pos_rec call, the previous content of stage_ptr [CUR - 1]._mem is kept in tmp and replaced by x. vmla_f32 (a, b, c) yields a + b * c, so x becomes tmp + (x - stage_ptr [CUR]._mem) * stage_ptr [CUR]._coef. */ 57 | template 58 | void StageProcNeonV2 ::process_sample_pos_rec (float32x2_t &x, StageDataNeonV2 *stage_ptr) 59 | { 60 | StageProcNeonV2 ::process_sample_pos_rec (x, stage_ptr); 61 | 62 | const float32x2_t tmp = load2a (stage_ptr [CUR - 1]._mem); 63 | storea (stage_ptr [CUR - 1]._mem, x); 64 | 65 | x = vmla_f32 ( 66 | tmp, 67 | x - load2a (stage_ptr [CUR]._mem), 68 | load2a (stage_ptr [CUR]._coef) 69 | ); 70 | } 71 | 72 | template <> 73 | hiir_FORCEINLINE void StageProcNeonV2 <0>::process_sample_pos_rec (float32x2_t &/* x */, StageDataNeonV2 * /* stage_ptr */) 74 | { 75 | // Nothing, stops the recursion 76 | } 77 | 78 | 79 | /* process_sample_neg_rec: after the nested process_sample_neg_rec call, the previous content of stage_ptr [CUR - 1]._mem is kept in tmp and replaced by x; x then becomes (x + stage_ptr [CUR]._mem) * stage_ptr [CUR]._coef - tmp. */ 80 | template 81 | void StageProcNeonV2 ::process_sample_neg_rec (float32x2_t &x, StageDataNeonV2 *stage_ptr) 82 | { 83 | StageProcNeonV2 ::process_sample_neg_rec (x, stage_ptr); 84 | 85 | const float32x2_t tmp = load2a (stage_ptr [CUR - 1]._mem); 86 | storea (stage_ptr [CUR - 1]._mem, x); 87 | 88 | x += load2a (stage_ptr [CUR]._mem ); 89 | x *= load2a (stage_ptr [CUR]._coef); 90 | x -= tmp; 91 | } 92 | 93 | template <> 94 | hiir_FORCEINLINE void 
StageProcNeonV2 <0>::process_sample_neg_rec (float32x2_t &/* x */, StageDataNeonV2 * /* stage_ptr */) 95 | { 96 | // Nothing, stops the recursion 97 | } 98 | 99 | 100 | 101 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 102 | 103 | 104 | 105 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 106 | 107 | 108 | 109 | } // namespace hiir 110 | 111 | 112 | 113 | #endif // hiir_StageProcNeonV2_CODEHEADER_INCLUDED 114 | 115 | 116 | 117 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 118 | -------------------------------------------------------------------------------- /StageProcF64Sse2.hpp: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProcF64Sse2.hpp 4 | Author: Laurent de Soras, 2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://www.wtfpl.net/ for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #if ! 
defined (hiir_StageProcF64Sse2_CODEHEADER_INCLUDED) 19 | #define hiir_StageProcF64Sse2_CODEHEADER_INCLUDED 20 | 21 | 22 | 23 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 24 | 25 | 26 | 27 | namespace hiir 28 | { 29 | 30 | 31 | 32 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 33 | 34 | 35 | /* process_sample_pos: runs process_sample_pos_rec on (x, stage_arr), then stores the resulting x to stage_arr [CUR]._mem. */ 36 | template 37 | void StageProcF64Sse2 ::process_sample_pos (__m128d &x, StageDataF64Sse2 *stage_arr) 38 | { 39 | StageProcF64Sse2 ::process_sample_pos_rec (x, stage_arr); 40 | _mm_store_pd (stage_arr [CUR]._mem, x); 41 | } 42 | 43 | 44 | /* process_sample_neg: runs process_sample_neg_rec on (x, stage_arr), then stores the resulting x to stage_arr [CUR]._mem. */ 45 | template 46 | void StageProcF64Sse2 ::process_sample_neg (__m128d &x, StageDataF64Sse2 *stage_arr) 47 | { 48 | StageProcF64Sse2 ::process_sample_neg_rec (x, stage_arr); 49 | _mm_store_pd (stage_arr [CUR]._mem, x); 50 | } 51 | 52 | 53 | /* process_sample_pos_rec: after the nested process_sample_pos_rec call, the previous content of stage_arr [CUR - 1]._mem is kept in tmp and replaced by x; x then becomes (x - stage_arr [CUR]._mem) * stage_arr [CUR]._coef + tmp. */ 54 | template 55 | void StageProcF64Sse2 ::process_sample_pos_rec (__m128d &x, StageDataF64Sse2 *stage_arr) 56 | { 57 | StageProcF64Sse2 ::process_sample_pos_rec (x, stage_arr); 58 | 59 | const auto tmp = _mm_load_pd (stage_arr [CUR - 1]._mem); 60 | _mm_store_pd (stage_arr [CUR - 1]._mem, x); 61 | 62 | x = _mm_sub_pd (x, _mm_load_pd (stage_arr [CUR]._mem )); 63 | x = _mm_mul_pd (x, _mm_load_pd (stage_arr [CUR]._coef)); 64 | x = _mm_add_pd (x, tmp); 65 | } 66 | /* An explicit specialization is an ordinary function, not a template: it must be declared inline to be defined in a header, otherwise every translation unit including this file emits its own definition and the link fails with duplicate symbols. */ 67 | template <> 68 | inline void StageProcF64Sse2 <0>::process_sample_pos_rec (__m128d & /*x*/, StageDataF64Sse2 * /*stage_arr*/) 69 | { 70 | // Nothing, stops the recursion 71 | } 72 | 73 | 74 | /* process_sample_neg_rec: after the nested process_sample_neg_rec call, the previous content of stage_arr [CUR - 1]._mem is kept in tmp and replaced by x; x then becomes (x + stage_arr [CUR]._mem) * stage_arr [CUR]._coef - tmp. */ 75 | template 76 | void StageProcF64Sse2 ::process_sample_neg_rec (__m128d &x, StageDataF64Sse2 *stage_arr) 77 | { 78 | StageProcF64Sse2 ::process_sample_neg_rec (x, stage_arr); 79 | 80 | const auto tmp = _mm_load_pd (stage_arr [CUR - 1]._mem); 81 | _mm_store_pd (stage_arr [CUR - 1]._mem, x); 82 | 83 | x = _mm_add_pd (x, _mm_load_pd (stage_arr [CUR]._mem )); 84 | x = _mm_mul_pd (x, _mm_load_pd (stage_arr [CUR]._coef)); 85 | x = _mm_sub_pd (x, tmp); 86 | } 87 | /* Declared inline for the same reason as the process_sample_pos_rec specialization above. */ 88 | template <> 89 | inline void 
StageProcF64Sse2 <0>::process_sample_neg_rec (__m128d & /*x*/, StageDataF64Sse2 * /*stage_arr*/) 90 | { 91 | // Nothing, stops the recursion 92 | } 93 | 94 | 95 | 96 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 97 | 98 | 99 | 100 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 101 | 102 | 103 | 104 | } // namespace hiir 105 | 106 | 107 | 108 | #endif // hiir_StageProcF64Sse2_CODEHEADER_INCLUDED 109 | 110 | 111 | 112 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 113 | -------------------------------------------------------------------------------- /StageProcNeonV4.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProcNeonV4.h 4 | Author: Laurent de Soras, 2016 5 | 6 | Template parameters: 7 | - CUR: index of the coefficient to process, >= 0 8 | 9 | --- Legal stuff --- 10 | 11 | This program is free software. It comes without any warranty, to 12 | the extent permitted by applicable law. You can redistribute it 13 | and/or modify it under the terms of the Do What The Fuck You Want 14 | To Public License, Version 2, as published by Sam Hocevar. See 15 | http://sam.zoy.org/wtfpl/COPYING for more details. 16 | 17 | *Tab=3***********************************************************************/ 18 | 19 | 20 | 21 | #pragma once 22 | #if ! 
defined (hiir_StageProcNeonV4_HEADER_INCLUDED) 23 | #define hiir_StageProcNeonV4_HEADER_INCLUDED 24 | 25 | #if defined (_MSC_VER) 26 | #pragma warning (4 : 4250) 27 | #endif 28 | 29 | 30 | 31 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 32 | 33 | #include "hiir/def.h" 34 | 35 | 36 | 37 | namespace hiir 38 | { 39 | 40 | 41 | 42 | class StageDataNeonV4; 43 | /* StageProcNeonV4 only groups four static functions: process_sample_pos, process_sample_neg and their _rec counterparts. Each receives a StageDataNeonV4 pointer (stage_ptr) and the float32x4_t values y and mem by reference. The static_assert rejects a negative CUR. All constructors, the destructor, the assignment and the comparison operators are deleted, so no object of this class can be created. */ 44 | template 45 | class StageProcNeonV4 46 | { 47 | 48 | static_assert ((CUR >= 0), "CUR must be >= 0"); 49 | 50 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 51 | 52 | public: 53 | 54 | static hiir_FORCEINLINE void 55 | process_sample_pos (StageDataNeonV4 *stage_ptr, float32x4_t &y, float32x4_t &mem); 56 | static hiir_FORCEINLINE void 57 | process_sample_neg (StageDataNeonV4 *stage_ptr, float32x4_t &y, float32x4_t &mem); 58 | 59 | static hiir_FORCEINLINE void 60 | process_sample_pos_rec (StageDataNeonV4 *stage_ptr, float32x4_t &y, float32x4_t &mem); 61 | static hiir_FORCEINLINE void 62 | process_sample_neg_rec (StageDataNeonV4 *stage_ptr, float32x4_t &y, float32x4_t &mem); 63 | 64 | 65 | 66 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 67 | 68 | protected: 69 | 70 | 71 | 72 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 73 | 74 | private: 75 | 76 | 77 | 78 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 79 | 80 | private: 81 | 82 | StageProcNeonV4 () = delete; 83 | StageProcNeonV4 (const StageProcNeonV4 &other) = delete; 84 | StageProcNeonV4 (StageProcNeonV4 &&other) = delete; 85 | ~StageProcNeonV4 () = delete; 86 | StageProcNeonV4 & 87 | operator = (const StageProcNeonV4 &other) = delete; 88 | StageProcNeonV4 & 89 | operator = (StageProcNeonV4 &&other) = delete; 90 | bool operator == (const StageProcNeonV4 &other) const = delete; 91 | bool operator != (const StageProcNeonV4 &other) const = delete; 92 | 93 | }; // class StageProcNeonV4 94 | 
95 | 96 | 97 | } // namespace hiir 98 | 99 | 100 | 101 | #include "hiir/StageProcNeonV4.hpp" 102 | 103 | 104 | 105 | #endif // hiir_StageProcNeonV4_HEADER_INCLUDED 106 | 107 | 108 | 109 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 110 | -------------------------------------------------------------------------------- /StageProcNeonV4.hpp: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProcNeonV4.hpp 4 | Author: Laurent de Soras, 2016 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://sam.zoy.org/wtfpl/COPYING for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #if ! 
defined (hiir_StageProcNeonV4_CODEHEADER_INCLUDED) 19 | #define hiir_StageProcNeonV4_CODEHEADER_INCLUDED 20 | 21 | 22 | 23 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 24 | 25 | #include "hiir/fnc_neon.h" 26 | #include "hiir/StageDataNeonV4.h" 27 | 28 | 29 | 30 | namespace hiir 31 | { 32 | 33 | 34 | 35 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 36 | 37 | 38 | 39 | template 40 | void StageProcNeonV4 ::process_sample_pos (StageDataNeonV4 *stage_ptr, float32x4_t &y, float32x4_t &mem) 41 | { 42 | StageProcNeonV4 ::process_sample_pos_rec (stage_ptr, y, mem); 43 | storea (stage_ptr [CUR]._mem, y); 44 | } 45 | 46 | 47 | 48 | template 49 | void StageProcNeonV4 ::process_sample_neg (StageDataNeonV4 *stage_ptr, float32x4_t &y, float32x4_t &mem) 50 | { 51 | StageProcNeonV4 ::process_sample_neg_rec (stage_ptr, y, mem); 52 | storea (stage_ptr [CUR]._mem, y); 53 | } 54 | 55 | 56 | 57 | template 58 | void StageProcNeonV4 ::process_sample_pos_rec (StageDataNeonV4 *stage_ptr, float32x4_t &y, float32x4_t &mem) 59 | { 60 | StageProcNeonV4 ::process_sample_pos_rec (stage_ptr, y, mem); 61 | 62 | const float32x4_t x = mem; 63 | storea (stage_ptr [CUR - 1]._mem, y); 64 | 65 | mem = load4a (stage_ptr [CUR]._mem); 66 | y = vmlaq_f32 (x, y - mem, load4a (stage_ptr [CUR]._coef)); 67 | } 68 | 69 | template <> 70 | inline void StageProcNeonV4 <0>::process_sample_pos_rec (StageDataNeonV4 * /* stage_ptr */, float32x4_t & /* y */, float32x4_t & /* mem */) 71 | { 72 | // Nothing, stops the recursion 73 | } 74 | 75 | 76 | 77 | template 78 | void StageProcNeonV4 ::process_sample_neg_rec (StageDataNeonV4 *stage_ptr, float32x4_t &y, float32x4_t &mem) 79 | { 80 | StageProcNeonV4 ::process_sample_neg_rec (stage_ptr, y, mem); 81 | 82 | const float32x4_t x = mem; 83 | storea (stage_ptr [CUR - 1]._mem, y); 84 | 85 | mem = load4a (stage_ptr [CUR]._mem); 86 | y += mem; 87 | y *= load4a (stage_ptr [CUR]._coef); 88 | y -= x; 89 | } 90 | 
91 | template <> 92 | inline void StageProcNeonV4 <0>::process_sample_neg_rec (StageDataNeonV4 * /* stage_ptr */, float32x4_t & /* y */, float32x4_t & /* mem */) 93 | { 94 | // Nothing, stops the recursion 95 | } 96 | 97 | 98 | 99 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 100 | 101 | 102 | 103 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 104 | 105 | 106 | 107 | } // namespace hiir 108 | 109 | 110 | 111 | #endif // hiir_StageProcNeonV4_CODEHEADER_INCLUDED 112 | 113 | 114 | 115 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 116 | -------------------------------------------------------------------------------- /Upsampler2xF64Sse2.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2xF64Sse2.h 4 | Author: Laurent de Soras, 2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://www.wtfpl.net/ for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #pragma once 19 | #if ! 
defined (hiir_Upsampler2xF64Sse2_HEADER_INCLUDED) 20 | #define hiir_Upsampler2xF64Sse2_HEADER_INCLUDED 21 | 22 | 23 | 24 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 25 | 26 | #include "hiir/def.h" 27 | #include "hiir/StageDataF64Sse2.h" 28 | 29 | #include 30 | 31 | #include 32 | 33 | 34 | 35 | namespace hiir 36 | { 37 | 38 | 39 | 40 | template 41 | class Upsampler2xF64Sse2 42 | { 43 | 44 | static_assert ((NC > 0), "Number of coefficient must be positive."); 45 | 46 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 47 | 48 | public: 49 | 50 | typedef double DataType; 51 | static const int _nbr_chn = 1; 52 | 53 | enum { NBR_COEFS = NC }; 54 | 55 | Upsampler2xF64Sse2 (); 56 | Upsampler2xF64Sse2 (const Upsampler2xF64Sse2 &other) = default; 57 | Upsampler2xF64Sse2 (Upsampler2xF64Sse2 &&other) = default; 58 | 59 | Upsampler2xF64Sse2 & 60 | operator = (const Upsampler2xF64Sse2 &other) = default; 61 | Upsampler2xF64Sse2 & 62 | operator = (Upsampler2xF64Sse2 &&other) = default; 63 | 64 | void set_coefs (const double coef_arr []); 65 | 66 | hiir_FORCEINLINE void 67 | process_sample (double &out_0, double &out_1, double input); 68 | void process_block (double out_ptr [], const double in_ptr [], long nbr_spl); 69 | 70 | void clear_buffers (); 71 | 72 | 73 | 74 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 75 | 76 | protected: 77 | 78 | 79 | 80 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 81 | 82 | private: 83 | 84 | static const int _stage_width = 2; 85 | static const int _nbr_stages = (NBR_COEFS + _stage_width - 1) / _stage_width; 86 | 87 | // Stage 0 contains only input memory 88 | typedef std::array Filter; 89 | 90 | Filter _filter; // Should be the first member (thus easier to align) 91 | 92 | 93 | 94 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 95 | 96 | private: 97 | 98 | bool operator == (const 
Upsampler2xF64Sse2 &other) const = delete; 99 | bool operator != (const Upsampler2xF64Sse2 &other) const = delete; 100 | 101 | }; // class Upsampler2xF64Sse2 102 | 103 | 104 | 105 | } // namespace hiir 106 | 107 | 108 | 109 | #include "hiir/Upsampler2xF64Sse2.hpp" 110 | 111 | 112 | 113 | #endif // hiir_Upsampler2xF64Sse2_HEADER_INCLUDED 114 | 115 | 116 | 117 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 118 | -------------------------------------------------------------------------------- /StageProc3dnow.hpp: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | StageProc3dnow.hpp 4 | Author: Laurent de Soras, 2015 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://sam.zoy.org/wtfpl/COPYING for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #if defined (hiir_StageProc3dnow_CURRENT_CODEHEADER) 19 | #error Recursive inclusion of StageProc3dnow code header. 20 | #endif 21 | #define hiir_StageProc3dnow_CURRENT_CODEHEADER 22 | 23 | #if ! 
defined (hiir_StageProc3dnow_CODEHEADER_INCLUDED) 24 | #define hiir_StageProc3dnow_CODEHEADER_INCLUDED 25 | 26 | 27 | 28 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 29 | 30 | #include "hiir/StageData3dnow.h" 31 | 32 | #if defined (_MSC_VER) 33 | #pragma inline_depth (255) 34 | #endif 35 | 36 | 37 | 38 | namespace hiir 39 | { 40 | 41 | 42 | 43 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 44 | 45 | 46 | 47 | #if defined (_MSC_VER) 48 | #pragma warning (push) 49 | #pragma warning (4 : 4799) 50 | #endif 51 | 52 | 53 | 54 | template 55 | void StageProc3dnow ::process_sample_pos () 56 | { 57 | StageProc3dnow ::process_sample_pos (); 58 | 59 | enum { PREV_CELL = (CUR - 1) * sizeof (StageData3dnow) }; 60 | enum { CURR_CELL = CUR * sizeof (StageData3dnow) }; 61 | 62 | __asm 63 | { 64 | movq mm1, [edx + PREV_CELL + 1*8] 65 | movq [edx + PREV_CELL + 1*8], mm0 66 | 67 | pfsub mm0, [edx + CURR_CELL + 1*8] 68 | pfmul mm0, [edx + CURR_CELL + 0*8] 69 | pfadd mm0, mm1 70 | } 71 | } 72 | 73 | template <> 74 | hiir_FORCEINLINE void StageProc3dnow <0>::process_sample_pos () 75 | { 76 | // Nothing, stops the recursion 77 | } 78 | 79 | 80 | 81 | template 82 | void StageProc3dnow ::process_sample_neg () 83 | { 84 | StageProc3dnow ::process_sample_neg (); 85 | 86 | enum { PREV_CELL = (CUR - 1) * sizeof (StageData3dnow) }; 87 | enum { CURR_CELL = CUR * sizeof (StageData3dnow) }; 88 | 89 | __asm 90 | { 91 | movq mm1, [edx + PREV_CELL + 1*8] 92 | movq [edx + PREV_CELL + 1*8], mm0 93 | 94 | pfadd mm0, [edx + CURR_CELL + 1*8] 95 | pfmul mm0, [edx + CURR_CELL + 0*8] 96 | pfsub mm0, mm1 97 | } 98 | } 99 | 100 | template <> 101 | hiir_FORCEINLINE void StageProc3dnow <0>::process_sample_neg () 102 | { 103 | // Nothing, stops the recursion 104 | } 105 | 106 | 107 | 108 | #if defined (_MSC_VER) 109 | #pragma warning (pop) 110 | #endif 111 | 112 | 113 | 114 | /*\\\ PROTECTED 
\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 115 | 116 | 117 | 118 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 119 | 120 | 121 | 122 | } // namespace hiir 123 | 124 | 125 | 126 | #endif // hiir_StageProc3dnow_CODEHEADER_INCLUDED 127 | 128 | #undef hiir_StageProc3dnow_CURRENT_CODEHEADER 129 | 130 | 131 | 132 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 133 | -------------------------------------------------------------------------------- /Upsampler2xFpuTpl.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2xFpuTpl.h 4 | Author: Laurent de Soras, 2005 5 | 6 | Upsamples by a factor 2 the input signal, using FPU. 7 | 8 | Template parameters: 9 | - NC: number of coefficients, > 0 10 | 11 | --- Legal stuff --- 12 | 13 | This program is free software. It comes without any warranty, to 14 | the extent permitted by applicable law. You can redistribute it 15 | and/or modify it under the terms of the Do What The Fuck You Want 16 | To Public License, Version 2, as published by Sam Hocevar. See 17 | http://sam.zoy.org/wtfpl/COPYING for more details. 18 | 19 | *Tab=3***********************************************************************/ 20 | 21 | 22 | 23 | #if ! defined (hiir_Upsampler2xFpuTpl_HEADER_INCLUDED) 24 | #define hiir_Upsampler2xFpuTpl_HEADER_INCLUDED 25 | 26 | #if defined (_MSC_VER) 27 | #pragma once 28 | #pragma warning (4 : 4250) // "Inherits via dominance." 
29 | #endif 30 | 31 | 32 | 33 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 34 | 35 | #include "hiir/def.h" 36 | 37 | #include 38 | 39 | 40 | 41 | namespace hiir 42 | { 43 | 44 | 45 | 46 | template 47 | class Upsampler2xFpuTpl 48 | { 49 | 50 | static_assert ((NC > 0), "Number of coefficient must be positive."); 51 | 52 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 53 | 54 | public: 55 | 56 | typedef DT DataType; 57 | static const int _nbr_chn = 1; 58 | 59 | enum { NBR_COEFS = NC }; 60 | 61 | Upsampler2xFpuTpl (); 62 | Upsampler2xFpuTpl (const Upsampler2xFpuTpl &other) = default; 63 | Upsampler2xFpuTpl (Upsampler2xFpuTpl &&other) = default; 64 | ~Upsampler2xFpuTpl () = default; 65 | 66 | Upsampler2xFpuTpl & 67 | operator = (const Upsampler2xFpuTpl &other) = default; 68 | Upsampler2xFpuTpl & 69 | operator = (Upsampler2xFpuTpl &&other) = default; 70 | 71 | void set_coefs (const double coef_arr [NBR_COEFS]); 72 | hiir_FORCEINLINE void 73 | process_sample (DataType &out_0, DataType &out_1, DataType input); 74 | void process_block (DataType out_ptr [], const DataType in_ptr [], long nbr_spl); 75 | void clear_buffers (); 76 | 77 | 78 | 79 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 80 | 81 | protected: 82 | 83 | 84 | 85 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 86 | 87 | private: 88 | 89 | typedef std::array HyperGluar; 90 | 91 | HyperGluar _coef; 92 | HyperGluar _x; 93 | HyperGluar _y; 94 | 95 | 96 | 97 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 98 | 99 | private: 100 | 101 | bool operator == (const Upsampler2xFpuTpl &other) = delete; 102 | bool operator != (const Upsampler2xFpuTpl &other) = delete; 103 | 104 | }; // class Upsampler2xFpuTpl 105 | 106 | 107 | 108 | } // namespace hiir 109 | 110 | 111 | 112 | #include "hiir/Upsampler2xFpuTpl.hpp" 113 | 114 | 115 | 116 | #endif // 
hiir_Upsampler2xFpuTpl_HEADER_INCLUDED 117 | 118 | 119 | 120 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 121 | -------------------------------------------------------------------------------- /Upsampler2x4Sse.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2x4Sse.h 4 | Author: Laurent de Soras, 2015 5 | 6 | Upsamples vectors of 4 float by a factor 2 the input signal, using the SSE 7 | instruction set. 8 | 9 | This object must be aligned on a 16-byte boundary! 10 | 11 | Template parameters: 12 | - NC: number of coefficients, > 0 13 | 14 | --- Legal stuff --- 15 | 16 | This program is free software. It comes without any warranty, to 17 | the extent permitted by applicable law. You can redistribute it 18 | and/or modify it under the terms of the Do What The Fuck You Want 19 | To Public License, Version 2, as published by Sam Hocevar. See 20 | http://sam.zoy.org/wtfpl/COPYING for more details. 21 | 22 | *Tab=3***********************************************************************/ 23 | 24 | 25 | 26 | #pragma once 27 | #if ! 
defined (hiir_Upsampler2x4Sse_HEADER_INCLUDED) 28 | #define hiir_Upsampler2x4Sse_HEADER_INCLUDED 29 | 30 | #if defined (_MSC_VER) 31 | #pragma warning (4 : 4250) 32 | #endif 33 | 34 | 35 | 36 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 37 | 38 | #include "hiir/def.h" 39 | #include "hiir/StageDataSse.h" 40 | 41 | #include 42 | 43 | #include 44 | 45 | 46 | 47 | namespace hiir 48 | { 49 | 50 | 51 | 52 | template 53 | class Upsampler2x4Sse 54 | { 55 | 56 | static_assert ((NC > 0), "Number of coefficient must be positive."); 57 | 58 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 59 | 60 | public: 61 | 62 | typedef float DataType; 63 | static const int _nbr_chn = 4; 64 | 65 | enum { NBR_COEFS = NC }; 66 | 67 | Upsampler2x4Sse (); 68 | Upsampler2x4Sse (const Upsampler2x4Sse &other) = default; 69 | Upsampler2x4Sse (Upsampler2x4Sse &&other) = default; 70 | ~Upsampler2x4Sse () = default; 71 | 72 | Upsampler2x4Sse & 73 | operator = (const Upsampler2x4Sse &other) = default; 74 | Upsampler2x4Sse & 75 | operator = (Upsampler2x4Sse &&other) = default; 76 | 77 | void set_coefs (const double coef_arr [NBR_COEFS]); 78 | hiir_FORCEINLINE void 79 | process_sample (__m128 &out_0, __m128 &out_1, __m128 input); 80 | void process_block (float out_ptr [], const float in_ptr [], long nbr_spl); 81 | void clear_buffers (); 82 | 83 | 84 | 85 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 86 | 87 | protected: 88 | 89 | 90 | 91 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 92 | 93 | private: 94 | 95 | typedef std::array Filter; // Stages 0 and 1 contain only input memories 96 | 97 | Filter _filter; // Should be the first member (thus easier to align) 98 | 99 | 100 | 101 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 102 | 103 | private: 104 | 105 | bool operator == (const Upsampler2x4Sse &other) const = delete; 106 | bool 
operator != (const Upsampler2x4Sse &other) const = delete; 107 | 108 | }; // class Upsampler2x4Sse 109 | 110 | 111 | 112 | } // namespace hiir 113 | 114 | 115 | 116 | #include "hiir/Upsampler2x4Sse.hpp" 117 | 118 | 119 | 120 | #endif // hiir_Upsampler2x4Sse_HEADER_INCLUDED 121 | 122 | 123 | 124 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 125 | -------------------------------------------------------------------------------- /Upsampler2xSse.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2xSse.h 4 | Author: Laurent de Soras, 2020 5 | 6 | Upsamples by a factor 2 the input signal, using SSE instruction set. 7 | 8 | This object must be aligned on a 16-byte boundary! 9 | 10 | Template parameters: 11 | - NC: number of coefficients, > 0 12 | 13 | --- Legal stuff --- 14 | 15 | This program is free software. It comes without any warranty, to 16 | the extent permitted by applicable law. You can redistribute it 17 | and/or modify it under the terms of the Do What The Fuck You Want 18 | To Public License, Version 2, as published by Sam Hocevar. See 19 | http://www.wtfpl.net/ for more details. 20 | 21 | *Tab=3***********************************************************************/ 22 | 23 | 24 | 25 | #pragma once 26 | #if ! 
defined (hiir_Upsampler2xSse_HEADER_INCLUDED) 27 | #define hiir_Upsampler2xSse_HEADER_INCLUDED 28 | 29 | 30 | 31 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 32 | 33 | #include "hiir/def.h" 34 | #include "hiir/StageDataSse.h" 35 | 36 | #include 37 | 38 | #include 39 | 40 | 41 | 42 | namespace hiir 43 | { 44 | 45 | 46 | 47 | template 48 | class Upsampler2xSse 49 | { 50 | 51 | static_assert ((NC > 0), "Number of coefficient must be positive."); 52 | 53 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 54 | 55 | public: 56 | 57 | typedef float DataType; 58 | static const int _nbr_chn = 1; 59 | 60 | enum { NBR_COEFS = NC }; 61 | 62 | Upsampler2xSse (); 63 | Upsampler2xSse (const Upsampler2xSse &other) = default; 64 | Upsampler2xSse (Upsampler2xSse &&other) = default; 65 | ~Upsampler2xSse () = default; 66 | 67 | Upsampler2xSse & 68 | operator = (const Upsampler2xSse &other) = default; 69 | Upsampler2xSse & 70 | operator = (Upsampler2xSse &&other) = default; 71 | 72 | void set_coefs (const double coef_arr [NBR_COEFS]); 73 | 74 | hiir_FORCEINLINE void 75 | process_sample (float &out_0, float &out_1, float input); 76 | void process_block (float out_ptr [], const float in_ptr [], long nbr_spl); 77 | 78 | void clear_buffers (); 79 | 80 | 81 | 82 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 83 | 84 | protected: 85 | 86 | 87 | 88 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 89 | 90 | private: 91 | 92 | static const int _stage_width = 2; 93 | static const int _nbr_stages = (NBR_COEFS + _stage_width - 1) / _stage_width; 94 | 95 | // Stage 0 contains only input memory 96 | typedef std::array Filter; 97 | 98 | Filter _filter; // Should be the first member (thus easier to align) 99 | 100 | 101 | 102 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 103 | 104 | private: 105 | 106 | bool operator == (const 
Upsampler2xSse &other) const = delete; 107 | bool operator != (const Upsampler2xSse &other) const = delete; 108 | 109 | }; // class Upsampler2xSse 110 | 111 | 112 | 113 | } // namespace hiir 114 | 115 | 116 | 117 | #include "hiir/Upsampler2xSse.hpp" 118 | 119 | 120 | 121 | #endif // hiir_Upsampler2xSse_HEADER_INCLUDED 122 | 123 | 124 | 125 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 126 | -------------------------------------------------------------------------------- /PhaseHalfPiF64Sse2.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | PhaseHalfPiF64Sse2.h 4 | Author: Laurent de Soras, 2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://www.wtfpl.net/ for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #pragma once 19 | #if ! 
defined (hiir_PhaseHalfPiF64Sse2_HEADER_INCLUDED) 20 | #define hiir_PhaseHalfPiF64Sse2_HEADER_INCLUDED 21 | 22 | 23 | 24 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 25 | 26 | #include "hiir/def.h" 27 | #include "hiir/StageDataF64Sse2.h" 28 | 29 | #include 30 | 31 | #include 32 | 33 | 34 | 35 | namespace hiir 36 | { 37 | 38 | 39 | 40 | template 41 | class PhaseHalfPiF64Sse2 42 | { 43 | 44 | static_assert ((NC > 0), "Number of coefficient must be positive."); 45 | 46 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 47 | 48 | public: 49 | 50 | typedef double DataType; 51 | static const int _nbr_chn = 1; 52 | 53 | enum { NBR_COEFS = NC }; 54 | 55 | PhaseHalfPiF64Sse2 (); 56 | PhaseHalfPiF64Sse2 (const PhaseHalfPiF64Sse2 &other) = default; 57 | PhaseHalfPiF64Sse2 (PhaseHalfPiF64Sse2 &&other) = default; 58 | 59 | PhaseHalfPiF64Sse2 & 60 | operator = (const PhaseHalfPiF64Sse2 &other) = default; 61 | PhaseHalfPiF64Sse2 & 62 | operator = (PhaseHalfPiF64Sse2 &&other) = default; 63 | 64 | void set_coefs (const double coef_arr []); 65 | 66 | hiir_FORCEINLINE void 67 | process_sample (double &out_0, double &out_1, double input); 68 | void process_block (double out_0_ptr [], double out_1_ptr [], const double in_ptr [], long nbr_spl); 69 | 70 | void clear_buffers (); 71 | 72 | 73 | 74 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 75 | 76 | protected: 77 | 78 | 79 | 80 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 81 | 82 | private: 83 | 84 | static const int _stage_width = 2; 85 | static const int _nbr_stages = (NBR_COEFS + _stage_width - 1) / _stage_width; 86 | static const int _nbr_phases = 2; 87 | 88 | // Stage 0 contains only input memory 89 | typedef std::array Filter; 90 | 91 | typedef std::array FilterBiPhase; 92 | 93 | FilterBiPhase _bifilter; // Should be the first member (thus easier to align) 94 | DataType _prev; 95 | int _phase; // 
0 or 1 96 | 97 | 98 | 99 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 100 | 101 | private: 102 | 103 | bool operator == (const PhaseHalfPiF64Sse2 &other) const = delete; 104 | bool operator != (const PhaseHalfPiF64Sse2 &other) const = delete; 105 | 106 | }; // class PhaseHalfPiF64Sse2 107 | 108 | 109 | 110 | } // namespace hiir 111 | 112 | 113 | 114 | #include "hiir/PhaseHalfPiF64Sse2.hpp" 115 | 116 | 117 | 118 | #endif // hiir_PhaseHalfPiF64Sse2_HEADER_INCLUDED 119 | 120 | 121 | 122 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 123 | -------------------------------------------------------------------------------- /Upsampler2x4Neon.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2x4Neon.h 4 | Author: Laurent de Soras, 2016 5 | 6 | Upsamples vectors of 4 float by a factor 2 the input signal, using the NEON 7 | instruction set. 8 | 9 | This object must be aligned on a 16-byte boundary! 10 | 11 | Template parameters: 12 | - NC: number of coefficients, > 0 13 | 14 | --- Legal stuff --- 15 | 16 | This program is free software. It comes without any warranty, to 17 | the extent permitted by applicable law. You can redistribute it 18 | and/or modify it under the terms of the Do What The Fuck You Want 19 | To Public License, Version 2, as published by Sam Hocevar. See 20 | http://sam.zoy.org/wtfpl/COPYING for more details. 21 | 22 | *Tab=3***********************************************************************/ 23 | 24 | 25 | 26 | #pragma once 27 | #if ! 
defined (hiir_Upsampler2x4Neon_HEADER_INCLUDED) 28 | #define hiir_Upsampler2x4Neon_HEADER_INCLUDED 29 | 30 | #if defined (_MSC_VER) 31 | #pragma warning (4 : 4250) 32 | #endif 33 | 34 | 35 | 36 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 37 | 38 | #include "hiir/def.h" 39 | #include "hiir/StageDataNeonV4.h" 40 | 41 | #include 42 | 43 | #include 44 | 45 | 46 | 47 | namespace hiir 48 | { 49 | 50 | 51 | 52 | template 53 | class Upsampler2x4Neon 54 | { 55 | 56 | static_assert ((NC > 0), "Number of coefficient must be positive."); 57 | 58 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 59 | 60 | public: 61 | 62 | typedef float DataType; 63 | static const int _nbr_chn = 4; 64 | 65 | enum { NBR_COEFS = NC }; 66 | 67 | Upsampler2x4Neon (); 68 | Upsampler2x4Neon (const Upsampler2x4Neon &other) = default; 69 | Upsampler2x4Neon (Upsampler2x4Neon &&other) = default; 70 | ~Upsampler2x4Neon () = default; 71 | 72 | Upsampler2x4Neon & 73 | operator = (const Upsampler2x4Neon &other) = default; 74 | Upsampler2x4Neon & 75 | operator = (Upsampler2x4Neon &&other) = default; 76 | 77 | void set_coefs (const double coef_arr [NBR_COEFS]); 78 | hiir_FORCEINLINE void 79 | process_sample (float32x4_t &out_0, float32x4_t &out_1, float32x4_t input); 80 | void process_block (float out_ptr [], const float in_ptr [], long nbr_spl); 81 | void clear_buffers (); 82 | 83 | 84 | 85 | 86 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 87 | 88 | protected: 89 | 90 | 91 | 92 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 93 | 94 | private: 95 | 96 | typedef std::array Filter; // Stages 0 and 1 contain only input memories 97 | 98 | Filter _filter; // Should be the first member (thus easier to align) 99 | 100 | 101 | 102 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 103 | 104 | private: 105 | 106 | bool operator == (const Upsampler2x4Neon 
&other) const = delete; 107 | bool operator != (const Upsampler2x4Neon &other) const = delete; 108 | 109 | }; // class Upsampler2x4Neon 110 | 111 | 112 | 113 | } // namespace hiir 114 | 115 | 116 | 117 | #include "hiir/Upsampler2x4Neon.hpp" 118 | 119 | 120 | 121 | #endif // hiir_Upsampler2x4Neon_HEADER_INCLUDED 122 | 123 | 124 | 125 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 126 | -------------------------------------------------------------------------------- /Upsampler2xNeon.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2xNeon.h 4 | Author: Laurent de Soras, 2020 5 | 6 | Upsamples by a factor 2 the input signal, using NEON instruction set. 7 | 8 | This object must be aligned on a 16-byte boundary! 9 | 10 | Template parameters: 11 | - NC: number of coefficients, > 0 12 | 13 | --- Legal stuff --- 14 | 15 | This program is free software. It comes without any warranty, to 16 | the extent permitted by applicable law. You can redistribute it 17 | and/or modify it under the terms of the Do What The Fuck You Want 18 | To Public License, Version 2, as published by Sam Hocevar. See 19 | http://sam.zoy.org/wtfpl/COPYING for more details. 20 | 21 | *Tab=3***********************************************************************/ 22 | 23 | 24 | 25 | #pragma once 26 | #if ! 
defined (hiir_Upsampler2xNeon_HEADER_INCLUDED) 27 | #define hiir_Upsampler2xNeon_HEADER_INCLUDED 28 | 29 | #if defined (_MSC_VER) 30 | #pragma warning (4 : 4250) 31 | #endif 32 | 33 | 34 | 35 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 36 | 37 | #include "hiir/StageDataNeonV2.h" 38 | 39 | #include 40 | 41 | 42 | 43 | namespace hiir 44 | { 45 | 46 | 47 | 48 | template 49 | class Upsampler2xNeon 50 | { 51 | 52 | static_assert ((NC > 0), "Number of coefficient must be positive."); 53 | 54 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 55 | 56 | public: 57 | 58 | typedef float DataType; 59 | static const int _nbr_chn = 1; 60 | 61 | enum { NBR_COEFS = NC }; 62 | 63 | Upsampler2xNeon (); 64 | Upsampler2xNeon (const Upsampler2xNeon &other) = default; 65 | Upsampler2xNeon (Upsampler2xNeon &&other) = default; 66 | ~Upsampler2xNeon () = default; 67 | 68 | Upsampler2xNeon & 69 | operator = (const Upsampler2xNeon &other) = default; 70 | Upsampler2xNeon & 71 | operator = (Upsampler2xNeon &&other) = default; 72 | 73 | void set_coefs (const double coef_arr [NBR_COEFS]); 74 | inline void process_sample (float &out_0, float &out_1, float input); 75 | void process_block (float out_ptr [], const float in_ptr [], long nbr_spl); 76 | void clear_buffers (); 77 | 78 | 79 | 80 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 81 | 82 | protected: 83 | 84 | 85 | 86 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 87 | 88 | private: 89 | 90 | static const int _stage_width = 2; 91 | static const int _nbr_stages = (NBR_COEFS + _stage_width - 1) / _stage_width; 92 | 93 | // Stage 0 contains only input memory 94 | typedef std::array Filter; 95 | 96 | Filter _filter; // Should be the first member (thus easier to align) 97 | 98 | 99 | 100 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 101 | 102 | private: 103 | 104 | bool operator 
== (const Upsampler2xNeon &other) const = delete; 105 | bool operator != (const Upsampler2xNeon &other) const = delete; 106 | 107 | }; // class Upsampler2xNeon 108 | 109 | 110 | 111 | } // namespace hiir 112 | 113 | 114 | 115 | #include "hiir/Upsampler2xNeon.hpp" 116 | 117 | 118 | 119 | #endif // hiir_Upsampler2xNeon_HEADER_INCLUDED 120 | 121 | 122 | 123 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 124 | -------------------------------------------------------------------------------- /Upsampler2x3dnow.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2x3dnow.h 4 | Author: Laurent de Soras, 2005 5 | 6 | Upsamples by a factor 2 the input signal, using 3DNow! instruction set. 7 | 8 | Template parameters: 9 | - NC: number of coefficients, > 0 10 | 11 | --- Legal stuff --- 12 | 13 | This program is free software. It comes without any warranty, to 14 | the extent permitted by applicable law. You can redistribute it 15 | and/or modify it under the terms of the Do What The Fuck You Want 16 | To Public License, Version 2, as published by Sam Hocevar. See 17 | http://sam.zoy.org/wtfpl/COPYING for more details. 18 | 19 | *Tab=3***********************************************************************/ 20 | 21 | 22 | 23 | #if ! defined (hiir_Upsampler2x3dnow_HEADER_INCLUDED) 24 | #define hiir_Upsampler2x3dnow_HEADER_INCLUDED 25 | 26 | #if defined (_MSC_VER) 27 | #pragma once 28 | #pragma warning (4 : 4250) // "Inherits via dominance." 
29 | #endif 30 | 31 | 32 | 33 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 34 | 35 | #include "hiir/def.h" 36 | #include "hiir/StageData3dnow.h" 37 | 38 | #include 39 | 40 | 41 | 42 | namespace hiir 43 | { 44 | 45 | 46 | 47 | template 48 | class Upsampler2x3dnow 49 | { 50 | 51 | static_assert ((NC > 0), "Number of coefficient must be positive."); 52 | 53 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 54 | 55 | public: 56 | 57 | typedef float DataType; 58 | static const int _nbr_chn = 1; 59 | 60 | enum { NBR_COEFS = NC }; 61 | 62 | Upsampler2x3dnow (); 63 | Upsampler2x3dnow (const Upsampler2x3dnow &other) = default; 64 | Upsampler2x3dnow (Upsampler2x3dnow &&other) = default; 65 | ~Upsampler2x3dnow () = default; 66 | 67 | Upsampler2x3dnow & 68 | operator = (const Upsampler2x3dnow &other) = default; 69 | Upsampler2x3dnow & 70 | operator = (Upsampler2x3dnow &&other) = default; 71 | 72 | void set_coefs (const double coef_arr [NBR_COEFS]); 73 | hiir_FORCEINLINE void 74 | process_sample (float &out_0, float &out_1, float input); 75 | void process_block (float out_ptr [], const float in_ptr [], long nbr_spl); 76 | void clear_buffers (); 77 | 78 | 79 | 80 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 81 | 82 | protected: 83 | 84 | 85 | 86 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 87 | 88 | private: 89 | 90 | enum { STAGE_WIDTH = 2 }; 91 | enum { NBR_STAGES = (NBR_COEFS + STAGE_WIDTH - 1) / STAGE_WIDTH }; 92 | 93 | typedef std::array Filter; // Stage 0 contains only input memory 94 | 95 | Filter _filter; // Should be the first member (thus easier to align) 96 | 97 | 98 | 99 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 100 | 101 | private: 102 | 103 | bool operator == (const Upsampler2x3dnow &other) = delete; 104 | bool operator != (const Upsampler2x3dnow &other) = delete; 105 | 106 | }; // class 
Upsampler2x3dnow 107 | 108 | 109 | 110 | } // namespace hiir 111 | 112 | 113 | 114 | #include "hiir/Upsampler2x3dnow.hpp" 115 | 116 | 117 | 118 | #endif // hiir_Upsampler2x3dnow_HEADER_INCLUDED 119 | 120 | 121 | 122 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 123 | -------------------------------------------------------------------------------- /Upsampler2x8Avx.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2x8Avx.h 4 | Ported Upsampler2x4Sse.h from SSE to AVX by Dario Mambro 5 | Upsampler2x4Sse.h by Laurent de Soras 6 | 7 | Upsamples vectors of 8 float by a factor 2 the input signal, using the AVX 8 | instruction set. 9 | 10 | This object must be aligned on a 32-byte boundary! 11 | 12 | Template parameters: 13 | - NC: number of coefficients, > 0 14 | 15 | --- Legal stuff --- 16 | 17 | This program is free software. It comes without any warranty, to 18 | the extent permitted by applicable law. You can redistribute it 19 | and/or modify it under the terms of the Do What The Fuck You Want 20 | To Public License, Version 2, as published by Sam Hocevar. See 21 | http://sam.zoy.org/wtfpl/COPYING for more details. 22 | 23 | *Tab=3***********************************************************************/ 24 | 25 | 26 | 27 | #pragma once 28 | #if ! 
defined (hiir_Upsampler2x8Avx_HEADER_INCLUDED) 29 | #define hiir_Upsampler2x8Avx_HEADER_INCLUDED 30 | 31 | #if defined (_MSC_VER) 32 | #pragma warning (4 : 4250) 33 | #endif 34 | 35 | 36 | 37 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 38 | 39 | #include "hiir/def.h" 40 | #include "hiir/StageDataAvx.h" 41 | 42 | #include 43 | 44 | #include 45 | 46 | 47 | 48 | namespace hiir 49 | { 50 | 51 | 52 | 53 | template 54 | class Upsampler2x8Avx 55 | { 56 | 57 | static_assert ((NC > 0), "Number of coefficient must be positive."); 58 | 59 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 60 | 61 | public: 62 | 63 | typedef float DataType; 64 | static const int _nbr_chn = 8; 65 | 66 | enum { NBR_COEFS = NC }; 67 | 68 | Upsampler2x8Avx (); 69 | Upsampler2x8Avx (const Upsampler2x8Avx &other) = default; 70 | Upsampler2x8Avx (Upsampler2x8Avx &&other) = default; 71 | ~Upsampler2x8Avx () = default; 72 | 73 | Upsampler2x8Avx & 74 | operator = (const Upsampler2x8Avx &other) = default; 75 | Upsampler2x8Avx & 76 | operator = (Upsampler2x8Avx &&other) = default; 77 | 78 | void set_coefs (const double coef_arr [NBR_COEFS]); 79 | hiir_FORCEINLINE void 80 | process_sample (__m256 &out_0, __m256 &out_1, __m256 input); 81 | void process_block (float out_ptr [], const float in_ptr [], long nbr_spl); 82 | void clear_buffers (); 83 | 84 | 85 | 86 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 87 | 88 | protected: 89 | 90 | 91 | 92 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 93 | 94 | private: 95 | 96 | typedef std::array Filter; // Stages 0 and 1 contain only input memories 97 | 98 | Filter _filter; // Should be the first member (thus easier to align) 99 | 100 | 101 | 102 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 103 | 104 | private: 105 | 106 | bool operator == (const Upsampler2x8Avx &other) const = delete; 107 | bool 
operator != (const Upsampler2x8Avx &other) const = delete; 108 | 109 | }; // class Upsampler2x8Avx 110 | 111 | 112 | 113 | } // namespace hiir 114 | 115 | 116 | 117 | #include "hiir/Upsampler2x8Avx.hpp" 118 | 119 | 120 | 121 | #endif // hiir_Upsampler2x8Avx_HEADER_INCLUDED 122 | 123 | 124 | 125 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 126 | -------------------------------------------------------------------------------- /Downsampler2xF64Sse2.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Downsampler2xF64Sse2.h 4 | Author: Laurent de Soras, 2020 5 | 6 | --- Legal stuff --- 7 | 8 | This program is free software. It comes without any warranty, to 9 | the extent permitted by applicable law. You can redistribute it 10 | and/or modify it under the terms of the Do What The Fuck You Want 11 | To Public License, Version 2, as published by Sam Hocevar. See 12 | http://www.wtfpl.net/ for more details. 13 | 14 | *Tab=3***********************************************************************/ 15 | 16 | 17 | 18 | #pragma once 19 | #if ! 
defined (hiir_Downsampler2xF64Sse2_HEADER_INCLUDED) 20 | #define hiir_Downsampler2xF64Sse2_HEADER_INCLUDED 21 | 22 | 23 | 24 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 25 | 26 | #include "hiir/def.h" 27 | #include "hiir/StageDataF64Sse2.h" 28 | 29 | #include 30 | 31 | #include 32 | 33 | 34 | 35 | namespace hiir 36 | { 37 | 38 | 39 | 40 | template 41 | class Downsampler2xF64Sse2 42 | { 43 | 44 | static_assert ((NC > 0), "Number of coefficient must be positive."); 45 | 46 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 47 | 48 | public: 49 | 50 | typedef double DataType; 51 | static const int _nbr_chn = 1; 52 | 53 | enum { NBR_COEFS = NC }; 54 | 55 | Downsampler2xF64Sse2 (); 56 | Downsampler2xF64Sse2 (const Downsampler2xF64Sse2 &other) = default; 57 | Downsampler2xF64Sse2 (Downsampler2xF64Sse2 &&other) = default; 58 | 59 | Downsampler2xF64Sse2 & 60 | operator = (const Downsampler2xF64Sse2 &other) = default; 61 | Downsampler2xF64Sse2 & 62 | operator = (Downsampler2xF64Sse2 &&other) = default; 63 | 64 | void set_coefs (const double coef_arr []); 65 | 66 | hiir_FORCEINLINE double 67 | process_sample (const double in_ptr [2]); 68 | void process_block (double out_ptr [], const double in_ptr [], long nbr_spl); 69 | 70 | hiir_FORCEINLINE void 71 | process_sample_split (double &low, double &high, const double in_ptr [2]); 72 | void process_block_split (double out_l_ptr [], double out_h_ptr [], const double in_ptr [], long nbr_spl); 73 | 74 | void clear_buffers (); 75 | 76 | 77 | 78 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 79 | 80 | protected: 81 | 82 | 83 | 84 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 85 | 86 | private: 87 | 88 | static const int _stage_width = 2; 89 | static const int _nbr_stages = (NBR_COEFS + _stage_width - 1) / _stage_width; 90 | 91 | // Stage 0 contains only input memory 92 | typedef std::array Filter; 93 
| 94 | Filter _filter; // Should be the first member (thus easier to align) 95 | 96 | 97 | 98 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 99 | 100 | private: 101 | 102 | bool operator == (const Downsampler2xF64Sse2 &other) const = delete; 103 | bool operator != (const Downsampler2xF64Sse2 &other) const = delete; 104 | 105 | }; // class Downsampler2xF64Sse2 106 | 107 | 108 | 109 | } // namespace hiir 110 | 111 | 112 | 113 | #include "hiir/Downsampler2xF64Sse2.hpp" 114 | 115 | 116 | 117 | #endif // hiir_Downsampler2xF64Sse2_HEADER_INCLUDED 118 | 119 | 120 | 121 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 122 | -------------------------------------------------------------------------------- /Upsampler2x16Avx512.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2x16Avx512.h 4 | Author: Laurent de Soras, 2020 5 | 6 | Upsamples vectors of 16 float by a factor 2 the input signal, using the 7 | AVX-512 instruction set. 8 | 9 | This object must be aligned on a 64-byte boundary! 10 | 11 | Template parameters: 12 | - NC: number of coefficients, > 0 13 | 14 | --- Legal stuff --- 15 | 16 | This program is free software. It comes without any warranty, to 17 | the extent permitted by applicable law. You can redistribute it 18 | and/or modify it under the terms of the Do What The Fuck You Want 19 | To Public License, Version 2, as published by Sam Hocevar. See 20 | http://sam.zoy.org/wtfpl/COPYING for more details. 21 | 22 | *Tab=3***********************************************************************/ 23 | 24 | 25 | 26 | #pragma once 27 | #if ! 
defined (hiir_Upsampler2x16Avx512_HEADER_INCLUDED) 28 | #define hiir_Upsampler2x16Avx512_HEADER_INCLUDED 29 | 30 | #if defined (_MSC_VER) 31 | #pragma warning (4 : 4250) 32 | #endif 33 | 34 | 35 | 36 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 37 | 38 | #include "hiir/def.h" 39 | #include "hiir/StageDataAvx512.h" 40 | 41 | #include 42 | 43 | #include 44 | 45 | 46 | 47 | namespace hiir 48 | { 49 | 50 | 51 | 52 | template 53 | class Upsampler2x16Avx512 54 | { 55 | 56 | static_assert ((NC > 0), "Number of coefficient must be positive."); 57 | 58 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 59 | 60 | public: 61 | 62 | typedef float DataType; 63 | static const int _nbr_chn = 16; 64 | 65 | enum { NBR_COEFS = NC }; 66 | 67 | Upsampler2x16Avx512 (); 68 | Upsampler2x16Avx512 (const Upsampler2x16Avx512 &other) = default; 69 | Upsampler2x16Avx512 (Upsampler2x16Avx512 &&other) = default; 70 | ~Upsampler2x16Avx512 () = default; 71 | 72 | Upsampler2x16Avx512 & 73 | operator = (const Upsampler2x16Avx512 &other) = default; 74 | Upsampler2x16Avx512 & 75 | operator = (Upsampler2x16Avx512 &&other) = default; 76 | 77 | void set_coefs (const double coef_arr [NBR_COEFS]); 78 | hiir_FORCEINLINE void 79 | process_sample (__m512 &out_0, __m512 &out_1, __m512 input); 80 | void process_block (float out_ptr [], const float in_ptr [], long nbr_spl); 81 | void clear_buffers (); 82 | 83 | 84 | 85 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 86 | 87 | protected: 88 | 89 | 90 | 91 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 92 | 93 | private: 94 | 95 | typedef std::array Filter; // Stages 0 and 1 contain only input memories 96 | 97 | Filter _filter; // Should be the first member (thus easier to align) 98 | 99 | 100 | 101 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 102 | 103 | private: 104 | 105 | bool operator == 
(const Upsampler2x16Avx512 &other) const = delete; 106 | bool operator != (const Upsampler2x16Avx512 &other) const = delete; 107 | 108 | }; // class Upsampler2x16Avx512 109 | 110 | 111 | 112 | } // namespace hiir 113 | 114 | 115 | 116 | #include "hiir/Upsampler2x16Avx512.hpp" 117 | 118 | 119 | 120 | #endif // hiir_Upsampler2x16Avx512_HEADER_INCLUDED 121 | 122 | 123 | 124 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 125 | -------------------------------------------------------------------------------- /Upsampler2x8F64Avx512.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2x8F64Avx512.h 4 | Author: Laurent de Soras, 2020 5 | 6 | Upsamples vectors of 8 double by a factor 2 the input signal, using the 7 | AVX-512 instruction set. 8 | 9 | This object must be aligned on a 64-byte boundary! 10 | 11 | Template parameters: 12 | - NC: number of coefficients, > 0 13 | 14 | --- Legal stuff --- 15 | 16 | This program is free software. It comes without any warranty, to 17 | the extent permitted by applicable law. You can redistribute it 18 | and/or modify it under the terms of the Do What The Fuck You Want 19 | To Public License, Version 2, as published by Sam Hocevar. See 20 | http://sam.zoy.org/wtfpl/COPYING for more details. 21 | 22 | *Tab=3***********************************************************************/ 23 | 24 | 25 | 26 | #pragma once 27 | #if ! 
defined (hiir_Upsampler2x8F64Avx512_HEADER_INCLUDED) 28 | #define hiir_Upsampler2x8F64Avx512_HEADER_INCLUDED 29 | 30 | #if defined (_MSC_VER) 31 | #pragma warning (4 : 4250) 32 | #endif 33 | 34 | 35 | 36 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 37 | 38 | #include "hiir/def.h" 39 | #include "hiir/StageDataF64Avx512.h" 40 | 41 | #include 42 | 43 | #include 44 | 45 | 46 | 47 | namespace hiir 48 | { 49 | 50 | 51 | 52 | template 53 | class Upsampler2x8F64Avx512 54 | { 55 | 56 | static_assert ((NC > 0), "Number of coefficient must be positive."); 57 | 58 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 59 | 60 | public: 61 | 62 | typedef double DataType; 63 | static const int _nbr_chn = 8; 64 | 65 | enum { NBR_COEFS = NC }; 66 | 67 | Upsampler2x8F64Avx512 (); 68 | Upsampler2x8F64Avx512 (const Upsampler2x8F64Avx512 &other) = default; 69 | Upsampler2x8F64Avx512 (Upsampler2x8F64Avx512 &&other) = default; 70 | ~Upsampler2x8F64Avx512 () = default; 71 | 72 | Upsampler2x8F64Avx512 & 73 | operator = (const Upsampler2x8F64Avx512 &other) = default; 74 | Upsampler2x8F64Avx512 & 75 | operator = (Upsampler2x8F64Avx512 &&other) = default; 76 | 77 | void set_coefs (const double coef_arr [NBR_COEFS]); 78 | hiir_FORCEINLINE void 79 | process_sample (__m512d &out_0, __m512d &out_1, __m512d input); 80 | void process_block (double out_ptr [], const double in_ptr [], long nbr_spl); 81 | void clear_buffers (); 82 | 83 | 84 | 85 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 86 | 87 | protected: 88 | 89 | 90 | 91 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 92 | 93 | private: 94 | 95 | typedef std::array Filter; // Stages 0 and 1 contain only input memories 96 | 97 | Filter _filter; // Should be the first member (thus easier to align) 98 | 99 | 100 | 101 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 102 | 103 | private: 
104 | 105 | bool operator == (const Upsampler2x8F64Avx512 &other) const = delete; 106 | bool operator != (const Upsampler2x8F64Avx512 &other) const = delete; 107 | 108 | }; // class Upsampler2x8F64Avx512 109 | 110 | 111 | 112 | } // namespace hiir 113 | 114 | 115 | 116 | #include "hiir/Upsampler2x8F64Avx512.hpp" 117 | 118 | 119 | 120 | #endif // hiir_Upsampler2x8F64Avx512_HEADER_INCLUDED 121 | 122 | 123 | 124 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 125 | -------------------------------------------------------------------------------- /Upsampler2x4F64Avx.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2x4F64Avx.h 4 | Port of Upsampler2x4Sse.h from float to double by Dario Mambro 5 | Upsampler2x4Sse.h by Laurent de Soras, 2015 6 | 7 | Upsamples vectors of 4 double by a factor 2 the input signal, using the AVX 8 | instruction set. 9 | 10 | This object must be aligned on a 32-byte boundary! 11 | 12 | Template parameters: 13 | - NC: number of coefficients, > 0 14 | 15 | --- Legal stuff --- 16 | 17 | This program is free software. It comes without any warranty, to 18 | the extent permitted by applicable law. You can redistribute it 19 | and/or modify it under the terms of the Do What The Fuck You Want 20 | To Public License, Version 2, as published by Sam Hocevar. See 21 | http://sam.zoy.org/wtfpl/COPYING for more details. 22 | 23 | *Tab=3***********************************************************************/ 24 | 25 | 26 | 27 | #pragma once 28 | #if ! 
defined (hiir_Upsampler2x4F64Avx_HEADER_INCLUDED) 29 | #define hiir_Upsampler2x4F64Avx_HEADER_INCLUDED 30 | 31 | #if defined (_MSC_VER) 32 | #pragma warning (4 : 4250) 33 | #endif 34 | 35 | 36 | 37 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 38 | 39 | #include "hiir/def.h" 40 | #include "hiir/StageDataF64Avx.h" 41 | 42 | #include 43 | 44 | #include 45 | 46 | 47 | 48 | namespace hiir 49 | { 50 | 51 | 52 | 53 | template 54 | class Upsampler2x4F64Avx 55 | { 56 | 57 | static_assert ((NC > 0), "Number of coefficient must be positive."); 58 | 59 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 60 | 61 | public: 62 | 63 | typedef double DataType; 64 | static const int _nbr_chn = 4; 65 | 66 | enum { NBR_COEFS = NC }; 67 | 68 | Upsampler2x4F64Avx (); 69 | Upsampler2x4F64Avx (const Upsampler2x4F64Avx &other) = default; 70 | Upsampler2x4F64Avx (Upsampler2x4F64Avx &&other) = default; 71 | ~Upsampler2x4F64Avx () = default; 72 | 73 | Upsampler2x4F64Avx & 74 | operator = (const Upsampler2x4F64Avx &other) = default; 75 | Upsampler2x4F64Avx & 76 | operator = (Upsampler2x4F64Avx &&other) = default; 77 | 78 | void set_coefs (const double coef_arr [NBR_COEFS]); 79 | hiir_FORCEINLINE void 80 | process_sample (__m256d &out_0, __m256d &out_1, __m256d input); 81 | void process_block (double out_ptr [], const double in_ptr [], long nbr_spl); 82 | void clear_buffers (); 83 | 84 | 85 | 86 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 87 | 88 | protected: 89 | 90 | 91 | 92 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 93 | 94 | private: 95 | 96 | typedef std::array Filter; // Stages 0 and 1 contain only input memories 97 | 98 | Filter _filter; // Should be the first member (thus easier to align) 99 | 100 | 101 | 102 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 103 | 104 | private: 105 | 106 | bool operator == (const 
Upsampler2x4F64Avx &other) const = delete; 107 | bool operator != (const Upsampler2x4F64Avx &other) const = delete; 108 | 109 | }; // class Upsampler2x4F64Avx 110 | 111 | 112 | 113 | } // namespace hiir 114 | 115 | 116 | 117 | #include "hiir/Upsampler2x4F64Avx.hpp" 118 | 119 | 120 | 121 | #endif // hiir_Upsampler2x4F64Avx_HEADER_INCLUDED 122 | 123 | 124 | 125 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 126 | -------------------------------------------------------------------------------- /Upsampler2x2F64Neon.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2x2F64Neon.h 4 | Author: Laurent de Soras, 2016 5 | Ported to double by Dario Mambro, 2020 6 | 7 | Upsamples vectors of 2 double by a factor 2 the input signal, using the NEON 8 | instruction set. 9 | 10 | This object must be aligned on a 16-byte boundary! 11 | 12 | Template parameters: 13 | - NC: number of coefficients, > 0 14 | 15 | --- Legal stuff --- 16 | 17 | This program is free software. It comes without any warranty, to 18 | the extent permitted by applicable law. You can redistribute it 19 | and/or modify it under the terms of the Do What The Fuck You Want 20 | To Public License, Version 2, as published by Sam Hocevar. See 21 | http://sam.zoy.org/wtfpl/COPYING for more details. 22 | 23 | *Tab=3***********************************************************************/ 24 | 25 | 26 | 27 | #pragma once 28 | #if ! 
defined (hiir_Upsampler2x2F64Neon_HEADER_INCLUDED) 29 | #define hiir_Upsampler2x2F64Neon_HEADER_INCLUDED 30 | 31 | #if defined (_MSC_VER) 32 | #pragma warning (4 : 4250) 33 | #endif 34 | 35 | 36 | 37 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 38 | 39 | #include "hiir/def.h" 40 | #include "hiir/StageDataNeonV2F64.h" 41 | 42 | #include 43 | 44 | #include 45 | 46 | 47 | 48 | namespace hiir 49 | { 50 | 51 | 52 | 53 | template 54 | class Upsampler2x2F64Neon 55 | { 56 | 57 | static_assert ((NC > 0), "Number of coefficient must be positive."); 58 | 59 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 60 | 61 | public: 62 | 63 | typedef double DataType; 64 | static const int _nbr_chn = 2; 65 | 66 | enum { NBR_COEFS = NC }; 67 | 68 | Upsampler2x2F64Neon (); 69 | Upsampler2x2F64Neon (const Upsampler2x2F64Neon &other) = default; 70 | Upsampler2x2F64Neon (Upsampler2x2F64Neon &&other) = default; 71 | ~Upsampler2x2F64Neon () = default; 72 | 73 | Upsampler2x2F64Neon & 74 | operator = (const Upsampler2x2F64Neon &other) = default; 75 | Upsampler2x2F64Neon & 76 | operator = (Upsampler2x2F64Neon &&other) = default; 77 | 78 | void set_coefs (const double coef_arr [NBR_COEFS]); 79 | hiir_FORCEINLINE void 80 | process_sample (float64x2_t &out_0, float64x2_t &out_1, float64x2_t input); 81 | void process_block (double out_ptr [], const double in_ptr [], long nbr_spl); 82 | void clear_buffers (); 83 | 84 | 85 | 86 | 87 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 88 | 89 | protected: 90 | 91 | 92 | 93 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 94 | 95 | private: 96 | 97 | typedef std::array Filter; // Stages 0 and 1 contain only input memories 98 | 99 | Filter _filter; // Should be the first member (thus easier to align) 100 | 101 | 102 | 103 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 104 | 105 | private: 106 | 
107 | bool operator == (const Upsampler2x2F64Neon &other) const = delete; 108 | bool operator != (const Upsampler2x2F64Neon &other) const = delete; 109 | 110 | }; // class Upsampler2x2F64Neon 111 | 112 | 113 | 114 | } // namespace hiir 115 | 116 | 117 | 118 | #include "hiir/Upsampler2x2F64Neon.hpp" 119 | 120 | 121 | 122 | #endif // hiir_Upsampler2x2F64Neon_HEADER_INCLUDED 123 | 124 | 125 | 126 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 127 | -------------------------------------------------------------------------------- /Downsampler2xFpuTpl.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Downsampler2xFpuTpl.h 4 | Author: Laurent de Soras, 2005 5 | 6 | Downsamples by a factor 2 the input signal, using FPU. 7 | 8 | Template parameters: 9 | - NC: number of coefficients, > 0 10 | 11 | --- Legal stuff --- 12 | 13 | This program is free software. It comes without any warranty, to 14 | the extent permitted by applicable law. You can redistribute it 15 | and/or modify it under the terms of the Do What The Fuck You Want 16 | To Public License, Version 2, as published by Sam Hocevar. See 17 | http://sam.zoy.org/wtfpl/COPYING for more details. 18 | 19 | *Tab=3***********************************************************************/ 20 | 21 | 22 | 23 | #if ! defined (hiir_Downsampler2xFpuTpl_HEADER_INCLUDED) 24 | #define hiir_Downsampler2xFpuTpl_HEADER_INCLUDED 25 | 26 | #if defined (_MSC_VER) 27 | #pragma once 28 | #pragma warning (4 : 4250) // "Inherits via dominance." 
29 | #endif 30 | 31 | 32 | 33 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 34 | 35 | #include "hiir/def.h" 36 | 37 | #include 38 | 39 | 40 | 41 | namespace hiir 42 | { 43 | 44 | 45 | 46 | template 47 | class Downsampler2xFpuTpl 48 | { 49 | 50 | static_assert ((NC > 0), "Number of coefficient must be positive."); 51 | 52 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 53 | 54 | public: 55 | 56 | typedef DT DataType; 57 | static const int _nbr_chn = 1; 58 | 59 | enum { NBR_COEFS = NC }; 60 | 61 | Downsampler2xFpuTpl (); 62 | Downsampler2xFpuTpl (const Downsampler2xFpuTpl &other) = default; 63 | Downsampler2xFpuTpl (Downsampler2xFpuTpl &&other) = default; 64 | ~Downsampler2xFpuTpl () = default; 65 | 66 | Downsampler2xFpuTpl & 67 | operator = (const Downsampler2xFpuTpl &other) = default; 68 | Downsampler2xFpuTpl & 69 | operator = (Downsampler2xFpuTpl &&other) = default; 70 | 71 | void set_coefs (const double coef_arr []); 72 | 73 | hiir_FORCEINLINE DataType 74 | process_sample (const DataType in_ptr [2]); 75 | void process_block (DataType out_ptr [], const DataType in_ptr [], long nbr_spl); 76 | 77 | hiir_FORCEINLINE void 78 | process_sample_split (DataType &low, DataType &high, const DataType in_ptr [2]); 79 | void process_block_split (DataType out_l_ptr [], DataType out_h_ptr [], const DataType in_ptr [], long nbr_spl); 80 | 81 | void clear_buffers (); 82 | 83 | 84 | 85 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 86 | 87 | protected: 88 | 89 | 90 | 91 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 92 | 93 | private: 94 | 95 | typedef std::array HyperGluar; 96 | 97 | HyperGluar _coef; 98 | HyperGluar _x; 99 | HyperGluar _y; 100 | 101 | 102 | 103 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 104 | 105 | private: 106 | 107 | bool operator == (const Downsampler2xFpuTpl &other) = delete; 108 | bool 
operator != (const Downsampler2xFpuTpl &other) = delete; 109 | 110 | }; // class Downsampler2xFpuTpl 111 | 112 | 113 | 114 | } // namespace hiir 115 | 116 | 117 | 118 | #include "hiir/Downsampler2xFpuTpl.hpp" 119 | 120 | 121 | 122 | #endif // hiir_Downsampler2xFpuTpl_HEADER_INCLUDED 123 | 124 | 125 | 126 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 127 | -------------------------------------------------------------------------------- /Upsampler2x2F64Sse2.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2x2F64Sse2.h 4 | Port of Upsampler2x4Sse.h from float to double by Dario Mambro 5 | Upsampler2x4Sse.h by Laurent de Soras, 2015 6 | 7 | Upsamples vectors of 2 double by a factor 2 the input signal, using the SSE2 8 | instruction set. 9 | 10 | This object must be aligned on a 16-byte boundary! 11 | 12 | Template parameters: 13 | - NC: number of coefficients, > 0 14 | 15 | --- Legal stuff --- 16 | 17 | This program is free software. It comes without any warranty, to 18 | the extent permitted by applicable law. You can redistribute it 19 | and/or modify it under the terms of the Do What The Fuck You Want 20 | To Public License, Version 2, as published by Sam Hocevar. See 21 | http://sam.zoy.org/wtfpl/COPYING for more details. 22 | 23 | *Tab=3***********************************************************************/ 24 | 25 | 26 | 27 | #pragma once 28 | #if ! 
defined (hiir_Upsampler2x2F64Sse2_HEADER_INCLUDED) 29 | #define hiir_Upsampler2x2F64Sse2_HEADER_INCLUDED 30 | 31 | #if defined (_MSC_VER) 32 | #pragma warning (4 : 4250) 33 | #endif 34 | 35 | 36 | 37 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 38 | 39 | #include "hiir/def.h" 40 | #include "hiir/StageDataF64Sse2.h" 41 | 42 | #include 43 | 44 | #include 45 | 46 | 47 | 48 | namespace hiir 49 | { 50 | 51 | 52 | 53 | template 54 | class Upsampler2x2F64Sse2 55 | { 56 | 57 | static_assert ((NC > 0), "Number of coefficient must be positive."); 58 | 59 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 60 | 61 | public: 62 | 63 | typedef double DataType; 64 | static const int _nbr_chn = 2; 65 | 66 | enum { NBR_COEFS = NC }; 67 | 68 | Upsampler2x2F64Sse2 (); 69 | Upsampler2x2F64Sse2 (const Upsampler2x2F64Sse2 &other) = default; 70 | Upsampler2x2F64Sse2 (Upsampler2x2F64Sse2 &&other) = default; 71 | ~Upsampler2x2F64Sse2 () = default; 72 | 73 | Upsampler2x2F64Sse2 & 74 | operator = (const Upsampler2x2F64Sse2 &other) = default; 75 | Upsampler2x2F64Sse2 & 76 | operator = (Upsampler2x2F64Sse2 &&other) = default; 77 | 78 | 79 | void set_coefs (const double coef_arr [NBR_COEFS]); 80 | hiir_FORCEINLINE void 81 | process_sample (__m128d &out_0, __m128d &out_1, __m128d input); 82 | void process_block (double out_ptr [], const double in_ptr [], long nbr_spl); 83 | void clear_buffers (); 84 | 85 | 86 | 87 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 88 | 89 | protected: 90 | 91 | 92 | 93 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 94 | 95 | private: 96 | 97 | typedef std::array Filter; // Stages 0 and 1 contain only input memories 98 | 99 | Filter _filter; // Should be the first member (thus easier to align) 100 | 101 | 102 | 103 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 104 | 105 | private: 106 | 107 | bool 
operator == (const Upsampler2x2F64Sse2 &other) const = delete; 108 | bool operator != (const Upsampler2x2F64Sse2 &other) const = delete; 109 | 110 | }; // class Upsampler2x2F64Sse2 111 | 112 | 113 | 114 | } // namespace hiir 115 | 116 | 117 | 118 | #include "hiir/Upsampler2x2F64Sse2.hpp" 119 | 120 | 121 | 122 | #endif // hiir_Upsampler2x2F64Sse2_HEADER_INCLUDED 123 | 124 | 125 | 126 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 127 | -------------------------------------------------------------------------------- /Downsampler2xSse.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Downsampler2xSse.h 4 | Author: Laurent de Soras, 2020 5 | 6 | Downsamples by a factor 2 the input signal, using SSE instruction set. 7 | 8 | This object must be aligned on a 16-byte boundary! 9 | 10 | Template parameters: 11 | - NC: number of coefficients, > 0 12 | 13 | --- Legal stuff --- 14 | 15 | This program is free software. It comes without any warranty, to 16 | the extent permitted by applicable law. You can redistribute it 17 | and/or modify it under the terms of the Do What The Fuck You Want 18 | To Public License, Version 2, as published by Sam Hocevar. See 19 | http://www.wtfpl.net/ for more details. 20 | 21 | *Tab=3***********************************************************************/ 22 | 23 | 24 | 25 | #pragma once 26 | #if ! 
defined (hiir_Downsampler2xSse_HEADER_INCLUDED) 27 | #define hiir_Downsampler2xSse_HEADER_INCLUDED 28 | 29 | 30 | 31 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 32 | 33 | #include "hiir/def.h" 34 | #include "hiir/StageDataSse.h" 35 | 36 | #include 37 | 38 | #include 39 | 40 | 41 | 42 | namespace hiir 43 | { 44 | 45 | 46 | 47 | /* Downsampler2xSse: declaration of the single-channel float 2x downsampler (DataType = float, _nbr_chn = 1); NBR_COEFS mirrors the NC template argument. process_sample() reads a pair of input samples and returns one float, process_sample_split() delivers separate low and high outputs through its reference parameters, and the block variants take arrays plus a sample count. No destructor is declared explicitly here, so the implicit one applies. The member definitions are not visible in this header; presumably they come from Downsampler2xSse.hpp, which is included below. */ template 48 | class Downsampler2xSse 49 | { 50 | 51 | static_assert ((NC > 0), "Number of coefficient must be positive."); 52 | 53 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 54 | 55 | public: 56 | 57 | typedef float DataType; 58 | static const int _nbr_chn = 1; 59 | 60 | enum { NBR_COEFS = NC }; 61 | 62 | Downsampler2xSse (); 63 | Downsampler2xSse (const Downsampler2xSse &other) = default; 64 | Downsampler2xSse (Downsampler2xSse &&other) = default; 65 | 66 | Downsampler2xSse & 67 | operator = (const Downsampler2xSse &other) = default; 68 | Downsampler2xSse & 69 | operator = (Downsampler2xSse &&other) = default; 70 | 71 | void set_coefs (const double coef_arr []); 72 | 73 | hiir_FORCEINLINE float 74 | process_sample (const float in_ptr [2]); 75 | void process_block (float out_ptr [], const float in_ptr [], long nbr_spl); 76 | 77 | hiir_FORCEINLINE void 78 | process_sample_split (float &low, float &high, const float in_ptr [2]); 79 | void process_block_split (float out_l_ptr [], float out_h_ptr [], const float in_ptr [], long nbr_spl); 80 | 81 | void clear_buffers (); 82 | 83 | 84 | 85 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 86 | 87 | protected: 88 | 89 | 90 | 91 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 92 | 93 | private: 94 | 95 | static const int _stage_width = 2; /* _nbr_stages below is NBR_COEFS / _stage_width rounded up */ 96 | static const int _nbr_stages = (NBR_COEFS + _stage_width - 1) / _stage_width; 97 | 98 | // Stage 0 contains only input memory 99 | typedef std::array Filter; 100 | 101 | Filter _filter; // Should be the first member (thus 
easier to align) 102 | 103 | 104 | 105 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 106 | 107 | private: 108 | 109 | bool operator == (const Downsampler2xSse &other) const = delete; 110 | bool operator != (const Downsampler2xSse &other) const = delete; 111 | 112 | }; // class Downsampler2xSse 113 | 114 | 115 | 116 | } // namespace hiir 117 | 118 | 119 | 120 | #include "hiir/Downsampler2xSse.hpp" 121 | 122 | 123 | 124 | #endif // hiir_Downsampler2xSse_HEADER_INCLUDED 125 | 126 | 127 | 128 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 129 | -------------------------------------------------------------------------------- /PhaseHalfPi4Sse.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | PhaseHalfPi4Sse.h 4 | Author: Laurent de Soras, 2020 5 | 6 | From the input signal, generates two signals with a pi/2 phase shift, using 7 | SSE instruction set. Works on vectors of 4 float. 8 | 9 | This object must be aligned on a 16-byte boundary! 10 | 11 | Template parameters: 12 | - NC: number of coefficients, > 0 13 | 14 | --- Legal stuff --- 15 | 16 | This program is free software. It comes without any warranty, to 17 | the extent permitted by applicable law. You can redistribute it 18 | and/or modify it under the terms of the Do What The Fuck You Want 19 | To Public License, Version 2, as published by Sam Hocevar. See 20 | http://www.wtfpl.net/ for more details. 21 | 22 | *Tab=3***********************************************************************/ 23 | 24 | 25 | 26 | #pragma once 27 | #if ! 
defined (hiir_PhaseHalfPi4Sse_HEADER_INCLUDED) 28 | #define hiir_PhaseHalfPi4Sse_HEADER_INCLUDED 29 | 30 | 31 | 32 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 33 | 34 | #include "hiir/def.h" 35 | #include "hiir/StageDataSse.h" 36 | 37 | #include 38 | 39 | #include 40 | 41 | 42 | 43 | namespace hiir 44 | { 45 | 46 | 47 | 48 | /* PhaseHalfPi4Sse: declaration of the pi/2 phase-shift filter that handles 4 float channels per call (DataType = float, _nbr_chn = 4); NBR_COEFS mirrors the NC template argument. process_sample() takes one __m128 input and writes the two results through out_0 and out_1; process_block() is the array-based counterpart taking a sample count. The member definitions are not visible in this header; presumably they come from PhaseHalfPi4Sse.hpp, which is included below. */ template 49 | class PhaseHalfPi4Sse 50 | { 51 | 52 | static_assert ((NC > 0), "Number of coefficient must be positive."); 53 | 54 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 55 | 56 | public: 57 | 58 | typedef float DataType; 59 | static const int _nbr_chn = 4; 60 | 61 | enum { NBR_COEFS = NC }; 62 | 63 | PhaseHalfPi4Sse (); 64 | PhaseHalfPi4Sse (const PhaseHalfPi4Sse &other) = default; 65 | PhaseHalfPi4Sse (PhaseHalfPi4Sse &&other) = default; 66 | ~PhaseHalfPi4Sse () = default; 67 | 68 | PhaseHalfPi4Sse & 69 | operator = (const PhaseHalfPi4Sse &other) = default; 70 | PhaseHalfPi4Sse & 71 | operator = (PhaseHalfPi4Sse &&other) = default; 72 | 73 | void set_coefs (const double coef_arr []); 74 | 75 | hiir_FORCEINLINE void 76 | process_sample (__m128 &out_0, __m128 &out_1, __m128 input); 77 | void process_block (float out_0_ptr [], float out_1_ptr [], const float in_ptr [], long nbr_spl); 78 | 79 | void clear_buffers (); 80 | 81 | 82 | 83 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 84 | 85 | protected: 86 | 87 | 88 | 89 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 90 | 91 | private: 92 | 93 | static const int _nbr_phases = 2; 94 | 95 | typedef std::array Filter; // Stages 0 and 1 contain only input memories 96 | 97 | typedef std::array FilterBiPhase; 98 | 99 | FilterBiPhase _bifilter; 100 | union 101 | { 102 | __m128 _prev4; // Just to ensure alignment 103 | float _prev [4]; 104 | }; 105 | int _phase; // 0 or 1 106 | 107 | 108 | 109 | /*\\\ FORBIDDEN MEMBER FUNCTIONS 
\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 110 | 111 | private: 112 | 113 | bool operator == (const PhaseHalfPi4Sse &other) const = delete; 114 | bool operator != (const PhaseHalfPi4Sse &other) const = delete; 115 | 116 | }; // class PhaseHalfPi4Sse 117 | 118 | 119 | 120 | } // namespace hiir 121 | 122 | 123 | 124 | #include "hiir/PhaseHalfPi4Sse.hpp" 125 | 126 | 127 | 128 | #endif // hiir_PhaseHalfPi4Sse_HEADER_INCLUDED 129 | 130 | 131 | 132 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 133 | -------------------------------------------------------------------------------- /PhaseHalfPi8Avx.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | PhaseHalfPi8Avx.h 4 | Author: Laurent de Soras, 2020 5 | 6 | From the input signal, generates two signals with a pi/2 phase shift, using 7 | AVX instruction set. Works on vectors of 8 float. 8 | 9 | This object must be aligned on a 32-byte boundary! 10 | 11 | Template parameters: 12 | - NC: number of coefficients, > 0 13 | 14 | --- Legal stuff --- 15 | 16 | This program is free software. It comes without any warranty, to 17 | the extent permitted by applicable law. You can redistribute it 18 | and/or modify it under the terms of the Do What The Fuck You Want 19 | To Public License, Version 2, as published by Sam Hocevar. See 20 | http://www.wtfpl.net/ for more details. 21 | 22 | *Tab=3***********************************************************************/ 23 | 24 | 25 | 26 | #pragma once 27 | #if ! 
defined (hiir_PhaseHalfPi8Avx_HEADER_INCLUDED) 28 | #define hiir_PhaseHalfPi8Avx_HEADER_INCLUDED 29 | 30 | 31 | 32 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 33 | 34 | #include "hiir/def.h" 35 | #include "hiir/StageDataAvx.h" 36 | 37 | #include 38 | 39 | #include 40 | 41 | 42 | 43 | namespace hiir 44 | { 45 | 46 | 47 | 48 | /* PhaseHalfPi8Avx: declaration of the pi/2 phase-shift filter that handles 8 float channels per call (DataType = float, _nbr_chn = 8); NBR_COEFS mirrors the NC template argument. process_sample() takes one __m256 input and writes the two results through out_0 and out_1; process_block() is the array-based counterpart taking a sample count. The member definitions are not visible in this header; presumably they come from PhaseHalfPi8Avx.hpp, which is included below. */ template 49 | class PhaseHalfPi8Avx 50 | { 51 | 52 | static_assert ((NC > 0), "Number of coefficient must be positive."); 53 | 54 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 55 | 56 | public: 57 | 58 | typedef float DataType; 59 | static const int _nbr_chn = 8; 60 | 61 | enum { NBR_COEFS = NC }; 62 | 63 | PhaseHalfPi8Avx (); 64 | PhaseHalfPi8Avx (const PhaseHalfPi8Avx &other) = default; 65 | PhaseHalfPi8Avx (PhaseHalfPi8Avx &&other) = default; 66 | ~PhaseHalfPi8Avx () = default; 67 | 68 | PhaseHalfPi8Avx & 69 | operator = (const PhaseHalfPi8Avx &other) = default; 70 | PhaseHalfPi8Avx & 71 | operator = (PhaseHalfPi8Avx &&other) = default; 72 | 73 | void set_coefs (const double coef_arr []); 74 | 75 | hiir_FORCEINLINE void 76 | process_sample (__m256 &out_0, __m256 &out_1, __m256 input); 77 | void process_block (float out_0_ptr [], float out_1_ptr [], const float in_ptr [], long nbr_spl); 78 | 79 | void clear_buffers (); 80 | 81 | 82 | 83 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 84 | 85 | protected: 86 | 87 | 88 | 89 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 90 | 91 | private: 92 | 93 | static const int _nbr_phases = 2; 94 | 95 | typedef std::array Filter; // Stages 0 and 1 contain only input memories 96 | 97 | typedef std::array FilterBiPhase; 98 | 99 | FilterBiPhase _bifilter; 100 | union 101 | { 102 | __m256 _prev8; // Just to ensure alignment 103 | float _prev [_nbr_chn]; 104 | }; 105 | int _phase; // 0 or 1 106 | 107 | 108 | 109 | /*\\\ FORBIDDEN MEMBER FUNCTIONS 
\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 110 | 111 | private: 112 | 113 | bool operator == (const PhaseHalfPi8Avx &other) const = delete; 114 | bool operator != (const PhaseHalfPi8Avx &other) const = delete; 115 | 116 | }; // class PhaseHalfPi8Avx 117 | 118 | 119 | 120 | } // namespace hiir 121 | 122 | 123 | 124 | #include "hiir/PhaseHalfPi8Avx.hpp" 125 | 126 | 127 | 128 | #endif // hiir_PhaseHalfPi8Avx_HEADER_INCLUDED 129 | 130 | 131 | 132 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 133 | -------------------------------------------------------------------------------- /PhaseHalfPiFpuTpl.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | PhaseHalfPiFpuTpl.h 4 | Author: Laurent de Soras, 2005 5 | 6 | From the input signal, generates two signals with a pi/2 phase shift, using 7 | FPU. 8 | 9 | Template parameters: 10 | - NC: number of coefficients, > 0 11 | 12 | --- Legal stuff --- 13 | 14 | This program is free software. It comes without any warranty, to 15 | the extent permitted by applicable law. You can redistribute it 16 | and/or modify it under the terms of the Do What The Fuck You Want 17 | To Public License, Version 2, as published by Sam Hocevar. See 18 | http://sam.zoy.org/wtfpl/COPYING for more details. 19 | 20 | *Tab=3***********************************************************************/ 21 | 22 | 23 | 24 | #if ! defined (hiir_PhaseHalfPiFpuTpl_HEADER_INCLUDED) 25 | #define hiir_PhaseHalfPiFpuTpl_HEADER_INCLUDED 26 | 27 | #if defined (_MSC_VER) 28 | #pragma once 29 | #pragma warning (4 : 4250) // "Inherits via dominance." 
30 | #endif 31 | 32 | 33 | 34 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 35 | 36 | #include "hiir/def.h" 37 | 38 | #include 39 | 40 | 41 | 42 | namespace hiir 43 | { 44 | 45 | 46 | 47 | /* PhaseHalfPiFpuTpl: declaration of the scalar pi/2 phase-shift filter (DataType = DT, _nbr_chn = 1); NBR_COEFS mirrors the NC template argument. process_sample() takes one input value and writes the two results through out_0 and out_1; process_block() is the array-based counterpart taking a sample count. Data members: _coef, _mem (Memory records, each holding an _x and a _y array), _prev and _phase. NOTE(review): operator == and operator != are deleted here without a const qualifier, whereas e.g. PhaseHalfPi8Avx deletes the const-qualified forms - confirm whether the difference is intended. The member definitions are not visible in this header; presumably they come from PhaseHalfPiFpuTpl.hpp, which is included below. */ template 48 | class PhaseHalfPiFpuTpl 49 | { 50 | 51 | static_assert ((NC > 0), "Number of coefficient must be positive."); 52 | 53 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 54 | 55 | public: 56 | 57 | typedef DT DataType; 58 | static const int _nbr_chn = 1; 59 | 60 | enum { NBR_COEFS = NC }; 61 | 62 | PhaseHalfPiFpuTpl (); 63 | PhaseHalfPiFpuTpl (const PhaseHalfPiFpuTpl &other) = default; 64 | PhaseHalfPiFpuTpl (PhaseHalfPiFpuTpl &&other) = default; 65 | ~PhaseHalfPiFpuTpl () = default; 66 | 67 | PhaseHalfPiFpuTpl & 68 | operator = (const PhaseHalfPiFpuTpl &other) = default; 69 | PhaseHalfPiFpuTpl & 70 | operator = (PhaseHalfPiFpuTpl &&other) = default; 71 | 72 | void set_coefs (const double coef_arr []); 73 | 74 | hiir_FORCEINLINE void 75 | process_sample (DataType &out_0, DataType &out_1, DataType input); 76 | void process_block (DataType out_0_ptr [], DataType out_1_ptr [], const DataType in_ptr [], long nbr_spl); 77 | 78 | void clear_buffers (); 79 | 80 | 81 | 82 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 83 | 84 | protected: 85 | 86 | 87 | 88 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 89 | 90 | private: 91 | 92 | enum { NBR_PHASES = 2 }; 93 | 94 | typedef std::array HyperGluar; 95 | 96 | class Memory 97 | { 98 | public: 99 | HyperGluar _x; 100 | HyperGluar _y; 101 | }; 102 | 103 | typedef std::array MemoryBiPhase; 104 | 105 | HyperGluar _coef; 106 | MemoryBiPhase _mem; 107 | DataType _prev; 108 | int _phase; // 0 or 1 109 | 110 | 111 | 112 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 113 | 114 | private: 115 | 116 | bool operator == (const PhaseHalfPiFpuTpl &other) = delete; 117 | bool 
operator != (const PhaseHalfPiFpuTpl &other) = delete; 118 | 119 | }; // class PhaseHalfPiFpuTpl 120 | 121 | 122 | 123 | } // namespace hiir 124 | 125 | 126 | 127 | #include "hiir/PhaseHalfPiFpuTpl.hpp" 128 | 129 | 130 | 131 | #endif // hiir_PhaseHalfPiFpuTpl_HEADER_INCLUDED 132 | 133 | 134 | 135 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 136 | -------------------------------------------------------------------------------- /Upsampler2xNeonOld.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2xNeonOld.h 4 | Author: Laurent de Soras, 2016 5 | 6 | Upsamples by a factor 2 the input signal, using NEON instruction set. 7 | 8 | This object must be aligned on a 16-byte boundary! 9 | 10 | If the number of coefficients is 2 or 3 modulo 4, the output is delayed by 11 | 1 sample, compared to the theoretical formula (or FPU implementation). 12 | 13 | Template parameters: 14 | - NC: number of coefficients, > 0 15 | 16 | --- Legal stuff --- 17 | 18 | This program is free software. It comes without any warranty, to 19 | the extent permitted by applicable law. You can redistribute it 20 | and/or modify it under the terms of the Do What The Fuck You Want 21 | To Public License, Version 2, as published by Sam Hocevar. See 22 | http://sam.zoy.org/wtfpl/COPYING for more details. 23 | 24 | *Tab=3***********************************************************************/ 25 | 26 | 27 | 28 | #pragma once 29 | #if ! 
defined (hiir_Upsampler2xNeonOld_HEADER_INCLUDED) 30 | #define hiir_Upsampler2xNeonOld_HEADER_INCLUDED 31 | 32 | #if defined (_MSC_VER) 33 | #pragma warning (4 : 4250) 34 | #endif 35 | 36 | 37 | 38 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 39 | 40 | #include "hiir/StageDataNeonV4.h" 41 | 42 | #include 43 | 44 | 45 | 46 | namespace hiir 47 | { 48 | 49 | 50 | 51 | /* Upsampler2xNeonOld: declaration of the single-channel float 2x upsampler (DataType = float, _nbr_chn = 1); NBR_COEFS mirrors the NC template argument. process_sample() takes one input sample and writes two output samples through out_0 and out_1; process_block() is the array-based counterpart taking a sample count. NBR_STAGES is NBR_COEFS / STAGE_WIDTH rounded up. The member definitions are not visible in this header; presumably they come from Upsampler2xNeonOld.hpp, which is included below. */ template 52 | class Upsampler2xNeonOld 53 | { 54 | 55 | static_assert ((NC > 0), "Number of coefficient must be positive."); 56 | 57 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 58 | 59 | public: 60 | 61 | typedef float DataType; 62 | static const int _nbr_chn = 1; 63 | 64 | enum { NBR_COEFS = NC }; 65 | 66 | Upsampler2xNeonOld (); 67 | Upsampler2xNeonOld (const Upsampler2xNeonOld &other) = default; 68 | Upsampler2xNeonOld (Upsampler2xNeonOld &&other) = default; 69 | ~Upsampler2xNeonOld () = default; 70 | 71 | Upsampler2xNeonOld & 72 | operator = (const Upsampler2xNeonOld &other) = default; 73 | Upsampler2xNeonOld & 74 | operator = (Upsampler2xNeonOld &&other) = default; 75 | 76 | void set_coefs (const double coef_arr [NBR_COEFS]); 77 | inline void process_sample (float &out_0, float &out_1, float input); 78 | void process_block (float out_ptr [], const float in_ptr [], long nbr_spl); 79 | void clear_buffers (); 80 | 81 | 82 | 83 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 84 | 85 | protected: 86 | 87 | 88 | 89 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 90 | 91 | private: 92 | 93 | enum { STAGE_WIDTH = 4 }; 94 | enum { NBR_STAGES = (NBR_COEFS + STAGE_WIDTH - 1) / STAGE_WIDTH }; 95 | 96 | typedef std::array Filter; // Stage 0 contains only input memory 97 | 98 | Filter _filter; // Should be the first member (thus easier to align) 99 | 100 | 101 | 102 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 103 | 104 | private: 105 | 106 
| bool operator == (const Upsampler2xNeonOld &other) const = delete; 107 | bool operator != (const Upsampler2xNeonOld &other) const = delete; 108 | 109 | }; // class Upsampler2xNeonOld 110 | 111 | 112 | 113 | } // namespace hiir 114 | 115 | 116 | 117 | #include "hiir/Upsampler2xNeonOld.hpp" 118 | 119 | 120 | 121 | #endif // hiir_Upsampler2xNeonOld_HEADER_INCLUDED 122 | 123 | 124 | 125 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 126 | -------------------------------------------------------------------------------- /PhaseHalfPi3dnow.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | PhaseHalfPi3dnow.h 4 | Author: Laurent de Soras, 2005 5 | 6 | From the input signal, generates two signals with a pi/2 phase shift, using 7 | 3DNow! instruction set. 8 | 9 | Template parameters: 10 | - NC: number of coefficients, > 0 11 | 12 | --- Legal stuff --- 13 | 14 | This program is free software. It comes without any warranty, to 15 | the extent permitted by applicable law. You can redistribute it 16 | and/or modify it under the terms of the Do What The Fuck You Want 17 | To Public License, Version 2, as published by Sam Hocevar. See 18 | http://sam.zoy.org/wtfpl/COPYING for more details. 19 | 20 | *Tab=3***********************************************************************/ 21 | 22 | 23 | 24 | #if ! defined (hiir_PhaseHalfPi3dnow_HEADER_INCLUDED) 25 | #define hiir_PhaseHalfPi3dnow_HEADER_INCLUDED 26 | 27 | #if defined (_MSC_VER) 28 | #pragma once 29 | #pragma warning (4 : 4250) // "Inherits via dominance." 
30 | #endif 31 | 32 | 33 | 34 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 35 | 36 | #include "hiir/def.h" 37 | #include "hiir/StageData3dnow.h" 38 | 39 | #include 40 | 41 | 42 | 43 | namespace hiir 44 | { 45 | 46 | 47 | 48 | /* PhaseHalfPi3dnow: declaration of the single-channel float pi/2 phase-shift filter (DataType = float, _nbr_chn = 1); NBR_COEFS mirrors the NC template argument. process_sample() takes one input sample and writes the two results through out_0 and out_1; process_block() is the array-based counterpart taking a sample count. NBR_STAGES is NBR_COEFS / STAGE_WIDTH rounded up. NOTE(review): operator == and operator != are deleted here without a const qualifier, whereas e.g. PhaseHalfPi8Avx deletes the const-qualified forms - confirm whether the difference is intended. The member definitions are not visible in this header; presumably they come from PhaseHalfPi3dnow.hpp, which is included below. */ template 49 | class PhaseHalfPi3dnow 50 | { 51 | 52 | static_assert ((NC > 0), "Number of coefficient must be positive."); 53 | 54 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 55 | 56 | public: 57 | 58 | typedef float DataType; 59 | static const int _nbr_chn = 1; 60 | 61 | enum { NBR_COEFS = NC }; 62 | 63 | PhaseHalfPi3dnow (); 64 | PhaseHalfPi3dnow (const PhaseHalfPi3dnow &other) = default; 65 | PhaseHalfPi3dnow (PhaseHalfPi3dnow &&other) = default; 66 | ~PhaseHalfPi3dnow () = default; 67 | 68 | PhaseHalfPi3dnow & 69 | operator = (const PhaseHalfPi3dnow &other) = default; 70 | PhaseHalfPi3dnow & 71 | operator = (PhaseHalfPi3dnow &&other) = default; 72 | 73 | void set_coefs (const double coef_arr []); 74 | 75 | hiir_FORCEINLINE void 76 | process_sample (float &out_0, float &out_1, float input); 77 | void process_block (float out_0_ptr [], float out_1_ptr [], const float in_ptr [], long nbr_spl); 78 | 79 | void clear_buffers (); 80 | 81 | 82 | 83 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 84 | 85 | protected: 86 | 87 | 88 | 89 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 90 | 91 | private: 92 | 93 | enum { STAGE_WIDTH = 2 }; 94 | enum { NBR_STAGES = (NBR_COEFS + STAGE_WIDTH-1) / STAGE_WIDTH }; 95 | enum { NBR_PHASES = 2 }; 96 | 97 | typedef std::array Filter; // Stage 0 contains only input memory 98 | typedef std::array FilterBiPhase; 99 | 100 | FilterBiPhase _filter; // Should be the first member (thus easier to align) 101 | float _prev; 102 | int _phase; // 0 or 1 103 | 104 | 105 | 106 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 107 | 108 | private: 109 | 110 | 
bool operator == (const PhaseHalfPi3dnow &other) = delete; 111 | bool operator != (const PhaseHalfPi3dnow &other) = delete; 112 | 113 | }; // class PhaseHalfPi3dnow 114 | 115 | 116 | 117 | } // namespace hiir 118 | 119 | 120 | 121 | #include "hiir/PhaseHalfPi3dnow.hpp" 122 | 123 | 124 | 125 | #endif // hiir_PhaseHalfPi3dnow_HEADER_INCLUDED 126 | 127 | 128 | 129 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 130 | -------------------------------------------------------------------------------- /PhaseHalfPi4F64Avx.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | PhaseHalfPi4F64Avx.h 4 | Author: Laurent de Soras, 2020 5 | 6 | From the input signal, generates two signals with a pi/2 phase shift, using 7 | AVX instruction set. Works on vectors of 4 double. 8 | 9 | This object must be aligned on a 32-byte boundary! 10 | 11 | Template parameters: 12 | - NC: number of coefficients, > 0 13 | 14 | --- Legal stuff --- 15 | 16 | This program is free software. It comes without any warranty, to 17 | the extent permitted by applicable law. You can redistribute it 18 | and/or modify it under the terms of the Do What The Fuck You Want 19 | To Public License, Version 2, as published by Sam Hocevar. See 20 | http://www.wtfpl.net/ for more details. 21 | 22 | *Tab=3***********************************************************************/ 23 | 24 | 25 | 26 | #pragma once 27 | #if ! 
defined (hiir_PhaseHalfPi4F64Avx_HEADER_INCLUDED) 28 | #define hiir_PhaseHalfPi4F64Avx_HEADER_INCLUDED 29 | 30 | 31 | 32 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 33 | 34 | #include "hiir/def.h" 35 | #include "hiir/StageDataF64Avx.h" 36 | 37 | #include 38 | 39 | #include 40 | 41 | 42 | 43 | namespace hiir 44 | { 45 | 46 | 47 | 48 | /* PhaseHalfPi4F64Avx: declaration of the pi/2 phase-shift filter that handles 4 double channels per call (DataType = double, _nbr_chn = 4); NBR_COEFS mirrors the NC template argument. process_sample() takes one __m256d input and writes the two results through out_0 and out_1; process_block() is the array-based counterpart taking a sample count. The member definitions are not visible in this header; presumably they come from PhaseHalfPi4F64Avx.hpp, which is included below. */ template 49 | class PhaseHalfPi4F64Avx 50 | { 51 | 52 | static_assert ((NC > 0), "Number of coefficient must be positive."); 53 | 54 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 55 | 56 | public: 57 | 58 | typedef double DataType; 59 | static const int _nbr_chn = 4; 60 | 61 | enum { NBR_COEFS = NC }; 62 | 63 | PhaseHalfPi4F64Avx (); 64 | PhaseHalfPi4F64Avx (const PhaseHalfPi4F64Avx &other) = default; 65 | PhaseHalfPi4F64Avx (PhaseHalfPi4F64Avx &&other) = default; 66 | ~PhaseHalfPi4F64Avx () = default; 67 | 68 | PhaseHalfPi4F64Avx & 69 | operator = (const PhaseHalfPi4F64Avx &other) = default; 70 | PhaseHalfPi4F64Avx & 71 | operator = (PhaseHalfPi4F64Avx &&other) = default; 72 | 73 | void set_coefs (const double coef_arr []); 74 | 75 | hiir_FORCEINLINE void 76 | process_sample (__m256d &out_0, __m256d &out_1, __m256d input); 77 | void process_block (double out_0_ptr [], double out_1_ptr [], const double in_ptr [], long nbr_spl); 78 | 79 | void clear_buffers (); 80 | 81 | 82 | 83 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 84 | 85 | protected: 86 | 87 | 88 | 89 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 90 | 91 | private: 92 | 93 | static const int _nbr_phases = 2; 94 | 95 | typedef std::array Filter; // Stages 0 and 1 contain only input memories 96 | 97 | typedef std::array FilterBiPhase; 98 | 99 | FilterBiPhase _bifilter; 100 | union 101 | { 102 | __m256d _prev4; // Just to ensure alignment 103 | double _prev [_nbr_chn]; 104 | }; 105 | int _phase; // 0 or 1 106 | 107 | 108 | 109 | /*\\\ 
FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 110 | 111 | private: 112 | 113 | bool operator == (const PhaseHalfPi4F64Avx &other) const = delete; 114 | bool operator != (const PhaseHalfPi4F64Avx &other) const = delete; 115 | 116 | }; // class PhaseHalfPi4F64Avx 117 | 118 | 119 | 120 | } // namespace hiir 121 | 122 | 123 | 124 | #include "hiir/PhaseHalfPi4F64Avx.hpp" 125 | 126 | 127 | 128 | #endif // hiir_PhaseHalfPi4F64Avx_HEADER_INCLUDED 129 | 130 | 131 | 132 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 133 | -------------------------------------------------------------------------------- /PhaseHalfPi2F64Sse2.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | PhaseHalfPi2F64Sse2.h 4 | Author: Laurent de Soras, 2020 5 | 6 | From the input signal, generates two signals with a pi/2 phase shift, using 7 | SSE2 instruction set. Works on vectors of 2 double. 8 | 9 | This object must be aligned on a 16-byte boundary! 10 | 11 | Template parameters: 12 | - NC: number of coefficients, > 0 13 | 14 | --- Legal stuff --- 15 | 16 | This program is free software. It comes without any warranty, to 17 | the extent permitted by applicable law. You can redistribute it 18 | and/or modify it under the terms of the Do What The Fuck You Want 19 | To Public License, Version 2, as published by Sam Hocevar. See 20 | http://www.wtfpl.net/ for more details. 21 | 22 | *Tab=3***********************************************************************/ 23 | 24 | 25 | 26 | #pragma once 27 | #if ! 
defined (hiir_PhaseHalfPi2F64Sse2_HEADER_INCLUDED) 28 | #define hiir_PhaseHalfPi2F64Sse2_HEADER_INCLUDED 29 | 30 | 31 | 32 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 33 | 34 | #include "hiir/def.h" 35 | #include "hiir/StageDataF64Sse2.h" 36 | 37 | #include 38 | 39 | #include 40 | 41 | 42 | 43 | namespace hiir 44 | { 45 | 46 | 47 | 48 | /* PhaseHalfPi2F64Sse2: declaration of the pi/2 phase-shift filter that handles 2 double channels per call (DataType = double, _nbr_chn = 2); NBR_COEFS mirrors the NC template argument. process_sample() takes one __m128d input and writes the two results through out_0 and out_1; process_block() is the array-based counterpart taking a sample count. The member definitions are not visible in this header; presumably they come from PhaseHalfPi2F64Sse2.hpp, which is included below. */ template 49 | class PhaseHalfPi2F64Sse2 50 | { 51 | 52 | static_assert ((NC > 0), "Number of coefficient must be positive."); 53 | 54 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 55 | 56 | public: 57 | 58 | typedef double DataType; 59 | static const int _nbr_chn = 2; 60 | 61 | enum { NBR_COEFS = NC }; 62 | 63 | PhaseHalfPi2F64Sse2 (); 64 | PhaseHalfPi2F64Sse2 (const PhaseHalfPi2F64Sse2 &other) = default; 65 | PhaseHalfPi2F64Sse2 (PhaseHalfPi2F64Sse2 &&other) = default; 66 | ~PhaseHalfPi2F64Sse2 () = default; 67 | 68 | PhaseHalfPi2F64Sse2 & 69 | operator = (const PhaseHalfPi2F64Sse2 &other) = default; 70 | PhaseHalfPi2F64Sse2 & 71 | operator = (PhaseHalfPi2F64Sse2 &&other) = default; 72 | 73 | void set_coefs (const double coef_arr []); 74 | 75 | hiir_FORCEINLINE void 76 | process_sample (__m128d &out_0, __m128d &out_1, __m128d input); 77 | void process_block (double out_0_ptr [], double out_1_ptr [], const double in_ptr [], long nbr_spl); 78 | 79 | void clear_buffers (); 80 | 81 | 82 | 83 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 84 | 85 | protected: 86 | 87 | 88 | 89 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 90 | 91 | private: 92 | 93 | static const int _nbr_phases = 2; 94 | 95 | typedef std::array Filter; // Stages 0 and 1 contain only input memories 96 | 97 | typedef std::array FilterBiPhase; 98 | 99 | FilterBiPhase _bifilter; 100 | union 101 | { 102 | __m128d _prev4; // Just to ensure alignment 103 | double _prev [4]; /* NOTE(review): sized 4 although _nbr_chn is 2 here (the AVX variants size this array with _nbr_chn) - confirm whether the extra room is intentional */ 104 | }; 105 | int _phase; // 0 or 1 106 | 107 | 108 | 109 | 
/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 110 | 111 | private: 112 | 113 | bool operator == (const PhaseHalfPi2F64Sse2 &other) const = delete; 114 | bool operator != (const PhaseHalfPi2F64Sse2 &other) const = delete; 115 | 116 | }; // class PhaseHalfPi2F64Sse2 117 | 118 | 119 | 120 | } // namespace hiir 121 | 122 | 123 | 124 | #include "hiir/PhaseHalfPi2F64Sse2.hpp" 125 | 126 | 127 | 128 | #endif // hiir_PhaseHalfPi2F64Sse2_HEADER_INCLUDED 129 | 130 | 131 | 132 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 133 | -------------------------------------------------------------------------------- /PhaseHalfPi16Avx512.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | PhaseHalfPi16Avx512.h 4 | Author: Laurent de Soras, 2020 5 | 6 | From the input signal, generates two signals with a pi/2 phase shift, using 7 | AVX-512 instruction set. Works on vectors of 16 float. 8 | 9 | This object must be aligned on a 64-byte boundary! 10 | 11 | Template parameters: 12 | - NC: number of coefficients, > 0 13 | 14 | --- Legal stuff --- 15 | 16 | This program is free software. It comes without any warranty, to 17 | the extent permitted by applicable law. You can redistribute it 18 | and/or modify it under the terms of the Do What The Fuck You Want 19 | To Public License, Version 2, as published by Sam Hocevar. See 20 | http://www.wtfpl.net/ for more details. 21 | 22 | *Tab=3***********************************************************************/ 23 | 24 | 25 | 26 | #pragma once 27 | #if ! 
defined (hiir_PhaseHalfPi16Avx512_HEADER_INCLUDED) 28 | #define hiir_PhaseHalfPi16Avx512_HEADER_INCLUDED 29 | 30 | 31 | 32 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 33 | 34 | #include "hiir/def.h" 35 | #include "hiir/StageDataAvx512.h" 36 | 37 | #include 38 | 39 | #include 40 | 41 | 42 | 43 | namespace hiir 44 | { 45 | 46 | 47 | 48 | /* PhaseHalfPi16Avx512: declaration of the pi/2 phase-shift filter that handles 16 float channels per call (DataType = float, _nbr_chn = 16); NBR_COEFS mirrors the NC template argument. process_sample() takes one __m512 input and writes the two results through out_0 and out_1; process_block() is the array-based counterpart taking a sample count. The member definitions are not visible in this header; presumably they come from PhaseHalfPi16Avx512.hpp, which is included below. */ template 49 | class PhaseHalfPi16Avx512 50 | { 51 | 52 | static_assert ((NC > 0), "Number of coefficient must be positive."); 53 | 54 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 55 | 56 | public: 57 | 58 | typedef float DataType; 59 | static const int _nbr_chn = 16; 60 | 61 | enum { NBR_COEFS = NC }; 62 | 63 | PhaseHalfPi16Avx512 (); 64 | PhaseHalfPi16Avx512 (const PhaseHalfPi16Avx512 &other) = default; 65 | PhaseHalfPi16Avx512 (PhaseHalfPi16Avx512 &&other) = default; 66 | ~PhaseHalfPi16Avx512 () = default; 67 | 68 | PhaseHalfPi16Avx512 & 69 | operator = (const PhaseHalfPi16Avx512 &other) = default; 70 | PhaseHalfPi16Avx512 & 71 | operator = (PhaseHalfPi16Avx512 &&other) = default; 72 | 73 | void set_coefs (const double coef_arr []); 74 | 75 | hiir_FORCEINLINE void 76 | process_sample (__m512 &out_0, __m512 &out_1, __m512 input); 77 | void process_block (float out_0_ptr [], float out_1_ptr [], const float in_ptr [], long nbr_spl); 78 | 79 | void clear_buffers (); 80 | 81 | 82 | 83 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 84 | 85 | protected: 86 | 87 | 88 | 89 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 90 | 91 | private: 92 | 93 | static const int _nbr_phases = 2; 94 | 95 | typedef std::array Filter; // Stages 0 and 1 contain only input memories 96 | 97 | typedef std::array FilterBiPhase; 98 | 99 | FilterBiPhase _bifilter; 100 | union 101 | { 102 | __m512 _prev16; // Just to ensure alignment 103 | float _prev [_nbr_chn]; 104 | }; 105 | int _phase; // 0 or 1 106 | 107 | 108 | 109 | 
/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 110 | 111 | private: 112 | 113 | bool operator == (const PhaseHalfPi16Avx512 &other) const = delete; 114 | bool operator != (const PhaseHalfPi16Avx512 &other) const = delete; 115 | 116 | }; // class PhaseHalfPi16Avx512 117 | 118 | 119 | 120 | } // namespace hiir 121 | 122 | 123 | 124 | #include "hiir/PhaseHalfPi16Avx512.hpp" 125 | 126 | 127 | 128 | #endif // hiir_PhaseHalfPi16Avx512_HEADER_INCLUDED 129 | 130 | 131 | 132 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 133 | -------------------------------------------------------------------------------- /Downsampler2x3dnow.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Downsampler2x3dnow.h 4 | Author: Laurent de Soras, 2005 5 | 6 | Downsamples by a factor 2 the input signal, using 3DNow! instruction set. 7 | 8 | Template parameters: 9 | - NC: number of coefficients, > 0 10 | 11 | --- Legal stuff --- 12 | 13 | This program is free software. It comes without any warranty, to 14 | the extent permitted by applicable law. You can redistribute it 15 | and/or modify it under the terms of the Do What The Fuck You Want 16 | To Public License, Version 2, as published by Sam Hocevar. See 17 | http://sam.zoy.org/wtfpl/COPYING for more details. 18 | 19 | *Tab=3***********************************************************************/ 20 | 21 | 22 | 23 | #if ! defined (hiir_Downsampler2x3dnow_HEADER_INCLUDED) 24 | #define hiir_Downsampler2x3dnow_HEADER_INCLUDED 25 | 26 | #if defined (_MSC_VER) 27 | #pragma once 28 | #pragma warning (4 : 4250) // "Inherits via dominance." 
29 | #endif 30 | 31 | 32 | 33 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 34 | 35 | #include "hiir/def.h" 36 | #include "hiir/StageData3dnow.h" 37 | 38 | #include 39 | 40 | 41 | 42 | namespace hiir 43 | { 44 | 45 | 46 | 47 | /* Downsampler2x3dnow: declaration of the single-channel float 2x downsampler (DataType = float, _nbr_chn = 1); NBR_COEFS mirrors the NC template argument. process_sample() reads a pair of input samples and returns one float, process_sample_split() delivers separate low and high outputs through its reference parameters, and the block variants take arrays plus a sample count. NBR_STAGES is NBR_COEFS / STAGE_WIDTH rounded up. NOTE(review): operator == and operator != are deleted here without a const qualifier, whereas e.g. PhaseHalfPi16Avx512 deletes the const-qualified forms - confirm whether the difference is intended. The member definitions are not visible in this header; presumably they come from Downsampler2x3dnow.hpp, which is included below. */ template 48 | class Downsampler2x3dnow 49 | { 50 | 51 | static_assert ((NC > 0), "Number of coefficient must be positive."); 52 | 53 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 54 | 55 | public: 56 | 57 | typedef float DataType; 58 | static const int _nbr_chn = 1; 59 | 60 | enum { NBR_COEFS = NC }; 61 | 62 | Downsampler2x3dnow (); 63 | Downsampler2x3dnow (const Downsampler2x3dnow &other) = default; 64 | Downsampler2x3dnow (Downsampler2x3dnow &&other) = default; 65 | ~Downsampler2x3dnow () = default; 66 | 67 | Downsampler2x3dnow & 68 | operator = (const Downsampler2x3dnow &other) = default; 69 | Downsampler2x3dnow & 70 | operator = (Downsampler2x3dnow &&other) = default; 71 | 72 | void set_coefs (const double coef_arr []); 73 | 74 | hiir_FORCEINLINE float 75 | process_sample (const float in_ptr [2]); 76 | void process_block (float out_ptr [], const float in_ptr [], long nbr_spl); 77 | 78 | hiir_FORCEINLINE void 79 | process_sample_split (float &low, float &high, const float in_ptr [2]); 80 | void process_block_split (float out_l_ptr [], float out_h_ptr [], const float in_ptr [], long nbr_spl); 81 | 82 | void clear_buffers (); 83 | 84 | 85 | 86 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 87 | 88 | protected: 89 | 90 | 91 | 92 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 93 | 94 | private: 95 | 96 | enum { STAGE_WIDTH = 2 }; 97 | enum { NBR_STAGES = (NBR_COEFS + STAGE_WIDTH - 1) / STAGE_WIDTH }; 98 | 99 | typedef std::array Filter; // Stage 0 contains only input memory 100 | 101 | Filter _filter; // Should be the first member (thus easier to align) 102 | 103 | 104 | 105 | /*\\\ FORBIDDEN MEMBER 
FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 106 | 107 | private: 108 | 109 | bool operator == (const Downsampler2x3dnow &other) = delete; 110 | bool operator != (const Downsampler2x3dnow &other) = delete; 111 | 112 | }; // class Downsampler2x3dnow 113 | 114 | 115 | 116 | } // namespace hiir 117 | 118 | 119 | 120 | #include "hiir/Downsampler2x3dnow.hpp" 121 | 122 | 123 | 124 | #endif // hiir_Downsampler2x3dnow_HEADER_INCLUDED 125 | 126 | 127 | 128 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 129 | -------------------------------------------------------------------------------- /Upsampler2xSseOld.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Upsampler2xSseOld.h 4 | Author: Laurent de Soras, 2005 5 | 6 | Upsamples by a factor 2 the input signal, using SSE instruction set. 7 | 8 | This object must be aligned on a 16-byte boundary! 9 | 10 | If the number of coefficients is 2 or 3 modulo 4, the output is delayed by 11 | 1 sample, compared to the theoretical formula (or FPU implementation). 12 | 13 | Template parameters: 14 | - NC: number of coefficients, > 0 15 | 16 | --- Legal stuff --- 17 | 18 | This program is free software. It comes without any warranty, to 19 | the extent permitted by applicable law. You can redistribute it 20 | and/or modify it under the terms of the Do What The Fuck You Want 21 | To Public License, Version 2, as published by Sam Hocevar. See 22 | http://sam.zoy.org/wtfpl/COPYING for more details. 23 | 24 | *Tab=3***********************************************************************/ 25 | 26 | 27 | 28 | #if ! defined (hiir_Upsampler2xSseOld_HEADER_INCLUDED) 29 | #define hiir_Upsampler2xSseOld_HEADER_INCLUDED 30 | 31 | #if defined (_MSC_VER) 32 | #pragma once 33 | #pragma warning (4 : 4250) // "Inherits via dominance." 
34 | #endif 35 | 36 | 37 | 38 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 39 | 40 | #include "hiir/def.h" 41 | #include "hiir/StageDataSse.h" 42 | 43 | #include 44 | 45 | #include 46 | 47 | 48 | 49 | namespace hiir 50 | { 51 | 52 | 53 | 54 | /* Upsampler2xSseOld: upsamples a float signal by a factor 2 using the SSE instruction set. NC is the number of filter coefficients and must be > 0 (checked by the static_assert below). Per the file header, objects of this class must be aligned on a 16-byte boundary. Declared interface: set_coefs() receives NBR_COEFS coefficients as doubles; process_sample() takes one input sample and writes two outputs, out_0 and out_1; process_block() operates on arrays (nbr_spl is presumably a sample count - confirm in the .hpp); clear_buffers() presumably resets the filter state - confirm in the .hpp. The member functions are defined in hiir/Upsampler2xSseOld.hpp, included at the end of this header. NOTE(review): the template parameter list, the standard include targets and the std::array arguments are missing in this copy - presumably lost in extraction, restore them from upstream. */ template 55 | class Upsampler2xSseOld 56 | { 57 | 58 | static_assert ((NC > 0), "Number of coefficient must be positive."); 59 | 60 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 61 | 62 | public: 63 | 64 | typedef float DataType; 65 | static const int _nbr_chn = 1; 66 | 67 | enum { NBR_COEFS = NC }; 68 | 69 | Upsampler2xSseOld (); 70 | Upsampler2xSseOld (const Upsampler2xSseOld &other) = default; 71 | Upsampler2xSseOld (Upsampler2xSseOld &&other) = default; 72 | ~Upsampler2xSseOld () = default; 73 | 74 | Upsampler2xSseOld & 75 | operator = (const Upsampler2xSseOld &other) = default; 76 | Upsampler2xSseOld & 77 | operator = (Upsampler2xSseOld &&other) = default; 78 | 79 | void set_coefs (const double coef_arr [NBR_COEFS]); 80 | 81 | hiir_FORCEINLINE void 82 | process_sample (float &out_0, float &out_1, float input); 83 | void process_block (float out_ptr [], const float in_ptr [], long nbr_spl); 84 | 85 | void clear_buffers (); 86 | 87 | 88 | 89 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 90 | 91 | protected: 92 | 93 | 94 | 95 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 96 | 97 | private: 98 | 99 | enum { STAGE_WIDTH = 4 }; 100 | enum { NBR_STAGES = (NBR_COEFS + STAGE_WIDTH - 1) / STAGE_WIDTH }; 101 | 102 | typedef std::array Filter; // Stage 0 contains only input memory 103 | 104 | Filter _filter; // Should be the first member (thus easier to align) 105 | 106 | 107 | 108 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 109 | 110 | private: 111 | 112 | bool operator == (const Upsampler2xSseOld &other) const = delete; 113 | bool operator != (const
Upsampler2xSseOld &other) const = delete; 114 | 115 | }; // class Upsampler2xSseOld 116 | 117 | 118 | 119 | } // namespace hiir 120 | 121 | 122 | 123 | #include "hiir/Upsampler2xSseOld.hpp" 124 | 125 | 126 | 127 | #endif // hiir_Upsampler2xSseOld_HEADER_INCLUDED 128 | 129 | 130 | 131 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 132 | -------------------------------------------------------------------------------- /PhaseHalfPi8F64Avx512.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | PhaseHalfPi8F64Avx512.h 4 | Author: Laurent de Soras, 2020 5 | 6 | From the input signal, generates two signals with a pi/2 phase shift, using 7 | AVX-512 instruction set. Works on vectors of 8 double. 8 | 9 | This object must be aligned on a 64-byte boundary! 10 | 11 | Template parameters: 12 | - NC: number of coefficients, > 0 13 | 14 | --- Legal stuff --- 15 | 16 | This program is free software. It comes without any warranty, to 17 | the extent permitted by applicable law. You can redistribute it 18 | and/or modify it under the terms of the Do What The Fuck You Want 19 | To Public License, Version 2, as published by Sam Hocevar. See 20 | http://www.wtfpl.net/ for more details. 21 | 22 | *Tab=3***********************************************************************/ 23 | 24 | 25 | 26 | #pragma once 27 | #if !
defined (hiir_PhaseHalfPi8F64Avx512_HEADER_INCLUDED) 28 | #define hiir_PhaseHalfPi8F64Avx512_HEADER_INCLUDED 29 | 30 | 31 | 32 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 33 | 34 | #include "hiir/def.h" 35 | #include "hiir/StageDataF64Avx512.h" 36 | 37 | #include 38 | 39 | #include 40 | 41 | 42 | 43 | namespace hiir 44 | { 45 | 46 | 47 | 48 | /* PhaseHalfPi8F64Avx512: from the input signal, generates two signals with a pi/2 phase shift, using the AVX-512 instruction set on vectors of 8 double (_nbr_chn = 8). NC is the number of filter coefficients and must be > 0 (checked by the static_assert below). Per the file header, objects of this class must be aligned on a 64-byte boundary. Declared interface: set_coefs() receives the coefficients as doubles; process_sample() takes one __m512d input and writes two __m512d outputs, out_0 and out_1; process_block() operates on arrays of double (nbr_spl is presumably a sample count, and the channel layout of the arrays is not visible here - confirm in the .hpp); clear_buffers() presumably resets the filter state - confirm in the .hpp. The member functions are defined in hiir/PhaseHalfPi8F64Avx512.hpp, included at the end of this header. NOTE(review): the template parameter list, the standard include targets and the std::array arguments are missing in this copy - presumably lost in extraction, restore them from upstream. */ template 49 | class PhaseHalfPi8F64Avx512 50 | { 51 | 52 | static_assert ((NC > 0), "Number of coefficient must be positive."); 53 | 54 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 55 | 56 | public: 57 | 58 | typedef double DataType; 59 | static const int _nbr_chn = 8; 60 | 61 | enum { NBR_COEFS = NC }; 62 | 63 | PhaseHalfPi8F64Avx512 (); 64 | PhaseHalfPi8F64Avx512 (const PhaseHalfPi8F64Avx512 &other) = default; 65 | PhaseHalfPi8F64Avx512 (PhaseHalfPi8F64Avx512 &&other) = default; 66 | ~PhaseHalfPi8F64Avx512 () = default; 67 | 68 | PhaseHalfPi8F64Avx512 & 69 | operator = (const PhaseHalfPi8F64Avx512 &other) = default; 70 | PhaseHalfPi8F64Avx512 & 71 | operator = (PhaseHalfPi8F64Avx512 &&other) = default; 72 | 73 | void set_coefs (const double coef_arr []); 74 | 75 | hiir_FORCEINLINE void 76 | process_sample (__m512d &out_0, __m512d &out_1, __m512d input); 77 | void process_block (double out_0_ptr [], double out_1_ptr [], const double in_ptr [], long nbr_spl); 78 | 79 | void clear_buffers (); 80 | 81 | 82 | 83 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 84 | 85 | protected: 86 | 87 | 88 | 89 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 90 | 91 | private: 92 | 93 | static const int _nbr_phases = 2; 94 | 95 | typedef std::array Filter; // Stages 0 and 1 contain only input memories 96 | 97 | typedef std::array FilterBiPhase; 98 | 99 | FilterBiPhase _bifilter; 100 | union 101 | { 102 | __m512d _prev8; // Just to ensure alignment 103 | double _prev [_nbr_chn]; 104 | }; 105 | int _phase;
// 0 or 1 106 | 107 | 108 | 109 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 110 | 111 | private: 112 | 113 | bool operator == (const PhaseHalfPi8F64Avx512 &other) const = delete; 114 | bool operator != (const PhaseHalfPi8F64Avx512 &other) const = delete; 115 | 116 | }; // class PhaseHalfPi8F64Avx512 117 | 118 | 119 | 120 | } // namespace hiir 121 | 122 | 123 | 124 | #include "hiir/PhaseHalfPi8F64Avx512.hpp" 125 | 126 | 127 | 128 | #endif // hiir_PhaseHalfPi8F64Avx512_HEADER_INCLUDED 129 | 130 | 131 | 132 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 133 | -------------------------------------------------------------------------------- /PhaseHalfPi4Neon.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | PhaseHalfPi4Neon.h 4 | Author: Laurent de Soras, 2020 5 | 6 | From the input signal, generates two signals with a pi/2 phase shift, using 7 | NEON instruction set. Works on vectors of 4 float. 8 | 9 | This object must be aligned on a 16-byte boundary! 10 | 11 | Template parameters: 12 | - NC: number of coefficients, > 0 13 | 14 | --- Legal stuff --- 15 | 16 | This program is free software. It comes without any warranty, to 17 | the extent permitted by applicable law. You can redistribute it 18 | and/or modify it under the terms of the Do What The Fuck You Want 19 | To Public License, Version 2, as published by Sam Hocevar. See 20 | http://www.wtfpl.net/ for more details. 21 | 22 | *Tab=3***********************************************************************/ 23 | 24 | 25 | 26 | #pragma once 27 | #if !
defined (hiir_PhaseHalfPi4Neon_HEADER_INCLUDED) 28 | #define hiir_PhaseHalfPi4Neon_HEADER_INCLUDED 29 | 30 | 31 | 32 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 33 | 34 | #include "hiir/def.h" 35 | #include "hiir/StageDataNeonV4.h" 36 | 37 | #include 38 | 39 | #include 40 | 41 | 42 | 43 | namespace hiir 44 | { 45 | 46 | 47 | 48 | /* PhaseHalfPi4Neon: from the input signal, generates two signals with a pi/2 phase shift, using the NEON instruction set on vectors of 4 float (_nbr_chn = 4). NC is the number of filter coefficients and must be > 0 (checked by the static_assert below). Per the file header, objects of this class must be aligned on a 16-byte boundary. Declared interface: set_coefs() receives the coefficients as doubles; process_sample() takes one float32x4_t input and writes two float32x4_t outputs, out_0 and out_1; process_block() operates on arrays of float (nbr_spl is presumably a sample count, and the channel layout of the arrays is not visible here - confirm in the .hpp); clear_buffers() presumably resets the filter state - confirm in the .hpp. The member functions are defined in hiir/PhaseHalfPi4Neon.hpp, included at the end of this header. NOTE(review): the template parameter list, the standard include targets and the std::array arguments are missing in this copy - presumably lost in extraction, restore them from upstream. */ template 49 | class PhaseHalfPi4Neon 50 | { 51 | 52 | static_assert ((NC > 0), "Number of coefficient must be positive."); 53 | 54 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 55 | 56 | public: 57 | 58 | typedef float DataType; 59 | static const int _nbr_chn = 4; 60 | 61 | enum { NBR_COEFS = NC }; 62 | 63 | PhaseHalfPi4Neon (); 64 | PhaseHalfPi4Neon (const PhaseHalfPi4Neon &other) = default; 65 | PhaseHalfPi4Neon (PhaseHalfPi4Neon &&other) = default; 66 | ~PhaseHalfPi4Neon () = default; 67 | 68 | PhaseHalfPi4Neon & 69 | operator = (const PhaseHalfPi4Neon &other) = default; 70 | PhaseHalfPi4Neon & 71 | operator = (PhaseHalfPi4Neon &&other) = default; 72 | 73 | void set_coefs (const double coef_arr []); 74 | 75 | hiir_FORCEINLINE void 76 | process_sample (float32x4_t &out_0, float32x4_t &out_1, float32x4_t input); 77 | void process_block (float out_0_ptr [], float out_1_ptr [], const float in_ptr [], long nbr_spl); 78 | 79 | void clear_buffers (); 80 | 81 | 82 | 83 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 84 | 85 | protected: 86 | 87 | 88 | 89 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 90 | 91 | private: 92 | 93 | static const int _nbr_phases = 2; 94 | 95 | typedef std::array Filter; // Stages 0 and 1 contain only input memories 96 | 97 | typedef std::array FilterBiPhase; 98 | 99 | FilterBiPhase _bifilter; 100 | union 101 | { 102 | __attribute__ ((aligned (16))) float32x4_t 103 | _prev4; // Just to ensure alignment 104 | __attribute__ ((aligned (16))) float 105 | _prev [_nbr_chn]; /* One value per channel; sized from _nbr_chn instead of a literal 4. */ 106 | }; 107 | int
_phase; // 0 or 1 108 | 109 | 110 | 111 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 112 | 113 | private: 114 | 115 | bool operator == (const PhaseHalfPi4Neon &other) const = delete; 116 | bool operator != (const PhaseHalfPi4Neon &other) const = delete; 117 | 118 | }; // class PhaseHalfPi4Neon 119 | 120 | 121 | 122 | } // namespace hiir 123 | 124 | 125 | 126 | #include "hiir/PhaseHalfPi4Neon.hpp" 127 | 128 | 129 | 130 | #endif // hiir_PhaseHalfPi4Neon_HEADER_INCLUDED 131 | 132 | 133 | 134 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 135 | -------------------------------------------------------------------------------- /Downsampler2xNeon.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Downsampler2xNeon.h 4 | Author: Laurent de Soras, 2020 5 | 6 | Downsamples by a factor 2 the input signal, using NEON instruction set. 7 | 8 | This object must be aligned on a 16-byte boundary! 9 | 10 | Template parameters: 11 | - NC: number of coefficients, > 0 12 | 13 | --- Legal stuff --- 14 | 15 | This program is free software. It comes without any warranty, to 16 | the extent permitted by applicable law. You can redistribute it 17 | and/or modify it under the terms of the Do What The Fuck You Want 18 | To Public License, Version 2, as published by Sam Hocevar. See 19 | http://sam.zoy.org/wtfpl/COPYING for more details. 20 | 21 | *Tab=3***********************************************************************/ 22 | 23 | 24 | 25 | #pragma once 26 | #if !
defined (hiir_Downsampler2xNeon_HEADER_INCLUDED) 27 | #define hiir_Downsampler2xNeon_HEADER_INCLUDED 28 | 29 | #if defined (_MSC_VER) 30 | #pragma warning (4 : 4250) 31 | #endif 32 | 33 | 34 | 35 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 36 | 37 | #include "hiir/def.h" 38 | #include "hiir/StageDataNeonV2.h" 39 | 40 | #include 41 | 42 | 43 | 44 | namespace hiir 45 | { 46 | 47 | 48 | 49 | /* Downsampler2xNeon: downsamples a float signal by a factor 2 using the NEON instruction set. NC is the number of filter coefficients and must be > 0 (checked by the static_assert below). Per the file header, objects of this class must be aligned on a 16-byte boundary. Declared interface: set_coefs() receives the coefficients as doubles; process_sample() takes two input samples and returns one float; process_sample_split() takes two input samples and writes two outputs, low and high; process_block() and process_block_split() operate on arrays (nbr_spl is presumably a sample count - confirm in the .hpp); clear_buffers() presumably resets the filter state - confirm in the .hpp. The member functions are defined in hiir/Downsampler2xNeon.hpp, included at the end of this header. NOTE(review): the template parameter list, the standard include target and the std::array arguments are missing in this copy - presumably lost in extraction, restore them from upstream. */ template 50 | class Downsampler2xNeon 51 | { 52 | 53 | static_assert ((NC > 0), "Number of coefficient must be positive."); 54 | 55 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 56 | 57 | public: 58 | 59 | typedef float DataType; 60 | static const int _nbr_chn = 1; 61 | 62 | enum { NBR_COEFS = NC }; 63 | 64 | Downsampler2xNeon (); 65 | Downsampler2xNeon (const Downsampler2xNeon &other) = default; 66 | Downsampler2xNeon (Downsampler2xNeon &&other) = default; 67 | ~Downsampler2xNeon () = default; 68 | 69 | Downsampler2xNeon & 70 | operator = (const Downsampler2xNeon &other) = default; 71 | Downsampler2xNeon & 72 | operator = (Downsampler2xNeon &&other) = default; 73 | 74 | void set_coefs (const double coef_arr []); 75 | 76 | hiir_FORCEINLINE float 77 | process_sample (const float in_ptr [2]); 78 | void process_block (float out_ptr [], const float in_ptr [], long nbr_spl); 79 | 80 | hiir_FORCEINLINE void 81 | process_sample_split (float &low, float &high, const float in_ptr [2]); 82 | void process_block_split (float out_l_ptr [], float out_h_ptr [], const float in_ptr [], long nbr_spl); 83 | 84 | void clear_buffers (); 85 | 86 | 87 | 88 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 89 | 90 | protected: 91 | 92 | 93 | 94 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 95 | 96 | private: 97 | 98 | static const int _stage_width = 2; 99 | static const int _nbr_stages = (NBR_COEFS + _stage_width - 1) / _stage_width; 100 | 101 | // Stage 0 contains
only input memory 102 | typedef std::array Filter; 103 | 104 | Filter _filter; // Should be the first member (thus easier to align) 105 | 106 | 107 | 108 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 109 | 110 | private: 111 | 112 | bool operator == (const Downsampler2xNeon &other) const = delete; 113 | bool operator != (const Downsampler2xNeon &other) const = delete; 114 | 115 | }; // class Downsampler2xNeon 116 | 117 | 118 | 119 | } // namespace hiir 120 | 121 | 122 | 123 | #include "hiir/Downsampler2xNeon.hpp" 124 | 125 | 126 | 127 | #endif // hiir_Downsampler2xNeon_HEADER_INCLUDED 128 | 129 | 130 | 131 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 132 | -------------------------------------------------------------------------------- /PhaseHalfPiSse.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | PhaseHalfPiSse.h 4 | Author: Laurent de Soras, 2005 5 | 6 | From the input signal, generates two signals with a pi/2 phase shift, using 7 | SSE instruction set. 8 | 9 | This object must be aligned on a 16-byte boundary! 10 | 11 | If the number of coefficients is 2 or 3 modulo 4, the output is delayed from 12 | 1 sample, compared to the theoretical formula (or FPU implementation). 13 | 14 | Template parameters: 15 | - NC: number of coefficients, > 0 16 | 17 | --- Legal stuff --- 18 | 19 | This program is free software. It comes without any warranty, to 20 | the extent permitted by applicable law. You can redistribute it 21 | and/or modify it under the terms of the Do What The Fuck You Want 22 | To Public License, Version 2, as published by Sam Hocevar. See 23 | http://sam.zoy.org/wtfpl/COPYING for more details. 24 | 25 | *Tab=3***********************************************************************/ 26 | 27 | 28 | 29 | #if !
defined (hiir_PhaseHalfPiSse_HEADER_INCLUDED) 30 | #define hiir_PhaseHalfPiSse_HEADER_INCLUDED 31 | 32 | #if defined (_MSC_VER) 33 | #pragma once 34 | #pragma warning (4 : 4250) // "Inherits via dominance." 35 | #endif 36 | 37 | 38 | 39 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 40 | 41 | #include "hiir/def.h" 42 | #include "hiir/StageDataSse.h" 43 | 44 | #include 45 | 46 | #include 47 | 48 | 49 | 50 | namespace hiir 51 | { 52 | 53 | 54 | 55 | /* PhaseHalfPiSse: from the input signal, generates two signals with a pi/2 phase shift, using the SSE instruction set. NC is the number of filter coefficients and must be > 0 (checked by the static_assert below). Per the file header, objects of this class must be aligned on a 16-byte boundary. Declared interface: set_coefs() receives the coefficients as doubles; process_sample() takes one float input and writes two float outputs, out_0 and out_1; process_block() operates on arrays (nbr_spl is presumably a sample count - confirm in the .hpp); clear_buffers() presumably resets the filter state - confirm in the .hpp. The member functions are defined in hiir/PhaseHalfPiSse.hpp, included at the end of this header. NOTE(review): the template parameter list, the standard include targets and the std::array arguments are missing in this copy - presumably lost in extraction, restore them from upstream. */ template 56 | class PhaseHalfPiSse 57 | { 58 | 59 | static_assert ((NC > 0), "Number of coefficient must be positive."); 60 | 61 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 62 | 63 | public: 64 | 65 | typedef float DataType; 66 | static const int _nbr_chn = 1; 67 | 68 | enum { NBR_COEFS = NC }; 69 | 70 | PhaseHalfPiSse (); 71 | PhaseHalfPiSse (const PhaseHalfPiSse &other) = default; 72 | PhaseHalfPiSse (PhaseHalfPiSse &&other) = default; 73 | ~PhaseHalfPiSse () = default; 74 | 75 | PhaseHalfPiSse & 76 | operator = (const PhaseHalfPiSse &other) = default; 77 | PhaseHalfPiSse & 78 | operator = (PhaseHalfPiSse &&other) = default; 79 | 80 | void set_coefs (const double coef_arr []); 81 | 82 | hiir_FORCEINLINE void 83 | process_sample (float &out_0, float &out_1, float input); 84 | void process_block (float out_0_ptr [], float out_1_ptr [], const float in_ptr [], long nbr_spl); 85 | 86 | void clear_buffers (); 87 | 88 | 89 | 90 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 91 | 92 | protected: 93 | 94 | 95 | 96 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 97 | 98 | private: 99 | 100 | enum { STAGE_WIDTH = 4 }; 101 | enum { NBR_STAGES = (NBR_COEFS + STAGE_WIDTH-1) / STAGE_WIDTH }; 102 | enum { NBR_PHASES = 2 }; 103 | 104 | typedef std::array Filter; // Stage 0 contains only input memory 105 | typedef std::array FilterBiPhase; 106 | 107 | FilterBiPhase _filter; // Should be the first
member (thus easier to align) 108 | float _prev; 109 | int _phase; // 0 or 1 110 | 111 | 112 | 113 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 114 | 115 | private: 116 | 117 | /* Equality comparison is deliberately unavailable; the operators are const-qualified so the deletion also applies to const objects. */ bool operator == (const PhaseHalfPiSse &other) const = delete; 118 | bool operator != (const PhaseHalfPiSse &other) const = delete; 119 | 120 | }; // class PhaseHalfPiSse 121 | 122 | 123 | 124 | } // namespace hiir 125 | 126 | 127 | 128 | #include "hiir/PhaseHalfPiSse.hpp" 129 | 130 | 131 | 132 | #endif // hiir_PhaseHalfPiSse_HEADER_INCLUDED 133 | 134 | 135 | 136 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 137 | -------------------------------------------------------------------------------- /PhaseHalfPiNeon.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | PhaseHalfPiNeon.h 4 | Author: Laurent de Soras, 2016 5 | 6 | From the input signal, generates two signals with a pi/2 phase shift, using 7 | NEON instruction set. 8 | 9 | This object must be aligned on a 16-byte boundary! 10 | 11 | If the number of coefficients is 2 or 3 modulo 4, the output is delayed from 12 | 1 sample, compared to the theoretical formula (or FPU implementation). 13 | 14 | Template parameters: 15 | - NC: number of coefficients, > 0 16 | 17 | --- Legal stuff --- 18 | 19 | This program is free software. It comes without any warranty, to 20 | the extent permitted by applicable law. You can redistribute it 21 | and/or modify it under the terms of the Do What The Fuck You Want 22 | To Public License, Version 2, as published by Sam Hocevar. See 23 | http://sam.zoy.org/wtfpl/COPYING for more details. 24 | 25 | *Tab=3***********************************************************************/ 26 | 27 | 28 | 29 | #pragma once 30 | #if !
defined (hiir_PhaseHalfPiNeon_HEADER_INCLUDED) 31 | #define hiir_PhaseHalfPiNeon_HEADER_INCLUDED 32 | 33 | #if defined (_MSC_VER) 34 | #pragma warning (4 : 4250) 35 | #endif 36 | 37 | 38 | 39 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 40 | 41 | #include "hiir/def.h" 42 | #include "hiir/StageDataNeonV4.h" 43 | 44 | #include 45 | 46 | #include 47 | 48 | 49 | 50 | namespace hiir 51 | { 52 | 53 | 54 | 55 | /* PhaseHalfPiNeon: from the input signal, generates two signals with a pi/2 phase shift, using the NEON instruction set. NC is the number of filter coefficients and must be > 0 (checked by the static_assert below). Per the file header, objects of this class must be aligned on a 16-byte boundary. Declared interface: set_coefs() receives the coefficients as doubles; process_sample() takes one float input and writes two float outputs, out_0 and out_1; process_block() operates on arrays (nbr_spl is presumably a sample count - confirm in the .hpp); clear_buffers() presumably resets the filter state - confirm in the .hpp. The member functions are defined in hiir/PhaseHalfPiNeon.hpp, included at the end of this header. NOTE(review): the template parameter list, the standard include targets and the std::array arguments are missing in this copy - presumably lost in extraction, restore them from upstream. */ template 56 | class PhaseHalfPiNeon 57 | { 58 | 59 | static_assert ((NC > 0), "Number of coefficient must be positive."); 60 | 61 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 62 | 63 | public: 64 | 65 | typedef float DataType; 66 | static const int _nbr_chn = 1; 67 | 68 | enum { NBR_COEFS = NC }; 69 | 70 | PhaseHalfPiNeon (); 71 | PhaseHalfPiNeon (const PhaseHalfPiNeon &other) = default; 72 | PhaseHalfPiNeon (PhaseHalfPiNeon &&other) = default; 73 | ~PhaseHalfPiNeon () = default; 74 | 75 | PhaseHalfPiNeon & 76 | operator = (const PhaseHalfPiNeon &other) = default; 77 | PhaseHalfPiNeon & 78 | operator = (PhaseHalfPiNeon &&other) = default; 79 | 80 | void set_coefs (const double coef_arr []); 81 | 82 | hiir_FORCEINLINE void 83 | process_sample (float &out_0, float &out_1, float input); 84 | void process_block (float out_0_ptr [], float out_1_ptr [], const float in_ptr [], long nbr_spl); 85 | 86 | void clear_buffers (); 87 | 88 | 89 | 90 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 91 | 92 | protected: 93 | 94 | 95 | 96 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 97 | 98 | private: 99 | 100 | enum { STAGE_WIDTH = 4 }; 101 | enum { NBR_STAGES = (NBR_COEFS + STAGE_WIDTH-1) / STAGE_WIDTH }; 102 | enum { NBR_PHASES = 2 }; 103 | 104 | typedef std::array Filter; // Stage 0 contains only input memory 105 | typedef std::array FilterBiPhase; 106 | 107 | FilterBiPhase _filter; // Should be the first member (thus easier to align) 108 |
float _prev; 109 | int _phase; // 0 or 1 110 | 111 | 112 | 113 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 114 | 115 | private: 116 | 117 | bool operator == (const PhaseHalfPiNeon &other) const = delete; 118 | bool operator != (const PhaseHalfPiNeon &other) const = delete; 119 | 120 | }; // class PhaseHalfPiNeon 121 | 122 | 123 | 124 | } // namespace hiir 125 | 126 | 127 | 128 | #include "hiir/PhaseHalfPiNeon.hpp" 129 | 130 | 131 | 132 | #endif // hiir_PhaseHalfPiNeon_HEADER_INCLUDED 133 | 134 | 135 | 136 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ 137 | --------------------------------------------------------------------------------