├── README.md ├── HDRTools - ReadMe.txt ├── HDRTools ├── HDRTools.rc ├── ThreadPool.cpp ├── HDRTools.vcxproj.user ├── avs │ ├── filesystem.h │ ├── minmax.h │ ├── types.h │ ├── win.h │ ├── cpuid.h │ ├── capi.h │ ├── alignment.h │ ├── posix.h │ └── config.h ├── TransferFunctions.h ├── ThreadPoolDef.h ├── TransferFunctions.cpp ├── HDRTools.vcxproj.filters ├── ThreadPool.h ├── ThreadPoolInterface.h ├── MatrixClass.h ├── HDRTools.vcxproj ├── HDRTools_AVX2_asm.asm ├── HDRTools.h └── HDRTools_AVX2_asm_x64.asm └── HDRTools.sln /README.md: -------------------------------------------------------------------------------- 1 | # HDRTools 2 | Avisynth HDR Tools plugin 3 | -------------------------------------------------------------------------------- /HDRTools - ReadMe.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jpsdr/HDRTools/HEAD/HDRTools - ReadMe.txt -------------------------------------------------------------------------------- /HDRTools/HDRTools.rc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jpsdr/HDRTools/HEAD/HDRTools/HDRTools.rc -------------------------------------------------------------------------------- /HDRTools/ThreadPool.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jpsdr/HDRTools/HEAD/HDRTools/ThreadPool.cpp -------------------------------------------------------------------------------- /HDRTools/HDRTools.vcxproj.user: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /HDRTools/avs/filesystem.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Snippet copied from filesystem/README.md 4 | 5 | #if defined(__cplusplus) && __cplusplus >= 201703L && 
defined(__has_include) 6 | #if __has_include() 7 | #define GHC_USE_STD_FS 8 | #include 9 | namespace fs = std::filesystem; 10 | #endif 11 | #endif 12 | #ifndef GHC_USE_STD_FS 13 | #include 14 | namespace fs = ghc::filesystem; 15 | #endif 16 | -------------------------------------------------------------------------------- /HDRTools/TransferFunctions.h: -------------------------------------------------------------------------------- 1 | /* 2 | * TransferFunctions 3 | * 4 | * OOTF,EOTF,OETF, etc... HDR and SDR core functions. 5 | * Copyright (C) 2019 JPSDR 6 | * 7 | * HDRTools is free software; you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation; either version 2, or (at your option) 10 | * any later version. 11 | * 12 | * HDRTools is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GNU Make; see the file COPYING. If not, write to 19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
20 | * 21 | */ 22 | 23 | void Set_l_HLG(double lw); 24 | double HLG_OETF(double x); 25 | double HLG_inv_OETF(double x); 26 | double HLG_OOTF(double x); 27 | double HLG_inv_OOTF(double x); 28 | double inv_OETF(double x); 29 | double OETF(double x); 30 | double EOTF(double x); 31 | double PQ_OOTF(double x); 32 | double PQ_OOTF_Inv(double x); 33 | double PQ_EOTF(double x); 34 | double PQ_inv_EOTF(double x); 35 | -------------------------------------------------------------------------------- /HDRTools.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 11.00 3 | # Visual Studio 2010 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "HDRTools", "HDRTools\HDRTools.vcxproj", "{1820278E-F1C3-48E8-A951-EE5E95079370}" 5 | EndProject 6 | Global 7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 8 | Debug|Win32 = Debug|Win32 9 | Debug|x64 = Debug|x64 10 | Release|Win32 = Release|Win32 11 | Release|x64 = Release|x64 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {1820278E-F1C3-48E8-A951-EE5E95079370}.Debug|Win32.ActiveCfg = Debug|Win32 15 | {1820278E-F1C3-48E8-A951-EE5E95079370}.Debug|Win32.Build.0 = Debug|Win32 16 | {1820278E-F1C3-48E8-A951-EE5E95079370}.Debug|x64.ActiveCfg = Debug|x64 17 | {1820278E-F1C3-48E8-A951-EE5E95079370}.Debug|x64.Build.0 = Debug|x64 18 | {1820278E-F1C3-48E8-A951-EE5E95079370}.Release|Win32.ActiveCfg = Release|Win32 19 | {1820278E-F1C3-48E8-A951-EE5E95079370}.Release|Win32.Build.0 = Release|Win32 20 | {1820278E-F1C3-48E8-A951-EE5E95079370}.Release|x64.ActiveCfg = Release|x64 21 | {1820278E-F1C3-48E8-A951-EE5E95079370}.Release|x64.Build.0 = Release|x64 22 | EndGlobalSection 23 | GlobalSection(SolutionProperties) = preSolution 24 | HideSolutionNode = FALSE 25 | EndGlobalSection 26 | EndGlobal 27 | -------------------------------------------------------------------------------- 
// /HDRTools/ThreadPoolDef.h:
// --------------------------------------------------------------------------------
/*
 *  Threadpool
 *
 *  Create and manage a threadpool.
 *  Copyright (C) 2016 JPSDR
 *
 *  Threadpool is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  Threadpool is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

#ifndef __ThreadPoolDef_H__
#define __ThreadPoolDef_H__

// Provides uint8_t, used by Public_MT_Data_Thread below.
#include <stdint.h>

#define MAX_MT_THREADS 128 // Maximum possible 255
#define MAX_THREAD_POOL 64 // Maximum possible 127

// Callback type: a plain function taking one opaque pointer and returning nothing.
typedef void (*ThreadPoolFunction)(void *ptr);

// Thread level selector. Enumerators are numbered 0 (NoneThreadLevel) to
// 7 (CriticalThreadLevel) in declaration order.
// NOTE(review): presumably mapped to OS thread priorities by the pool
// implementation -- confirm in ThreadPool.cpp.
enum ThreadLevelName {NoneThreadLevel,IdleThreadLevel,LowestThreadLevel,BelowThreadLevel,
	NormalThreadLevel,AboveThreadLevel,HighestThreadLevel,CriticalThreadLevel};

// Per-thread job description shared between a client and the thread pool.
// NOTE(review): field meanings below are inferred from the names only; this
// header does not show how they are consumed.
typedef struct _Public_MT_Data_Thread
{
	ThreadPoolFunction pFunc;    // callback to execute
	void *pClass;                // opaque pointer (presumably the owning object)
	uint8_t f_process,thread_Id; // small integer selector / thread index
	void *pData;                 // opaque pointer (presumably the job payload)
} Public_MT_Data_Thread;


#endif // __ThreadPoolDef_H__

// --------------------------------------------------------------------------------
// /HDRTools/avs/minmax.h:
// --------------------------------------------------------------------------------
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the
// Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
// http://www.gnu.org/copyleft/gpl.html .
//
// Linking Avisynth statically or dynamically with other modules is making a
// combined work based on Avisynth.  Thus, the terms and conditions of the GNU
// General Public License cover the whole combination.
//
// As a special exception, the copyright holders of Avisynth give you
// permission to link Avisynth with independent modules that communicate with
// Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
// terms of these independent modules, and to copy and distribute the
// resulting combined work under terms of your choice, provided that
// every copy of the combined work is accompanied by a complete copy of
// the source code of Avisynth (the version of Avisynth used to produce the
// combined work), being distributed under the terms of the GNU General
// Public License plus this exception.  An independent module is a module
// which is not derived from or based on Avisynth, such as 3rd-party filters,
// import and export plugins, or graphical user interfaces.

#ifndef AVSCORE_MINMAX_H
#define AVSCORE_MINMAX_H

// Returns the smaller of v1 and v2 (v2 when they compare equal).
// Arguments are taken and returned by value.
template<typename T>
T min(T v1, T v2)
{
  return v1 < v2 ? v1 : v2;
}

// Returns the larger of v1 and v2 (v2 when they compare equal).
// Arguments are taken and returned by value.
template<typename T>
T max(T v1, T v2)
{
  return v1 > v2 ? v1 : v2;
}

// Limits n to the closed interval [min, max].
// The upper bound is applied first and the lower bound last, so when the
// bounds are crossed (min > max) the result is min. Callers should pass
// min <= max.
// Note: the parameters named min/max shadow the function templates above
// inside this body; that is harmless because the body only uses the values.
template<typename T>
T clamp(T n, T min, T max)
{
  n = n > max ? max : n;
  return n < min ? min : n;
}

#endif // AVSCORE_MINMAX_H

// --------------------------------------------------------------------------------
// /HDRTools/avs/types.h:
// --------------------------------------------------------------------------------
// Avisynth C Interface Version 0.20
// Copyright 2003 Kevin Atkinson

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
// http://www.gnu.org/copyleft/gpl.html .
//
// As a special exception, I give you permission to link to the
// Avisynth C interface with independent modules that communicate with
// the Avisynth C interface solely through the interfaces defined in
// avisynth_c.h, regardless of the license terms of these independent
// modules, and to copy and distribute the resulting combined work
// under terms of your choice, provided that every copy of the
// combined work is accompanied by a complete copy of the source code
// of the Avisynth C interface and Avisynth itself (with the version
// used to produce the combined work), being distributed under the
// terms of the GNU General Public License plus this exception.
An 29 | // independent module is a module which is not derived from or based 30 | // on Avisynth C Interface, such as 3rd-party filters, import and 31 | // export plugins, or graphical user interfaces. 32 | 33 | #ifndef AVS_TYPES_H 34 | #define AVS_TYPES_H 35 | 36 | // Define all types necessary for interfacing with avisynth.dll 37 | #include 38 | //#include 39 | #ifdef __cplusplus 40 | #include 41 | #include 42 | #else 43 | #include 44 | #include 45 | #endif 46 | 47 | // Raster types used by VirtualDub & Avisynth 48 | typedef uint32_t Pixel32; 49 | typedef uint8_t BYTE; 50 | 51 | // Audio Sample information 52 | typedef float SFLOAT; 53 | 54 | #endif //AVS_TYPES_H 55 | -------------------------------------------------------------------------------- /HDRTools/avs/win.h: -------------------------------------------------------------------------------- 1 | // This program is free software; you can redistribute it and/or modify 2 | // it under the terms of the GNU General Public License as published by 3 | // the Free Software Foundation; either version 2 of the License, or 4 | // (at your option) any later version. 5 | // 6 | // This program is distributed in the hope that it will be useful, 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | // GNU General Public License for more details. 10 | // 11 | // You should have received a copy of the GNU General Public License 12 | // along with this program; if not, write to the Free Software 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 14 | // http://www.gnu.org/copyleft/gpl.html . 15 | // 16 | // Linking Avisynth statically or dynamically with other modules is making a 17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 18 | // General Public License cover the whole combination. 
19 | // 20 | // As a special exception, the copyright holders of Avisynth give you 21 | // permission to link Avisynth with independent modules that communicate with 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 23 | // terms of these independent modules, and to copy and distribute the 24 | // resulting combined work under terms of your choice, provided that 25 | // every copy of the combined work is accompanied by a complete copy of 26 | // the source code of Avisynth (the version of Avisynth used to produce the 27 | // combined work), being distributed under the terms of the GNU General 28 | // Public License plus this exception. An independent module is a module 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 30 | // import and export plugins, or graphical user interfaces. 31 | 32 | #ifndef AVSCORE_WIN_H 33 | #define AVSCORE_WIN_H 34 | 35 | // Whenever you need windows headers, start by including this file, then the rest. 36 | 37 | // WWUUT? We require XP now? 38 | #if !defined(NTDDI_VERSION) && !defined(_WIN32_WINNT) 39 | #define NTDDI_VERSION 0x05020000 40 | #define _WIN32_WINNT 0x0502 41 | #endif 42 | 43 | #define WIN32_LEAN_AND_MEAN 44 | #define STRICT 45 | #if !defined(NOMINMAX) 46 | #define NOMINMAX 47 | #endif 48 | 49 | #include 50 | 51 | // Provision for UTF-8 max 4 bytes per code point 52 | #define AVS_MAX_PATH MAX_PATH*4 53 | 54 | #endif // AVSCORE_WIN_H 55 | -------------------------------------------------------------------------------- /HDRTools/TransferFunctions.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * TransferFunctions 3 | * 4 | * OOTF,EOTF,OETF, etc... HDR and SDR core functions. 
/*
 *  Copyright (C) 2019 JPSDR
 *
 *  HDRTools is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  HDRTools is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

// log, log10, exp, sqrt, pow.
#include <cmath>

// NOTE(review): m1..c3, alpha2, beta2, coeff_i24 and coeff_i59 are not used by
// any function visible in this listing; presumably they belong to the PQ_* and
// OETF/EOTF functions whose bodies are missing below.
static const double m1=0.1593017578125,im1=1.0/m1;
static const double m2=78.84375,im2=1.0/m2;
static const double c1=0.8359375;
static const double c2=18.8515625;
static const double c3=18.6875;

// Constants of the piecewise SDR curve handled by inv_OETF():
// slope 4.5 below the knee, exponent 0.45 above it.
static const double alpha=1.09929682680944,alpham1=alpha-1.0,ialpha=1.0/alpha;
static const double beta=0.018053968510807;
static const double alpha2=267.84,beta2=0.0003024,ialpha2=1.0/alpha2;
static const double coeff_i12=1.0/12.0,coeff_i3=1.0/3.0,coeff_i45=1.0/0.45;
static const double coeff_i24=1.0/2.404,coeff_i59=1.0/59.5208;

// HLG curve constants; b and c are derived from a so that the square-root and
// logarithmic segments of HLG_OETF() meet at x = 1/12.
static const double a=0.17883277;
static const double b=1.0-4.0*a,c=0.5-a*std::log(4.0*a),ia=1.0/a;

// Exponents (gamma-1) and (1/gamma-1) used by HLG_OOTF()/HLG_inv_OOTF().
// Defaults correspond to gamma = 1.2; updated by Set_l_HLG().
static double lm1=1.2-1.0,ilm1=(1.0/1.2)-1.0;

/*
 * Sets the HLG system gamma from Lw: gamma = 1.2 + 0.42*log10(Lw/1000).
 * Lw must be > 0; a non-positive value makes log10() return -inf/NaN and
 * poisons both exponents.
 * NOTE(review): Lw is presumably the display peak luminance in cd/m2 -- confirm
 * against the callers.
 */
void Set_l_HLG(double Lw)
{
	const double sysGamma=1.2+0.42*std::log10(Lw*0.001);

	lm1=sysGamma-1.0;
	ilm1=(1.0/sysGamma)-1.0;
}

/*
 * HLG OETF: sqrt(3x) for x <= 1/12, a*ln(12x-b)+c above.
 */
double HLG_OETF(double x)
{
	if (x<=coeff_i12) return(std::sqrt(3.0*x));
	return(a*std::log(12.0*x-b)+c);
}

/*
 * Inverse of HLG_OETF(): x*x/3 for x <= 0.5, (exp((x-c)/a)+b)/12 above.
 */
double HLG_inv_OETF(double x)
{
	if (x<=0.5) return(x*x*coeff_i3);
	return((std::exp((x-c)*ia)+b)*coeff_i12);
}

/*
 * HLG OOTF applied to a single value: x^gamma, evaluated as x*x^(gamma-1).
 */
double HLG_OOTF(double x)
{
	// When gamma < 1 the exponent is negative and pow(0,lm1) is +inf, so the
	// product would be 0*inf = NaN. Zero must map to zero.
	if (x==0.0) return(0.0);
	return(x*std::pow(x,lm1));
}

/*
 * Inverse HLG OOTF applied to a single value: x^(1/gamma), evaluated as
 * x*x^(1/gamma-1).
 */
double HLG_inv_OOTF(double x)
{
	// The exponent is negative for every gamma > 1 (including the default),
	// so x == 0 would give 0*inf = NaN without this guard.
	if (x==0.0) return(0.0);
	return(x*std::pow(x,ilm1));
}

/*
 * Inverse of the piecewise SDR OETF:
 *   x <  4.5*beta : x/4.5
 *   x >= 4.5*beta : ((x+alpha-1)/alpha)^(1/0.45)
 */
double inv_OETF(double x)
{
	// The linear segment has slope 4.5 (see the threshold), so its inverse
	// divides by 4.5. The previous code multiplied by coeff_i45 (= 1/0.45),
	// which made the result 10x too large below the knee and discontinuous
	// at x = 4.5*beta.
	if (x<(beta*4.5)) return(x/4.5);
	return(std::pow((x+alpham1)*ialpha,coeff_i45));
}

/*
 * NOTE(review): the rest of this file is missing from this listing.
 * Everything from the '<' of the first comparison in OETF() up to the first
 * '>' of the next file in the dump was removed during extraction. The
 * surviving fragment is kept here verbatim and has NOT been reconstructed:
 *
 *   double OETF(double x)
 *   {
 *     if (x
 *
 * Residue of the next file in the dump (XML tags stripped), kept verbatim:
 *
 *   {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
 *   cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
 *   {93995380-89BD-4b04-88EB-625FBE52EBFB}
 *   h;hpp;hxx;hm;inl;inc;xsd
 *   {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
 *   rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
 *   Fichiers d%27en-tête
 *   Fichiers d%27en-tête
 *   Fichiers d%27en-tête
 *   Fichiers d%27en-tête
 *   Fichiers d%27en-tête
 *   Fichiers d%27en-tête
 *   Fichiers d%27en-tête
 *   Fichiers sources
 *   Fichiers sources
 *   Fichiers sources
 *   Fichiers sources
 *   Fichiers sources
 *   Fichiers de ressources
 *   Fichiers sources
 *   Fichiers sources
 *   Fichiers sources
 *   Fichiers sources
 *   Fichiers sources
 *   Fichiers sources
 *   Fichiers sources
 *   Fichiers sources
 *   Fichiers sources
 *   Fichiers sources
 */

// --------------------------------------------------------------------------------
// /HDRTools/avs/cpuid.h:
// --------------------------------------------------------------------------------
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
5 | // 6 | // This program is distributed in the hope that it will be useful, 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | // GNU General Public License for more details. 10 | // 11 | // You should have received a copy of the GNU General Public License 12 | // along with this program; if not, write to the Free Software 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 14 | // http://www.gnu.org/copyleft/gpl.html . 15 | // 16 | // Linking Avisynth statically or dynamically with other modules is making a 17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 18 | // General Public License cover the whole combination. 19 | // 20 | // As a special exception, the copyright holders of Avisynth give you 21 | // permission to link Avisynth with independent modules that communicate with 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 23 | // terms of these independent modules, and to copy and distribute the 24 | // resulting combined work under terms of your choice, provided that 25 | // every copy of the combined work is accompanied by a complete copy of 26 | // the source code of Avisynth (the version of Avisynth used to produce the 27 | // combined work), being distributed under the terms of the GNU General 28 | // Public License plus this exception. An independent module is a module 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 30 | // import and export plugins, or graphical user interfaces. 31 | 32 | #ifndef AVSCORE_CPUID_H 33 | #define AVSCORE_CPUID_H 34 | 35 | // For GetCPUFlags. These are backwards-compatible with those in VirtualDub. 
36 | // ending with SSE4_2 37 | // For emulation see https://software.intel.com/en-us/articles/intel-software-development-emulator 38 | enum { 39 | /* oldest CPU to support extension */ 40 | CPUF_FORCE = 0x01, // N/A 41 | CPUF_FPU = 0x02, // 386/486DX 42 | CPUF_MMX = 0x04, // P55C, K6, PII 43 | CPUF_INTEGER_SSE = 0x08, // PIII, Athlon 44 | CPUF_SSE = 0x10, // PIII, Athlon XP/MP 45 | CPUF_SSE2 = 0x20, // PIV, K8 46 | CPUF_3DNOW = 0x40, // K6-2 47 | CPUF_3DNOW_EXT = 0x80, // Athlon 48 | CPUF_X86_64 = 0xA0, // Hammer (note: equiv. to 3DNow + SSE2, which 49 | // only Hammer will have anyway) 50 | CPUF_SSE3 = 0x100, // PIV+, K8 Venice 51 | CPUF_SSSE3 = 0x200, // Core 2 52 | CPUF_SSE4 = 0x400, 53 | CPUF_SSE4_1 = 0x400, // Penryn, Wolfdale, Yorkfield 54 | CPUF_AVX = 0x800, // Sandy Bridge, Bulldozer 55 | CPUF_SSE4_2 = 0x1000, // Nehalem 56 | // AVS+ 57 | CPUF_AVX2 = 0x2000, // Haswell 58 | CPUF_FMA3 = 0x4000, 59 | CPUF_F16C = 0x8000, 60 | CPUF_MOVBE = 0x10000, // Big Endian move 61 | CPUF_POPCNT = 0x20000, 62 | CPUF_AES = 0x40000, 63 | CPUF_FMA4 = 0x80000, 64 | 65 | CPUF_AVX512F = 0x100000, // AVX-512 Foundation. 
66 | CPUF_AVX512DQ = 0x200000, // AVX-512 DQ (Double/Quad granular) Instructions 67 | CPUF_AVX512PF = 0x400000, // AVX-512 Prefetch 68 | CPUF_AVX512ER = 0x800000, // AVX-512 Exponential and Reciprocal 69 | CPUF_AVX512CD = 0x1000000, // AVX-512 Conflict Detection 70 | CPUF_AVX512BW = 0x2000000, // AVX-512 BW (Byte/Word granular) Instructions 71 | CPUF_AVX512VL = 0x4000000, // AVX-512 VL (128/256 Vector Length) Extensions 72 | CPUF_AVX512IFMA = 0x8000000, // AVX-512 IFMA integer 52 bit 73 | CPUF_AVX512VBMI = 0x10000000,// AVX-512 VBMI 74 | }; 75 | 76 | #ifdef BUILDING_AVSCORE 77 | int GetCPUFlags(); 78 | void SetMaxCPU(int new_flags); 79 | #endif 80 | 81 | #endif // AVSCORE_CPUID_H 82 | -------------------------------------------------------------------------------- /HDRTools/ThreadPool.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Threadpool 3 | * 4 | * Create and manage a threadpool. 5 | * Copyright (C) 2016 JPSDR 6 | * 7 | * Threadpool is free software; you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation; either version 2, or (at your option) 10 | * any later version. 11 | * 12 | * Threadpool is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GNU Make; see the file COPYING. If not, write to 19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
20 | * 21 | */ 22 | 23 | #ifndef __ThreadPool_H__ 24 | #define __ThreadPool_H__ 25 | 26 | #include 27 | 28 | #include "./ThreadPoolDef.h" 29 | 30 | #define THREADPOOL_VERSION "ThreadPool 1.4.5" 31 | 32 | #define MAX_PHYSICAL_CORES 64 33 | 34 | typedef struct _MT_Data_Thread 35 | { 36 | Public_MT_Data_Thread *MTData; 37 | uint8_t f_process,thread_Id; 38 | HANDLE nextJob,jobFinished; 39 | } MT_Data_Thread; 40 | 41 | 42 | typedef struct _Arch_CPU 43 | { 44 | uint8_t NbPhysCore,NbLogicCPU; 45 | uint8_t NbHT[MAX_PHYSICAL_CORES]; 46 | ULONG_PTR ProcMask[MAX_PHYSICAL_CORES]; 47 | ULONG_PTR FullMask; 48 | } Arch_CPU; 49 | 50 | 51 | class ThreadPool 52 | { 53 | public : 54 | ThreadPool(void); 55 | virtual ~ThreadPool(); 56 | 57 | protected : 58 | 59 | Arch_CPU CPU; 60 | 61 | public : 62 | 63 | uint8_t GetThreadNumber(uint8_t thread_number,bool logical); 64 | bool AllocateThreads(uint8_t thread_number,uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore, 65 | bool SetAffinity,bool sleep,ThreadLevelName priority); 66 | bool AllocateThreads(uint8_t thread_number,uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore, 67 | bool SetAffinity,bool sleep) 68 | {return(AllocateThreads(thread_number,offset_core,offset_ht,UseMaxPhysCore,SetAffinity,sleep,NormalThreadLevel));} 69 | bool DeAllocateThreads(void); 70 | bool ChangeThreadsAffinity(uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,bool SetAffinity); 71 | bool ChangeThreadsLevel(ThreadLevelName priority); 72 | bool RequestThreadPool(uint8_t thread_number,Public_MT_Data_Thread *Data,ThreadLevelName priority); 73 | bool RequestThreadPool(uint8_t thread_number,Public_MT_Data_Thread *Data) 74 | {return(RequestThreadPool(thread_number,Data,NoneThreadLevel));} 75 | bool ReleaseThreadPool(bool sleep); 76 | bool StartThreads(void); 77 | bool WaitThreadsEnd(void); 78 | bool GetThreadPoolStatus(void) {return(Status_Ok);} 79 | uint8_t GetCurrentThreadAllocated(void) {return(CurrentThreadsAllocated);} 80 | uint8_t 
GetCurrentThreadUsed(void) {return(CurrentThreadsUsed);} 81 | uint8_t GetLogicalCPUNumber(void) {return(CPU.NbLogicCPU);} 82 | uint8_t GetPhysicalCoreNumber(void) {return(CPU.NbPhysCore);} 83 | 84 | protected : 85 | 86 | MT_Data_Thread MT_Thread[MAX_MT_THREADS]; 87 | HANDLE nextJob[MAX_MT_THREADS],jobFinished[MAX_MT_THREADS]; 88 | HANDLE thds[MAX_MT_THREADS]; 89 | DWORD tids[MAX_MT_THREADS]; 90 | ULONG_PTR ThreadMask[MAX_MT_THREADS]; 91 | bool ThreadSleep[MAX_MT_THREADS]; 92 | ThreadLevelName nPriority; 93 | 94 | bool Status_Ok; 95 | uint8_t TotalThreadsRequested,CurrentThreadsAllocated,CurrentThreadsUsed; 96 | 97 | void FreeThreadPool(void); 98 | void DestroyThreadPool(void); 99 | void CreateThreadPool(uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,bool SetAffinity, 100 | bool sleep,ThreadLevelName priority); 101 | 102 | private : 103 | 104 | static DWORD WINAPI StaticThreadpool(LPVOID lpParam); 105 | 106 | ThreadPool (const ThreadPool &other); 107 | ThreadPool& operator = (const ThreadPool &other); 108 | bool operator == (const ThreadPool &other) const; 109 | bool operator != (const ThreadPool &other) const; 110 | }; 111 | 112 | #endif // __ThreadPool_H__ 113 | -------------------------------------------------------------------------------- /HDRTools/avs/capi.h: -------------------------------------------------------------------------------- 1 | // Avisynth C Interface Version 0.20 2 | // Copyright 2003 Kevin Atkinson 3 | 4 | // This program is free software; you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation; either version 2 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 
13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program; if not, write to the Free Software 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 17 | // http://www.gnu.org/copyleft/gpl.html . 18 | // 19 | // As a special exception, I give you permission to link to the 20 | // Avisynth C interface with independent modules that communicate with 21 | // the Avisynth C interface solely through the interfaces defined in 22 | // avisynth_c.h, regardless of the license terms of these independent 23 | // modules, and to copy and distribute the resulting combined work 24 | // under terms of your choice, provided that every copy of the 25 | // combined work is accompanied by a complete copy of the source code 26 | // of the Avisynth C interface and Avisynth itself (with the version 27 | // used to produce the combined work), being distributed under the 28 | // terms of the GNU General Public License plus this exception. An 29 | // independent module is a module which is not derived from or based 30 | // on Avisynth C Interface, such as 3rd-party filters, import and 31 | // export plugins, or graphical user interfaces. 
32 | 33 | #ifndef AVS_CAPI_H 34 | #define AVS_CAPI_H 35 | 36 | #include "config.h" 37 | 38 | #ifdef AVS_POSIX 39 | // this is also defined in avs/posix.h 40 | #ifndef AVS_HAIKU 41 | #define __declspec(x) 42 | #endif 43 | #endif 44 | 45 | #ifdef __cplusplus 46 | # define EXTERN_C extern "C" 47 | #else 48 | # define EXTERN_C 49 | #endif 50 | 51 | #ifdef AVS_WINDOWS 52 | #ifdef BUILDING_AVSCORE 53 | # if defined(GCC) && defined(X86_32) 54 | # define AVSC_CC 55 | # else // MSVC builds and 64-bit GCC 56 | # ifndef AVSC_USE_STDCALL 57 | # define AVSC_CC __cdecl 58 | # else 59 | # define AVSC_CC __stdcall 60 | # endif 61 | # endif 62 | #else // needed for programs that talk to AviSynth+ 63 | # ifndef AVSC_WIN32_GCC32 // see comment below 64 | # ifndef AVSC_USE_STDCALL 65 | # define AVSC_CC __cdecl 66 | # else 67 | # define AVSC_CC __stdcall 68 | # endif 69 | # else 70 | # define AVSC_CC 71 | # endif 72 | #endif 73 | # else 74 | # define AVSC_CC 75 | #endif 76 | 77 | // On 64-bit Windows, there's only one calling convention, 78 | // so there is no difference between MSVC and GCC. On 32-bit, 79 | // this isn't true. The convention that GCC needs to use to 80 | // even build AviSynth+ as 32-bit makes anything that uses 81 | // it incompatible with 32-bit MSVC builds of AviSynth+. 82 | // The AVSC_WIN32_GCC32 define is meant to provide a user 83 | // switchable way to make builds of FFmpeg to test 32-bit 84 | // GCC builds of AviSynth+ without having to screw around 85 | // with alternate headers, while still default to the usual 86 | // situation of using 32-bit MSVC builds of AviSynth+. 87 | 88 | // Hopefully, this situation will eventually be resolved 89 | // and a broadly compatible solution will arise so the 90 | // same 32-bit FFmpeg build can handle either MSVC or GCC 91 | // builds of AviSynth+. 
92 | 93 | #define AVSC_INLINE static __inline 94 | 95 | #ifdef BUILDING_AVSCORE 96 | #ifdef AVS_WINDOWS 97 | # ifndef AVS_STATIC_LIB 98 | # define AVSC_EXPORT __declspec(dllexport) 99 | # else 100 | # define AVSC_EXPORT 101 | # endif 102 | # define AVSC_API(ret, name) EXTERN_C AVSC_EXPORT ret AVSC_CC name 103 | #else 104 | # define AVSC_EXPORT EXTERN_C 105 | # define AVSC_API(ret, name) EXTERN_C ret AVSC_CC name 106 | #endif 107 | #else 108 | # define AVSC_EXPORT EXTERN_C __declspec(dllexport) 109 | # ifndef AVS_STATIC_LIB 110 | # define AVSC_IMPORT __declspec(dllimport) 111 | # else 112 | # define AVSC_IMPORT 113 | # endif 114 | # ifndef AVSC_NO_DECLSPEC 115 | # define AVSC_API(ret, name) EXTERN_C AVSC_IMPORT ret AVSC_CC name 116 | # else 117 | # define AVSC_API(ret, name) typedef ret (AVSC_CC *name##_func) 118 | # endif 119 | #endif 120 | 121 | #endif //AVS_CAPI_H 122 | -------------------------------------------------------------------------------- /HDRTools/avs/alignment.h: -------------------------------------------------------------------------------- 1 | // Avisynth C Interface Version 0.20 2 | // Copyright 2003 Kevin Atkinson 3 | 4 | // This program is free software; you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation; either version 2 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program; if not, write to the Free Software 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 17 | // http://www.gnu.org/copyleft/gpl.html . 
18 | // 19 | // As a special exception, I give you permission to link to the 20 | // Avisynth C interface with independent modules that communicate with 21 | // the Avisynth C interface solely through the interfaces defined in 22 | // avisynth_c.h, regardless of the license terms of these independent 23 | // modules, and to copy and distribute the resulting combined work 24 | // under terms of your choice, provided that every copy of the 25 | // combined work is accompanied by a complete copy of the source code 26 | // of the Avisynth C interface and Avisynth itself (with the version 27 | // used to produce the combined work), being distributed under the 28 | // terms of the GNU General Public License plus this exception. An 29 | // independent module is a module which is not derived from or based 30 | // on Avisynth C Interface, such as 3rd-party filters, import and 31 | // export plugins, or graphical user interfaces. 32 | 33 | #ifndef AVS_ALIGNMENT_H 34 | #define AVS_ALIGNMENT_H 35 | 36 | // Functions and macros to help work with alignment requirements. 37 | 38 | // Tells if a number is a power of two. 39 | #define IS_POWER2(n) ((n) && !((n) & ((n) - 1))) 40 | 41 | // Tells if the pointer "ptr" is aligned to "align" bytes. 
42 | #define IS_PTR_ALIGNED(ptr, align) (((uintptr_t)ptr & ((uintptr_t)(align-1))) == 0) 43 | 44 | // Rounds up the number "n" to the next greater multiple of "align" 45 | #define ALIGN_NUMBER(n, align) (((n) + (align)-1) & (~((align)-1))) 46 | 47 | // Rounds up the pointer address "ptr" to the next greater multiple of "align" 48 | #define ALIGN_POINTER(ptr, align) (((uintptr_t)(ptr) + (align)-1) & (~(uintptr_t)((align)-1))) 49 | 50 | #ifdef __cplusplus 51 | 52 | #include 53 | #include 54 | #include 55 | #include "config.h" 56 | 57 | #if defined(MSVC) && _MSC_VER<1400 58 | // needed for VS2013, otherwise C++11 'alignas' works 59 | #define avs_alignas(x) __declspec(align(x)) 60 | #else 61 | // assumes C++11 support 62 | #define avs_alignas(x) alignas(x) 63 | #endif 64 | 65 | template 66 | static bool IsPtrAligned(T* ptr, size_t align) 67 | { 68 | assert(IS_POWER2(align)); 69 | return (bool)IS_PTR_ALIGNED(ptr, align); 70 | } 71 | 72 | template 73 | static T AlignNumber(T n, T align) 74 | { 75 | assert(IS_POWER2(align)); 76 | return ALIGN_NUMBER(n, align); 77 | } 78 | 79 | template 80 | static T* AlignPointer(T* ptr, size_t align) 81 | { 82 | assert(IS_POWER2(align)); 83 | return (T*)ALIGN_POINTER(ptr, align); 84 | } 85 | 86 | extern "C" 87 | { 88 | #else 89 | #include 90 | #endif // __cplusplus 91 | 92 | // Returns a new buffer that is at least the size "nbytes". 93 | // The buffer will be aligned to "align" bytes. 94 | // Returns NULL on error. On successful allocation, 95 | // the returned buffer must be freed using "avs_free". 
96 | inline void* avs_malloc(size_t nbytes, size_t align) 97 | { 98 | if (!IS_POWER2(align)) 99 | return NULL; 100 | 101 | size_t offset = sizeof(void*) + align - 1; 102 | 103 | void *orig = malloc(nbytes + offset); 104 | if (orig == NULL) 105 | return NULL; 106 | 107 | void **aligned = (void**)(((uintptr_t)orig + (uintptr_t)offset) & (~(uintptr_t)(align-1))); 108 | aligned[-1] = orig; 109 | return aligned; 110 | } 111 | 112 | // Buffers allocated using "avs_malloc" must be freed 113 | // using "avs_free" instead of "free". 114 | inline void avs_free(void *ptr) 115 | { 116 | // Mirroring free()'s semantic requires us to accept NULLs 117 | if (ptr == NULL) 118 | return; 119 | 120 | free(((void**)ptr)[-1]); 121 | } 122 | 123 | #ifdef __cplusplus 124 | } // extern "C" 125 | 126 | // The point of these undef's is to force using the template functions 127 | // if we are in C++ mode. For C, the user can rely only on the macros. 128 | #undef IS_PTR_ALIGNED 129 | #undef ALIGN_NUMBER 130 | #undef ALIGN_POINTER 131 | 132 | #endif // __cplusplus 133 | 134 | #endif //AVS_ALIGNMENT_H 135 | -------------------------------------------------------------------------------- /HDRTools/avs/posix.h: -------------------------------------------------------------------------------- 1 | // This program is free software; you can redistribute it and/or modify 2 | // it under the terms of the GNU General Public License as published by 3 | // the Free Software Foundation; either version 2 of the License, or 4 | // (at your option) any later version. 5 | // 6 | // This program is distributed in the hope that it will be useful, 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | // GNU General Public License for more details. 
10 | // 11 | // You should have received a copy of the GNU General Public License 12 | // along with this program; if not, write to the Free Software 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 14 | // http://www.gnu.org/copyleft/gpl.html . 15 | // 16 | // Linking Avisynth statically or dynamically with other modules is making a 17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 18 | // General Public License cover the whole combination. 19 | // 20 | // As a special exception, the copyright holders of Avisynth give you 21 | // permission to link Avisynth with independent modules that communicate with 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 23 | // terms of these independent modules, and to copy and distribute the 24 | // resulting combined work under terms of your choice, provided that 25 | // every copy of the combined work is accompanied by a complete copy of 26 | // the source code of Avisynth (the version of Avisynth used to produce the 27 | // combined work), being distributed under the terms of the GNU General 28 | // Public License plus this exception. An independent module is a module 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 30 | // import and export plugins, or graphical user interfaces. 
31 | 32 | #ifdef AVS_POSIX 33 | #ifndef AVSCORE_POSIX_H 34 | #define AVSCORE_POSIX_H 35 | 36 | #ifdef __cplusplus 37 | #include 38 | #endif 39 | #include 40 | #include 41 | 42 | // Define these MSVC-extension used in Avisynth 43 | #define __single_inheritance 44 | 45 | // These things don't exist in Linux 46 | #if defined(AVS_HAIKU) 47 | #undef __declspec 48 | #endif 49 | #define __declspec(x) 50 | #define lstrlen strlen 51 | #define lstrcmp strcmp 52 | #define lstrcmpi strcasecmp 53 | #define _stricmp strcasecmp 54 | #define _strnicmp strncasecmp 55 | #define _strdup strdup 56 | #define SetCurrentDirectory(x) chdir(x) 57 | #define SetCurrentDirectoryW(x) chdir(x) 58 | #define GetCurrentDirectoryW(x) getcwd(x) 59 | #define _putenv putenv 60 | #define _alloca alloca 61 | 62 | // Borrowing some compatibility macros from AvxSynth, slightly modified 63 | #define UInt32x32To64(a, b) ((uint64_t)(((uint64_t)((uint32_t)(a))) * ((uint32_t)(b)))) 64 | #define Int64ShrlMod32(a, b) ((uint64_t)((uint64_t)(a) >> (b))) 65 | #define Int32x32To64(a, b) ((int64_t)(((int64_t)((long)(a))) * ((long)(b)))) 66 | 67 | #define InterlockedIncrement(x) __sync_add_and_fetch((x), 1) 68 | #define InterlockedDecrement(x) __sync_sub_and_fetch((x), 1) 69 | #define InterlockedExchangeAdd(x, v) __sync_add_and_fetch((x), (v)) 70 | 71 | #define MulDiv(nNumber, nNumerator, nDenominator) (int32_t) (((int64_t) (nNumber) * (int64_t) (nNumerator) + (int64_t) ((nDenominator)/2)) / (int64_t) (nDenominator)) 72 | 73 | #ifndef TRUE 74 | #define TRUE true 75 | #endif 76 | 77 | #ifndef FALSE 78 | #define FALSE false 79 | #endif 80 | 81 | #define S_FALSE (0x00000001) 82 | #define E_FAIL (0x80004005) 83 | #define FAILED(hr) ((hr) & 0x80000000) 84 | #define SUCCEEDED(hr) (!FAILED(hr)) 85 | 86 | // Statuses copied from comments in exception.cpp 87 | #define STATUS_GUARD_PAGE_VIOLATION 0x80000001 88 | #define STATUS_DATATYPE_MISALIGNMENT 0x80000002 89 | #define STATUS_BREAKPOINT 0x80000003 90 | #define 
STATUS_SINGLE_STEP 0x80000004 91 | #define STATUS_ACCESS_VIOLATION 0xc0000005 92 | #define STATUS_IN_PAGE_ERROR 0xc0000006 93 | #define STATUS_INVALID_HANDLE 0xc0000008 94 | #define STATUS_NO_MEMORY 0xc0000017 95 | #define STATUS_ILLEGAL_INSTRUCTION 0xc000001d 96 | #define STATUS_NONCONTINUABLE_EXCEPTION 0xc0000025 97 | #define STATUS_INVALID_DISPOSITION 0xc0000026 98 | #define STATUS_ARRAY_BOUNDS_EXCEEDED 0xc000008c 99 | #define STATUS_FLOAT_DENORMAL_OPERAND 0xc000008d 100 | #define STATUS_FLOAT_DIVIDE_BY_ZERO 0xc000008e 101 | #define STATUS_FLOAT_INEXACT_RESULT 0xc000008f 102 | #define STATUS_FLOAT_INVALID_OPERATION 0xc0000090 103 | #define STATUS_FLOAT_OVERFLOW 0xc0000091 104 | #define STATUS_FLOAT_STACK_CHECK 0xc0000092 105 | #define STATUS_FLOAT_UNDERFLOW 0xc0000093 106 | #define STATUS_INTEGER_DIVIDE_BY_ZERO 0xc0000094 107 | #define STATUS_INTEGER_OVERFLOW 0xc0000095 108 | #define STATUS_PRIVILEGED_INSTRUCTION 0xc0000096 109 | #define STATUS_STACK_OVERFLOW 0xc00000fd 110 | 111 | // Calling convension 112 | #ifndef AVS_HAIKU 113 | #define __stdcall 114 | #define __cdecl 115 | #endif 116 | 117 | // PowerPC OS X is really niche these days, but this painless equivocation 118 | // of the function/macro names used in posix_get_available_memory() 119 | // is all it takes to let it work. The G5 was 64-bit, and if 10.5 Leopard 120 | // can run in native 64-bit, it probably uses the names in that block as-is. 
121 | #ifdef AVS_MACOS 122 | #ifdef PPC32 123 | #define vm_statistics64_data_t vm_statistics_data_t 124 | #define HOST_VM_INFO64_COUNT HOST_VM_INFO_COUNT 125 | #define HOST_VM_INFO64 HOST_VM_INFO 126 | #define host_statistics64 host_statistics 127 | #endif // PPC32 128 | #endif // AVS_MACOS 129 | 130 | #endif // AVSCORE_POSIX_H 131 | #endif // AVS_POSIX 132 | -------------------------------------------------------------------------------- /HDRTools/ThreadPoolInterface.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ThreadpoolInterface 3 | * 4 | * Allow to use the threadpool, kind of API. 5 | * Copyright (C) 2017 JPSDR 6 | * 7 | * ThreadpoolInterface is free software; you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation; either version 2, or (at your option) 10 | * any later version. 11 | * 12 | * ThreadpoolInterface is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GNU Make; see the file COPYING. If not, write to 19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 *
 */

#ifndef __ThreadPoolInterface_H__
#define __ThreadPoolInterface_H__

// NOTE(review): the header names of the two includes below, and the element
// type of the std::vector member TabId further down, are missing from this
// copy of the file (text between angle brackets appears to have been
// stripped). Restore them from the original header before compiling.
#include 
#include 

#include "./ThreadPoolDef.h"

#define THREADPOOLINTERFACE_VERSION "ThreadPoolInterface 1.12.1"

class ThreadPoolInterface;

// Record associated with one user id. Every data member is protected and
// ThreadPoolInterface is declared a friend, so that class can read and
// write the fields directly.
// NOTE(review): the field meanings are not visible in this header; presumably
// they hold the per-user request options toggled by the Enable*/Disable*/
// Configure* members of ThreadPoolInterface. Confirm in the implementation.
class UserData
{
	friend ThreadPoolInterface;

	public :

	UserData(void);
	virtual ~UserData(void);

	protected :

	uint32_t UserId;
	bool AllowSeveral;
	bool AllowWaiting;
	bool AllowTimeOut;
	bool AllowRetryMax;
	DWORD TimeOut;
	uint8_t RetryMax;
	int8_t NbrePool;
	int8_t UsedPool[MAX_THREAD_POOL];
};


// Programming interface to the thread pool(s).
// The default constructor is protected and the copy constructor, assignment
// and comparison operators are private, so client code cannot construct,
// copy or compare instances itself; the static Init() is the only public
// member here that hands out a ThreadPoolInterface pointer.
// Most members return bool; the meaning of true/false is defined by the
// implementation file and is not visible in this header.
class ThreadPoolInterface
{
	public :

	virtual ~ThreadPoolInterface(void);
	static ThreadPoolInterface* Init(uint8_t num);

	uint8_t GetThreadNumber(uint8_t thread_number,bool logical);
	int16_t AddPool(uint8_t num);
	bool CreatePool(uint8_t num);
	bool DeletePool(uint8_t num);
	bool RemovePool(uint8_t num);
	bool AllocateThreads(uint8_t thread_number,uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,
		bool SetAffinity,bool sleep,ThreadLevelName priority,int8_t nPool);
	// Convenience overload: forwards to the full AllocateThreads() with
	// NormalThreadLevel as the priority.
	bool AllocateThreads(uint8_t thread_number,uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,
		bool SetAffinity,bool sleep,int8_t nPool)
		{return(AllocateThreads(thread_number,offset_core,offset_ht,UseMaxPhysCore,SetAffinity,sleep,
			NormalThreadLevel,nPool));}
	bool GetUserId(uint32_t &UserId);
	bool RemoveUserId(uint32_t UserId);
	bool ChangeThreadsAffinity(uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,bool SetAffinity,int8_t nPool);
	bool ChangeThreadsLevel(ThreadLevelName priority,int8_t nPool);
	bool DeAllocateUserThreads(uint32_t UserId,bool check);
	bool DeAllocatePoolThreads(uint8_t nPool,bool check);
	bool DeAllocateAllThreads(bool check);
	bool RequestThreadPool(uint32_t UserId,uint8_t thread_number,Public_MT_Data_Thread *Data,
		ThreadLevelName priority,int8_t nPool,bool Exclusive);
	bool RequestThreadPool(uint32_t UserId,int8_t &idxPool,uint8_t thread_number,Public_MT_Data_Thread *Data,
		ThreadLevelName priority,int8_t &nPool,bool Exclusive);
	bool RequestThreadPool(uint32_t UserId,int8_t &idxPool,uint8_t thread_number,Public_MT_Data_Thread *Data);
	bool RequestThreadPool(uint32_t UserId,int8_t &idxPool,uint8_t thread_number,Public_MT_Data_Thread *Data,
		ThreadLevelName priority);
	// Convenience overload: same as the six-argument form with
	// NoneThreadLevel as the priority.
	bool RequestThreadPool(uint32_t UserId,uint8_t thread_number,Public_MT_Data_Thread *Data,
		int8_t nPool,bool Exclusive)
		{return(RequestThreadPool(UserId,thread_number,Data,NoneThreadLevel,nPool,Exclusive));}
	// Convenience overload: priority NoneThreadLevel, nPool = -1, Exclusive = false.
	bool RequestThreadPool(uint32_t UserId,uint8_t thread_number,Public_MT_Data_Thread *Data)
		{return(RequestThreadPool(UserId,thread_number,Data,NoneThreadLevel,-1,false));}
	// Convenience overload: caller-supplied priority, nPool = -1, Exclusive = false.
	bool RequestThreadPool(uint32_t UserId,uint8_t thread_number,Public_MT_Data_Thread *Data,
		ThreadLevelName priority)
		{return(RequestThreadPool(UserId,thread_number,Data,priority,-1,false));}
	bool ReleaseThreadPool(uint32_t UserId,bool sleep);
	bool ReleaseThreadPool(uint32_t UserId,bool sleep,int8_t idxPool);
	bool StartThreads(uint32_t UserId);
	bool StartThreads(uint32_t UserId,int8_t idxPool);
	bool WaitThreadsEnd(uint32_t UserId);
	bool WaitThreadsEnd(uint32_t UserId,int8_t idxPool);
	bool GetThreadPoolStatus(uint32_t UserId,int8_t idxPool,int8_t nPool);
	uint8_t GetCurrentThreadAllocated(uint32_t UserId,int8_t idxPool,int8_t nPool);
	uint8_t GetCurrentThreadUsed(uint32_t UserId,int8_t idxPool,int8_t nPool);
	bool EnableAllowSeveral(uint32_t UserId);
	bool DisableAllowSeveral(uint32_t UserId);
	bool IsAllowedSeveral(uint32_t UserId);
	bool EnableWaitonRequest(uint32_t UserId);
	bool DisableWaitonRequest(uint32_t UserId);
	bool EnableTimeOutonRequest(uint32_t UserId);
	bool DisableTimeOutonRequest(uint32_t UserId);
	bool EnableRetryMaxonRequest(uint32_t UserId);
	bool DisableRetryMaxonRequest(uint32_t UserId);
	bool ConfigureTimeOutValue(uint32_t UserId, DWORD dwMilliseconds);
	bool ConfigureRetryMaxValue(uint32_t UserId, uint8_t NbreMax);
	int8_t GetPoolAllocated(uint32_t UserId);
	int8_t GetPoolNumber(uint32_t UserId,int8_t idxPool);
	int8_t GetPoolIndex(uint32_t UserId,int8_t nPool);
	uint8_t GetLogicalCPUNumber(void);
	uint8_t GetPhysicalCoreNumber(void);

	protected :

	bool Status_Ok;
	uint8_t NbrePool;

	public :

	// Returns the Status_Ok flag.
	bool GetThreadPoolInterfaceStatus(void) {return(Status_Ok);}
	// Returns NbrePool when Status_Ok is set, -1 otherwise.
	// NbrePool is a uint8_t that is converted to the int8_t return type.
	int8_t GetCurrentPoolCreated(void) {return((Status_Ok) ? NbrePool:-1);}

	protected :

	ThreadPoolInterface(void);

	CRITICAL_SECTION CriticalSection;
	HANDLE ghMutexResources;
	BOOL CSectionOk;
	HANDLE JobsEnded[MAX_THREAD_POOL],ThreadPoolFree[MAX_THREAD_POOL];
	std::vector TabId;
	HANDLE EndExclusive;
	bool Error_Occured;

	// State arrays with one entry per pool slot (MAX_THREAD_POOL entries each).
	bool ThreadPoolRequested[MAX_THREAD_POOL],JobsRunning[MAX_THREAD_POOL];
	bool ThreadPoolReleased[MAX_THREAD_POOL],ThreadWaitEnd[MAX_THREAD_POOL];
	bool ThreadPoolWaitFree[MAX_THREAD_POOL];
	uint32_t ThreadPoolUserId[MAX_THREAD_POOL];
	bool ExclusiveMode;
	uint8_t NbrePoolEvent;

	bool CreatePoolEvent(uint8_t num);
	void FreeData(void);
	void FreePool(void);
	void FreePool(int8_t nPool);
	bool EnterCS(void);
	void LeaveCS(void);
	bool GetMutex(void);
	void FreeMutex(void);
	int32_t GetUserIdIndex(uint32_t UserId);
	bool ReleaseThreadPoolCore(uint32_t UserId,int32_t index,bool sleep,int8_t nPool,int8_t idxPool);
	bool StartThreadsCore(int8_t nPool);
	bool WaitThreadsEndCore(uint32_t UserId,int8_t nPool,int8_t idxPool);

	private :

	// Copy construction, assignment and comparison are private, so code
	// outside the class cannot copy or compare interface objects.
	// NOTE(review): presumably declared without a definition; confirm.
	ThreadPoolInterface (const ThreadPoolInterface &other);
	ThreadPoolInterface& operator = (const ThreadPoolInterface &other);
	bool operator == (const ThreadPoolInterface &other) const;
	bool operator != (const ThreadPoolInterface &other) const;
};

#endif // __ThreadPoolInterface_H__

--------------------------------------------------------------------------------
/HDRTools/avs/config.h:
--------------------------------------------------------------------------------
// Avisynth C Interface Version 0.20
// Copyright 2003 Kevin Atkinson

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
// http://www.gnu.org/copyleft/gpl.html .
18 | // 19 | // As a special exception, I give you permission to link to the 20 | // Avisynth C interface with independent modules that communicate with 21 | // the Avisynth C interface solely through the interfaces defined in 22 | // avisynth_c.h, regardless of the license terms of these independent 23 | // modules, and to copy and distribute the resulting combined work 24 | // under terms of your choice, provided that every copy of the 25 | // combined work is accompanied by a complete copy of the source code 26 | // of the Avisynth C interface and Avisynth itself (with the version 27 | // used to produce the combined work), being distributed under the 28 | // terms of the GNU General Public License plus this exception. An 29 | // independent module is a module which is not derived from or based 30 | // on Avisynth C Interface, such as 3rd-party filters, import and 31 | // export plugins, or graphical user interfaces. 32 | 33 | #ifndef AVS_CONFIG_H 34 | #define AVS_CONFIG_H 35 | 36 | // Undefine this to get cdecl calling convention 37 | #define AVSC_USE_STDCALL 1 38 | 39 | // NOTE TO PLUGIN AUTHORS: 40 | // Because FRAME_ALIGN can be substantially higher than the alignment 41 | // a plugin actually needs, plugins should not use FRAME_ALIGN to check for 42 | // alignment. They should always request the exact alignment value they need. 43 | // This is to make sure that plugins work over the widest range of AviSynth 44 | // builds possible. 
45 | #define FRAME_ALIGN 64 46 | 47 | #if defined(_M_AMD64) || defined(__x86_64) 48 | # define X86_64 49 | #elif defined(_M_IX86) || defined(__i386__) 50 | # define X86_32 51 | // VS2017 introduced _M_ARM64 52 | #elif defined(_M_ARM64) || defined(__aarch64__) 53 | # define ARM64 54 | #elif defined(_M_ARM) || defined(__arm__) 55 | # define ARM32 56 | #elif defined(__PPC64__) 57 | # define PPC64 58 | #elif defined(_M_PPC) || defined(__PPC__) || defined(__POWERPC__) 59 | # define PPC32 60 | #elif defined(__riscv) 61 | # define RISCV 62 | #elif defined(__loongarch__) 63 | # define LOONGARCH 64 | #elif defined(__sparc_v9__) 65 | # define SPARC 66 | #elif defined(__mips__) 67 | # define MIPS 68 | #else 69 | # error Unsupported CPU architecture. 70 | #endif 71 | 72 | // VC++ LLVM-Clang-cl MinGW-Gnu 73 | // MSVC x x 74 | // MSVC_PURE x 75 | // CLANG x 76 | // GCC x 77 | 78 | #if defined(__clang__) 79 | // Check clang first. clang-cl also defines _MSC_VER 80 | // We set MSVC because they are mostly compatible 81 | # define CLANG 82 | #if defined(_MSC_VER) 83 | # define MSVC 84 | # define AVS_FORCEINLINE __attribute__((always_inline)) 85 | #else 86 | # define AVS_FORCEINLINE __attribute__((always_inline)) inline 87 | #endif 88 | #elif defined(_MSC_VER) 89 | # define MSVC 90 | # define MSVC_PURE 91 | # define AVS_FORCEINLINE __forceinline 92 | #elif defined(__GNUC__) 93 | # define GCC 94 | # define AVS_FORCEINLINE __attribute__((always_inline)) inline 95 | #elif defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) 96 | // Intel C++ Compilers with MSVC command line interface will not appear here rather at _MSC_VER 97 | # define AVS_FORCEINLINE inline 98 | # undef __forceinline 99 | # define __forceinline inline 100 | #else 101 | # error Unsupported compiler.
102 | # define AVS_FORCEINLINE inline 103 | # undef __forceinline 104 | # define __forceinline inline 105 | #endif 106 | 107 | #if defined(_WIN32) 108 | # define AVS_WINDOWS 109 | #elif defined(__linux__) 110 | # define AVS_LINUX 111 | # define AVS_POSIX 112 | #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) 113 | # define AVS_BSD 114 | # define AVS_POSIX 115 | #elif defined(__APPLE__) 116 | # define AVS_MACOS 117 | # define AVS_POSIX 118 | #elif defined(__HAIKU__) 119 | # define AVS_HAIKU 120 | # define AVS_POSIX 121 | #else 122 | # error Operating system unsupported. 123 | #endif 124 | 125 | #if defined(AVS_WINDOWS) 126 | # if defined(X86_32) || defined(X86_64) 127 | # define AVS_WINDOWS_X86 128 | # elif defined(ARM64) || defined(ARM32) 129 | # define AVS_WINDOWS_ARM 130 | # endif 131 | #endif 132 | 133 | #if defined(MSVC) && !defined(AVS_WINDOWS_X86) 134 | # error Unsupported combination of compiler, operating system, and machine architecture. 135 | #endif 136 | 137 | // useful warnings disabler macros for supported compilers 138 | 139 | #if defined(_MSC_VER) 140 | #define DISABLE_WARNING_PUSH __pragma(warning( push )) 141 | #define DISABLE_WARNING_POP __pragma(warning( pop )) 142 | #define DISABLE_WARNING(warningNumber) __pragma(warning( disable : warningNumber )) 143 | 144 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE DISABLE_WARNING(4101) 145 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION DISABLE_WARNING(4505) 146 | // other warnings you want to deactivate... 
147 | 148 | #elif defined(__GNUC__) || defined(__clang__) 149 | #define DO_PRAGMA(X) _Pragma(#X) 150 | #define DISABLE_WARNING_PUSH DO_PRAGMA(GCC diagnostic push) 151 | #define DISABLE_WARNING_POP DO_PRAGMA(GCC diagnostic pop) 152 | #define DISABLE_WARNING(warningName) DO_PRAGMA(GCC diagnostic ignored #warningName) 153 | 154 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE DISABLE_WARNING(-Wunused-variable) 155 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION DISABLE_WARNING(-Wunused-function) 156 | // other warnings you want to deactivate... 157 | 158 | #else 159 | #define DISABLE_WARNING_PUSH 160 | #define DISABLE_WARNING_POP 161 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE 162 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION 163 | // other warnings you want to deactivate... 164 | 165 | #endif 166 | 167 | #if defined(AVS_WINDOWS) && defined(_USING_V110_SDK71_) 168 | // Windows XP does not have proper initialization for 169 | // thread local variables. 170 | // Use workaround instead __declspec(thread) 171 | #define XP_TLS 172 | #endif 173 | 174 | #ifndef MSVC 175 | // GCC and Clang can be used on big endian systems, MSVC can't. 176 | # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 177 | # define AVS_ENDIANNESS "little" 178 | # elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 179 | # define AVS_ENDIANNESS "big" 180 | # else 181 | # define AVS_ENDIANNESS "middle" 182 | # endif 183 | #else 184 | #define AVS_ENDIANNESS "little" 185 | #endif 186 | 187 | #endif //AVS_CONFIG_H 188 | -------------------------------------------------------------------------------- /HDRTools/MatrixClass.h: -------------------------------------------------------------------------------- 1 | /* 2 | * MatrixClass 3 | * 4 | * Matrix and vector class allowing several operations. 
5 | * Copyright (C) 2017 JPSDR 6 | * 7 | * MatrixClass is free software; you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation; either version 2, or (at your option) 10 | * any later version. 11 | * 12 | * MatrixClass is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GNU Make; see the file COPYING. If not, write to 19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 20 | * 21 | */ 22 | 23 | #ifndef _MATRIX_CLASS_H 24 | #define _MATRIX_CLASS_H 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | typedef enum MTRXCL_COEFF_DATA_TYPE_ { 31 | MTRXCL_DATA_NONE,MTRXCL_DATA_DOUBLE,MTRXCL_DATA_FLOAT,MTRXCL_DATA_UINT64,MTRXCL_DATA_INT64, 32 | MTRXCL_DATA_UINT32,MTRXCL_DATA_INT32,MTRXCL_DATA_UINT16,MTRXCL_DATA_INT16, 33 | MTRXCL_DATA_UINT8,MTRXCL_DATA_INT8} MTRXCL_COEFF_DATA_TYPE; 34 | 35 | 36 | void SetCPUMatrixClass(const bool SSE2,const bool AVX,const bool AVX2,const bool AVX512); 37 | 38 | 39 | class Vector 40 | { 41 | public : 42 | Vector(void); 43 | Vector(const uint16_t l,const MTRXCL_COEFF_DATA_TYPE data); 44 | Vector(const Vector &x); 45 | virtual ~Vector(void); 46 | 47 | inline bool AllocCheck(void) const {return(Coeff!=nullptr);} 48 | bool Create(void); 49 | bool Create(const uint16_t l,const MTRXCL_COEFF_DATA_TYPE data); 50 | bool Create(const Vector &x); 51 | bool CopyStrict(const Vector &x); 52 | bool CopyRaw(const void *ptr); 53 | bool CopyRaw(const void *ptr,const uint16_t lgth); 54 | bool ExportRaw(void *ptr); 55 | bool ExportRaw(void *ptr,const uint16_t lgth); 56 | void Destroy(void); 57 | bool FillD(const double data); 58 | bool FillF(const float data); 
59 | bool FillZero(void); 60 | inline MTRXCL_COEFF_DATA_TYPE GetDataType(void) const {return(data_type);} 61 | bool SetInfo(const uint16_t l,const MTRXCL_COEFF_DATA_TYPE data); 62 | void GetInfo(uint16_t &l, MTRXCL_COEFF_DATA_TYPE &data) const; 63 | inline uint16_t GetLength(void) const {return(length);} 64 | inline void* GetPtrVector(void) const {return(Coeff);} 65 | inline size_t GetDataSize(void) const {return(size);} 66 | inline double GetD(const uint16_t i) const {return(((double *)Coeff)[i]);} 67 | inline float GetF(const uint16_t i) const {return(((float *)Coeff)[i]);} 68 | inline void SetD(const uint16_t i,const double d) {((double *)Coeff)[i]=d;} 69 | inline void SetF(const uint16_t i,const float d) {((float *)Coeff)[i]=d;} 70 | bool GetSafeD(const uint16_t i,double &d) const ; 71 | bool SetSafeD(const uint16_t i,const double d); 72 | bool GetSafeF(const uint16_t i,float &d) const ; 73 | bool SetSafeF(const uint16_t i,const float d); 74 | 75 | protected : 76 | void *Coeff; 77 | uint16_t length; 78 | size_t size; 79 | MTRXCL_COEFF_DATA_TYPE data_type; 80 | 81 | private : 82 | Vector& operator = (const Vector &other); 83 | bool operator == (const Vector &other) const; 84 | bool operator != (const Vector &other) const; 85 | }; 86 | 87 | class Matrix; 88 | 89 | class Vector_Compute : public Vector 90 | { 91 | protected : 92 | bool SSE2_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; 93 | 94 | public : 95 | Vector_Compute(void); 96 | Vector_Compute(const uint16_t l,const MTRXCL_COEFF_DATA_TYPE data); 97 | Vector_Compute(const Vector_Compute &x); 98 | virtual ~Vector_Compute(void); 99 | 100 | inline void SetSSE2(const bool val) {SSE2_Enable=val;} 101 | inline void SetAVX(const bool val) {AVX_Enable=val; if (val) {SSE2_Enable=true;}} 102 | inline void SetAVX2(const bool val) {AVX2_Enable=val; if (val) {AVX_Enable=true; SSE2_Enable=true;}} 103 | inline void SetAVX512(const bool val) {AVX512_Enable=val; if (val) {AVX2_Enable=true; AVX_Enable=true; SSE2_Enable=true;}} 
104 | 105 | bool Mult(const double coef,const Vector &x); 106 | bool Mult(const double coef); 107 | bool Add(const double coef,const Vector &x); 108 | bool Add(const double coef); 109 | bool Sub(const double coef,const Vector &x); 110 | bool Sub(const double coef); 111 | bool Add_X(const Vector &x,const Vector &y); 112 | bool Add_X(const Vector &x); 113 | bool Sub_X(const Vector &x,const Vector &y); 114 | bool Sub_X(const Vector &x); 115 | bool InvSub_X(const Vector &x); 116 | bool Mult_X(const Vector &x,const Vector &y); 117 | bool Mult_X(const Vector &x); 118 | 119 | bool Product_AX(const Matrix &ma,const Vector &x); 120 | bool Product_AX(const Matrix &ma); 121 | bool Product_tAX(const Matrix &ma,const Vector &x); 122 | bool Product_tAX(const Matrix &ma); 123 | 124 | bool Norme2(double &result); 125 | bool Distance2(const Vector &x,double &result); 126 | bool Norme1(double &result); 127 | bool Distance1(const Vector &x,double &result); 128 | 129 | protected : 130 | // Float 131 | void MultF(const double coef,const Vector &x); 132 | void MultF(const double coef); 133 | void AddF(const double coef,const Vector &x); 134 | void AddF(const double coef); 135 | void SubF(const double coef,const Vector &x); 136 | void SubF(const double coef); 137 | void AddF_X(const Vector &x,const Vector &y); 138 | void AddF_X(const Vector &x); 139 | void SubF_X(const Vector &x,const Vector &y); 140 | void SubF_X(const Vector &x); 141 | void InvSubF_X(const Vector &x); 142 | void MultF_X(const Vector &x,const Vector &y); 143 | void MultF_X(const Vector &x); 144 | 145 | void ProductF_AX(const Matrix &ma,const Vector &x); 146 | void ProductF_tAX(const Matrix &ma,const Vector &x); 147 | 148 | double Norme2F(void); 149 | double Distance2F(const Vector &x); 150 | double Norme1F(void); 151 | double Distance1F(const Vector &x); 152 | 153 | // Double 154 | void MultD(const double coef,const Vector &x); 155 | void MultD(const double coef); 156 | void AddD(const double coef,const Vector &x); 157 
| void AddD(const double coef); 158 | void SubD(const double coef,const Vector &x); 159 | void SubD(const double coef); 160 | void AddD_X(const Vector &x,const Vector &y); 161 | void AddD_X(const Vector &x); 162 | void SubD_X(const Vector &x,const Vector &y); 163 | void SubD_X(const Vector &x); 164 | void InvSubD_X(const Vector &x); 165 | void MultD_X(const Vector &x,const Vector &y); 166 | void MultD_X(const Vector &x); 167 | 168 | void ProductD_AX(const Matrix &ma,const Vector &x); 169 | void ProductD_tAX(const Matrix &ma,const Vector &x); 170 | 171 | double Norme2D(void); 172 | double Distance2D(const Vector &x); 173 | double Norme1D(void); 174 | double Distance1D(const Vector &x); 175 | 176 | private : 177 | Vector_Compute& operator = (const Vector_Compute &other); 178 | bool operator == (const Vector_Compute &other) const; 179 | bool operator != (const Vector_Compute &other) const; 180 | }; 181 | 182 | /* Matrix: 2-D table of coefficients reached through the untyped pointer Coeff. Element (i,j) is read at byte offset i*pitch from Coeff and then indexed by j in units of the element type (see GetD/GetF); data_type records which element type is in use. */ 183 | class Matrix 184 | { 185 | public : 186 | Matrix(void); 187 | Matrix(const uint16_t l,const uint16_t c,const MTRXCL_COEFF_DATA_TYPE data); 188 | Matrix(const Matrix &m); 189 | virtual ~Matrix(void); 190 | /* AllocCheck: true while Coeff is non-null */ 191 | inline bool AllocCheck(void) const {return(Coeff!=nullptr);} 192 | bool Create(void); 193 | bool Create(const uint16_t l,const uint16_t c,const MTRXCL_COEFF_DATA_TYPE data); 194 | bool Create(const Matrix &m); 195 | bool CopyStrict(const Matrix &m); 196 | bool CopyRaw(const void *ptr); 197 | bool CopyRaw(const void *ptr,const ptrdiff_t ptr_pitch); 198 | bool CopyRaw(const void *ptr,const ptrdiff_t ptr_pitch,const uint16_t ln,const uint16_t co); 199 | bool ExportRaw(void *ptr); 200 | bool ExportRaw(void *ptr,const ptrdiff_t ptr_pitch); 201 | bool ExportRaw(void *ptr,const ptrdiff_t ptr_pitch,const uint16_t ln,const uint16_t co); 202 | void Destroy(void); 203 | bool FillD(const double data); 204 | bool FillF(const float data); 205 | bool FillZero(void); 206 | inline MTRXCL_COEFF_DATA_TYPE GetDataType(void) const {return(data_type);} 207 | bool 
SetInfo(const uint16_t l,const uint16_t c,const MTRXCL_COEFF_DATA_TYPE data); 208 | void GetInfo(uint16_t &l,uint16_t &c, MTRXCL_COEFF_DATA_TYPE &data) const; 209 | inline uint16_t GetLines(void) const {return(lines);} 210 | inline uint16_t GetColumns(void) const {return(columns);} 211 | inline void* GetPtrMatrix(void) const {return(Coeff);} 212 | /* address of row i: Coeff advanced by i*pitch bytes */ inline void* GetPtrMatrixLine(const uint16_t i) const {return((void *)((uint8_t *)Coeff+i*pitch));} 213 | inline ptrdiff_t GetPitch(void) const {return(pitch);} 214 | inline size_t GetDataSize(void) const {return(size);} 215 | /* GetD/GetF/SetD/SetF: direct element access; these inline bodies perform no bounds check and no check that data_type matches the accessor's element type */ inline double GetD(const uint16_t i,const uint16_t j) const {return(((double *)((uint8_t *)Coeff+(ptrdiff_t)i*pitch))[j]);} 216 | inline float GetF(const uint16_t i,const uint16_t j) const {return(((float *)((uint8_t *)Coeff+(ptrdiff_t)i*pitch))[j]);} 217 | inline void SetD(const uint16_t i,const uint16_t j,const double d) {((double *)((uint8_t *)Coeff+(ptrdiff_t)i*pitch))[j]=d;} 218 | inline void SetF(const uint16_t i,const uint16_t j,const float d) {((float *)((uint8_t *)Coeff+(ptrdiff_t)i*pitch))[j]=d;} 219 | /* The Safe variants report through their bool result; presumably the checked counterparts of the accessors above - their bodies are not in this header, confirm in the implementation file */ bool GetSafeD(const uint16_t i,const uint16_t j,double &d) const ; 220 | bool SetSafeD(const uint16_t i,const uint16_t j,const double d); 221 | bool GetSafeF(const uint16_t i,const uint16_t j,float &d) const ; 222 | bool SetSafeF(const uint16_t i,const uint16_t j,const float d); 223 | 224 | protected : 225 | void *Coeff; 226 | uint16_t columns,lines; 227 | size_t size; 228 | ptrdiff_t pitch; 229 | MTRXCL_COEFF_DATA_TYPE data_type; 230 | /* protected no-op assignment: copies no state, only returns *this */ 231 | Matrix& operator=(const Matrix&){return(*this);} 232 | /* comparison operators are private and have no inline body here */ 233 | private : 234 | bool operator == (const Matrix &other) const; 235 | bool operator != (const Matrix &other) const; 236 | }; 237 | 238 | /* Matrix_Compute: derives from Matrix and adds zero_value plus four instruction-set enable flags */ 239 | class Matrix_Compute : public Matrix 240 | { 241 | protected : 242 | double zero_value; 243 | bool SSE2_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; 244 | 245 | public : 246 | Matrix_Compute(void); 247 | Matrix_Compute(const uint16_t l,const uint16_t 
c,const MTRXCL_COEFF_DATA_TYPE data); 248 | Matrix_Compute(const Matrix_Compute &m); 249 | virtual ~Matrix_Compute(void); 250 | /* Instruction-set switches: passing true to a setter also sets the flag of every lower level (AVX512 > AVX2 > AVX > SSE2); passing false clears only that setter's own flag */ 251 | inline void SetSSE2(const bool val) {SSE2_Enable=val;} 252 | inline void SetAVX(const bool val) {AVX_Enable=val; if (val) {SSE2_Enable=true;} } 253 | inline void SetAVX2(const bool val) {AVX2_Enable=val; if (val) {AVX_Enable=true; SSE2_Enable=true;}} 254 | inline void SetAVX512(const bool val) {AVX512_Enable=val; if (val) {AVX2_Enable=true; AVX_Enable=true; SSE2_Enable=true;}} 255 | 256 | bool CreateTranspose(const Matrix &m); 257 | bool CopyStrict(const Matrix_Compute &m); 258 | /* stores the magnitude of z, so zero_value is never negative */ inline void SetZeroValue(const double z) {zero_value=fabs(z);} 259 | inline double GetZeroValue(void) const {return(zero_value);} 260 | /* Public operations below return bool, except InverseSafe which returns an int8_t code; the meaning of these results is set by the implementation, which is not visible in this header */ 261 | bool Transpose(void); 262 | bool Transpose(const Matrix &ma); 263 | 264 | bool Mult(const double coef,const Matrix &ma); 265 | bool Mult(const double coef); 266 | bool Add(const double coef,const Matrix &ma); 267 | bool Add(const double coef); 268 | bool Sub(const double coef,const Matrix &ma); 269 | bool Sub(const double coef); 270 | bool Add_A(const Matrix &ma,const Matrix &mb); 271 | bool Add_A(const Matrix &ma); 272 | bool Sub_A(const Matrix &ma,const Matrix &mb); 273 | bool Sub_A(const Matrix &ma); 274 | bool InvSub_A(const Matrix &ma); 275 | bool Mult_A(const Matrix &ma,const Matrix &mb); 276 | bool Mult_A(const Matrix &ma); 277 | 278 | bool Product_AB(const Matrix &ma,const Matrix &mb); 279 | bool Product_AtB(const Matrix &ma,const Matrix &mb); 280 | bool Product_tAA(const Matrix &ma); 281 | bool Product_tAA(void); 282 | 283 | bool Inverse(const Matrix &ma); 284 | bool Inverse(void); 285 | int8_t InverseSafe(const Matrix_Compute &ma); 286 | int8_t InverseSafe(void); 287 | 288 | bool Norme2(double &result); 289 | bool Distance2(const Matrix &ma,double &result); 290 | bool Norme1(double &result); 291 | bool Distance1(const Matrix &ma,double &result); 292 | /* The protected F-suffixed and D-suffixed members mirror the public operations above; presumably the float and double implementations the public entry points select by data_type - confirm in the implementation file */ 293 | protected : 294 | // Float 295 | void 
TransposeF(const Matrix &ma); 296 | 297 | void MultF(const double coef,const Matrix &ma); 298 | void MultF(const double coef); 299 | void AddF(const double coef,const Matrix &ma); 300 | void AddF(const double coef); 301 | void SubF(const double coef,const Matrix &ma); 302 | void SubF(const double coef); 303 | void AddF_A(const Matrix &ma,const Matrix &mb); 304 | void AddF_A(const Matrix &ma); 305 | void SubF_A(const Matrix &ma,const Matrix &mb); 306 | void SubF_A(const Matrix &ma); 307 | void InvSubF_A(const Matrix &ma); 308 | void MultF_A(const Matrix &ma,const Matrix &mb); 309 | void MultF_A(const Matrix &ma); 310 | 311 | void ProductF_AB(const Matrix &ma,const Matrix &mb); 312 | void ProductF_AtB(const Matrix &ma,const Matrix &mb); 313 | 314 | bool InverseF(const Matrix &ma); 315 | int8_t InverseSafeF(const Matrix_Compute &ma); 316 | 317 | double Norme2F(void); 318 | double Distance2F(const Matrix &ma); 319 | double Norme1F(void); 320 | double Distance1F(const Matrix &ma); 321 | 322 | // Double 323 | void MultD(const double coef,const Matrix &ma); 324 | void MultD(const double coef); 325 | void AddD(const double coef,const Matrix &ma); 326 | void AddD(const double coef); 327 | void SubD(const double coef,const Matrix &ma); 328 | void SubD(const double coef); 329 | void AddD_A(const Matrix &ma,const Matrix &mb); 330 | void AddD_A(const Matrix &ma); 331 | void SubD_A(const Matrix &ma,const Matrix &mb); 332 | void SubD_A(const Matrix &ma); 333 | void InvSubD_A(const Matrix &ma); 334 | void MultD_A(const Matrix &ma,const Matrix &mb); 335 | void MultD_A(const Matrix &ma); 336 | 337 | void TransposeD(const Matrix &ma); 338 | 339 | void ProductD_AB(const Matrix &ma,const Matrix &mb); 340 | void ProductD_AtB(const Matrix &ma,const Matrix &mb); 341 | 342 | bool InverseD(const Matrix &ma); 343 | int8_t InverseSafeD(const Matrix_Compute &ma); 344 | 345 | double Norme2D(void); 346 | double Distance2D(const Matrix &ma); 347 | double Norme1D(void); 348 | double 
Distance1D(const Matrix &ma); 349 | 350 | // U64 351 | void TransposeU64(const Matrix &ma); 352 | 353 | // I64 354 | void TransposeI64(const Matrix &ma); 355 | 356 | // U32 357 | void TransposeU32(const Matrix &ma); 358 | 359 | // I32 360 | void TransposeI32(const Matrix &ma); 361 | 362 | // U16 363 | void TransposeU16(const Matrix &ma); 364 | 365 | // I16 366 | void TransposeI16(const Matrix &ma); 367 | 368 | // U8 369 | void TransposeU8(const Matrix &ma); 370 | 371 | // I8 372 | void TransposeI8(const Matrix &ma); 373 | 374 | Matrix_Compute& operator=(const Matrix_Compute&){return(*this);} 375 | 376 | private : 377 | bool operator == (const Matrix_Compute &other) const; 378 | bool operator != (const Matrix_Compute &other) const; 379 | }; 380 | 381 | #endif 382 | -------------------------------------------------------------------------------- /HDRTools/HDRTools.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {1820278E-F1C3-48E8-A951-EE5E95079370} 23 | Win32Proj 24 | HDRTools 25 | 26 | 27 | 28 | DynamicLibrary 29 | true 30 | MultiByte 31 | 32 | 33 | DynamicLibrary 34 | true 35 | MultiByte 36 | 37 | 38 | DynamicLibrary 39 | false 40 | true 41 | MultiByte 42 | 43 | 44 | DynamicLibrary 45 | false 46 | true 47 | MultiByte 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | true 68 | 69 | 70 | true 71 | 72 | 73 | false 74 | 75 | 76 | false 77 | 78 | 79 | 80 | NotUsing 81 | Level3 82 | Disabled 83 | WIN32;_DEBUG;_WINDOWS;_USRDLL;HDRTOOLS_EXPORTS;%(PreprocessorDefinitions) 84 | 85 | 86 | Windows 87 | true 88 | 89 | 90 | 91 | 92 | NotUsing 93 | Level3 94 | Disabled 95 | WIN32;_DEBUG;_WINDOWS;_USRDLL;HDRTOOLS_EXPORTS;%(PreprocessorDefinitions) 96 | 97 | 98 | Windows 99 | true 100 | 101 | 102 | 103 | 104 | Level3 105 | NotUsing 106 | 
MaxSpeed 107 | true 108 | true 109 | WIN32;NDEBUG;_WINDOWS;_USRDLL;HDRTOOLS_EXPORTS;%(PreprocessorDefinitions) 110 | AnySuitable 111 | Speed 112 | true 113 | true 114 | true 115 | false 116 | true 117 | true 118 | 119 | 120 | Windows 121 | false 122 | true 123 | true 124 | 125 | 126 | 127 | 128 | Level3 129 | NotUsing 130 | MaxSpeed 131 | true 132 | true 133 | WIN32;NDEBUG;_WINDOWS;_USRDLL;HDRTOOLS_EXPORTS;%(PreprocessorDefinitions) 134 | AnySuitable 135 | Speed 136 | true 137 | true 138 | true 139 | false 140 | true 141 | true 142 | 143 | 144 | Windows 145 | false 146 | true 147 | true 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | false 172 | false 173 | true 174 | true 175 | 176 | 177 | true 178 | true 179 | false 180 | false 181 | 182 | 183 | false 184 | false 185 | true 186 | true 187 | true 188 | true 189 | 190 | 191 | true 192 | true 193 | false 194 | false 195 | true 196 | true 197 | 198 | 199 | true 200 | false 201 | true 202 | false 203 | true 204 | true 205 | true 206 | true 207 | 208 | 209 | true 210 | false 211 | true 212 | false 213 | true 214 | true 215 | true 216 | true 217 | 218 | 219 | true 220 | false 221 | true 222 | false 223 | true 224 | true 225 | true 226 | true 227 | 228 | 229 | true 230 | false 231 | true 232 | false 233 | true 234 | true 235 | true 236 | true 237 | 238 | 239 | false 240 | false 241 | true 242 | true 243 | 244 | 245 | false 246 | false 247 | true 248 | true 249 | 250 | 251 | 252 | 253 | 254 | 255 | -------------------------------------------------------------------------------- /HDRTools/HDRTools_AVX2_asm.asm: -------------------------------------------------------------------------------- 1 | ; 2 | ; HDRTools() 3 | ; 4 | ; Several functions for working on HDR data, and linear to non-linear conversions. 
5 | ; Copyright (C) 2018 JPSDR 6 | ; 7 | ; HDRTools is free software; you can redistribute it and/or modify 8 | ; it under the terms of the GNU General Public License as published by 9 | ; the Free Software Foundation; either version 2, or (at your option) 10 | ; any later version. 11 | ; 12 | ; HDRTools is distributed in the hope that it will be useful, 13 | ; but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | ; GNU General Public License for more details. 16 | ; 17 | ; You should have received a copy of the GNU General Public License 18 | ; along with GNU Make; see the file COPYING. If not, write to 19 | ; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 20 | ; 21 | ; 22 | 23 | .xmm 24 | .model flat,c 25 | 26 | .data 27 | 28 | align 16 29 | 30 | data segment align(32) 31 | 32 | data_f_0 real4 8 dup(0.0) 33 | data_f_1 real4 8 dup(1.0) 34 | 35 | data_f_1048575 real4 8 dup(1048575.0) 36 | data_f_65535 real4 8 dup(65535.0) 37 | data_dw_1048575 dword 8 dup(1048575) 38 | data_dw_65535 dword 8 dup(65535) 39 | data_dw_0 dword 8 dup(0) 40 | 41 | data_w_128 word 16 dup(128) 42 | data_w_32 word 16 dup(32) 43 | data_w_8 word 16 dup(8) 44 | 45 | .code 46 | 47 | 48 | ;*************************************************** 49 | ;** YUV to RGB functions ** 50 | ;*************************************************** 51 | 52 | 53 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 proc src1:dword,src2:dword,dst:dword,w:dword 54 | 55 | public JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 56 | 57 | push esi 58 | push edi 59 | push ebx 60 | 61 | vpcmpeqb ymm3,ymm3,ymm3 62 | 63 | mov edi,dst 64 | mov esi,src1 65 | mov edx,src2 66 | xor eax,eax 67 | mov ecx,w 68 | mov ebx,32 69 | 70 | Convert_Planar420_to_Planar422_8_AVX2_1: 71 | vmovdqa ymm0,YMMWORD ptr[esi+eax] 72 | vmovdqa ymm1,YMMWORD ptr[edx+eax] 73 | vpxor ymm2,ymm0,ymm3 74 | vpxor ymm1,ymm1,ymm3 75 | vpavgb 
ymm2,ymm2,ymm1 76 | vpxor ymm2,ymm2,ymm3 77 | vpavgb ymm2,ymm2,ymm0 78 | 79 | vmovdqa YMMWORD ptr[edi+eax],ymm2 80 | add eax,ebx 81 | loop Convert_Planar420_to_Planar422_8_AVX2_1 82 | 83 | vzeroupper 84 | 85 | pop ebx 86 | pop edi 87 | pop esi 88 | 89 | ret 90 | 91 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 endp 92 | 93 | 94 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 proc src1:dword,src2:dword,dst:dword,w:dword 95 | 96 | public JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 97 | 98 | push esi 99 | push edi 100 | push ebx 101 | 102 | vpcmpeqb ymm3,ymm3,ymm3 103 | 104 | mov edi,dst 105 | mov esi,src1 106 | mov edx,src2 107 | xor eax,eax 108 | mov ecx,w 109 | mov ebx,32 110 | 111 | Convert_Planar420_to_Planar422_16_AVX2_1: 112 | vmovdqa ymm0,YMMWORD ptr[esi+eax] 113 | vmovdqa ymm1,YMMWORD ptr[edx+eax] 114 | vpxor ymm2,ymm0,ymm3 115 | vpxor ymm1,ymm1,ymm3 116 | vpavgw ymm2,ymm2,ymm1 117 | vpxor ymm2,ymm2,ymm3 118 | vpavgw ymm2,ymm2,ymm0 119 | 120 | vmovdqa YMMWORD ptr[edi+eax],ymm2 121 | add eax,ebx 122 | loop Convert_Planar420_to_Planar422_16_AVX2_1 123 | 124 | vzeroupper 125 | 126 | pop ebx 127 | pop edi 128 | pop esi 129 | 130 | ret 131 | 132 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 endp 133 | 134 | 135 | ;*************************************************** 136 | ;** RGB to YUV functions ** 137 | ;*************************************************** 138 | 139 | 140 | JPSDR_HDRTools_Convert_LinearRGBPStoRGB64_AVX2 proc src_R:dword,src_G:dword,src_B:dword,dst:dword,w:dword,h:dword,lookup:dword, 141 | src_modulo_R:dword,src_modulo_G:dword,src_modulo_B:dword,dst_modulo:dword 142 | 143 | public JPSDR_HDRTools_Convert_LinearRGBPStoRGB64_AVX2 144 | 145 | push esi 146 | push edi 147 | push ebx 148 | 149 | vmovaps ymm3,YMMWORD ptr data_f_1048575 150 | vmovaps ymm4,YMMWORD ptr data_f_0 151 | vmovaps ymm5,YMMWORD ptr data_f_1 152 | 153 | cld 154 | mov edi,dst 155 | mov ebx,lookup 156 | 157 | Convert_LinearRGBPStoRGB64_AVX2_1: 158 | 
mov ecx,w 159 | Convert_LinearRGBPStoRGB64_AVX2_2: 160 | mov esi,src_B 161 | xor edx,edx 162 | vmaxps ymm0,ymm4,YMMWORD ptr[esi] 163 | mov esi,src_G 164 | vminps ymm0,ymm0,ymm5 165 | vmaxps ymm1,ymm4,YMMWORD ptr[esi] 166 | mov esi,src_R 167 | vminps ymm1,ymm1,ymm5 168 | vmaxps ymm2,ymm4,YMMWORD ptr[esi] 169 | vmulps ymm0,ymm0,ymm3 170 | vminps ymm2,ymm2,ymm5 171 | 172 | vmulps ymm1,ymm1,ymm3 173 | vmulps ymm2,ymm2,ymm3 174 | vcvtps2dq ymm0,ymm0 175 | vcvtps2dq ymm1,ymm1 176 | vcvtps2dq ymm2,ymm2 177 | 178 | vpextrd eax,xmm0,0 179 | mov ax,word ptr[ebx+2*eax] 180 | stosw 181 | vpextrd eax,xmm1,0 182 | mov ax,word ptr[ebx+2*eax] 183 | stosw 184 | vpextrd eax,xmm2,0 185 | mov ax,word ptr[ebx+2*eax] 186 | stosw 187 | xor eax,eax 188 | stosw 189 | dec ecx 190 | jz Convert_LinearRGBPStoRGB64_AVX2_3 191 | inc edx 192 | 193 | vpextrd eax,xmm0,1 194 | mov ax,word ptr[ebx+2*eax] 195 | stosw 196 | vpextrd eax,xmm1,1 197 | mov ax,word ptr[ebx+2*eax] 198 | stosw 199 | vpextrd eax,xmm2,1 200 | mov ax,word ptr[ebx+2*eax] 201 | stosw 202 | xor eax,eax 203 | stosw 204 | dec ecx 205 | jz Convert_LinearRGBPStoRGB64_AVX2_3 206 | inc edx 207 | 208 | vpextrd eax,xmm0,2 209 | mov ax,word ptr[ebx+2*eax] 210 | stosw 211 | vpextrd eax,xmm1,2 212 | mov ax,word ptr[ebx+2*eax] 213 | stosw 214 | vpextrd eax,xmm2,2 215 | mov ax,word ptr[ebx+2*eax] 216 | stosw 217 | xor eax,eax 218 | stosw 219 | dec ecx 220 | jz Convert_LinearRGBPStoRGB64_AVX2_3 221 | inc edx 222 | 223 | vpextrd eax,xmm0,3 224 | mov ax,word ptr[ebx+2*eax] 225 | stosw 226 | vpextrd eax,xmm1,3 227 | mov ax,word ptr[ebx+2*eax] 228 | stosw 229 | vpextrd eax,xmm2,3 230 | mov ax,word ptr[ebx+2*eax] 231 | stosw 232 | xor eax,eax 233 | stosw 234 | dec ecx 235 | jz Convert_LinearRGBPStoRGB64_AVX2_3 236 | inc edx 237 | 238 | vextracti128 xmm0,ymm0,1 239 | vextracti128 xmm1,ymm1,1 240 | vextracti128 xmm2,ymm2,1 241 | 242 | vpextrd eax,xmm0,0 243 | mov ax,word ptr[ebx+2*eax] 244 | stosw 245 | vpextrd eax,xmm1,0 246 | mov ax,word 
ptr[ebx+2*eax] 247 | stosw 248 | vpextrd eax,xmm2,0 249 | mov ax,word ptr[ebx+2*eax] 250 | stosw 251 | xor eax,eax 252 | stosw 253 | dec ecx 254 | jz Convert_LinearRGBPStoRGB64_AVX2_3 255 | inc edx 256 | 257 | vpextrd eax,xmm0,1 258 | mov ax,word ptr[ebx+2*eax] 259 | stosw 260 | vpextrd eax,xmm1,1 261 | mov ax,word ptr[ebx+2*eax] 262 | stosw 263 | vpextrd eax,xmm2,1 264 | mov ax,word ptr[ebx+2*eax] 265 | stosw 266 | xor eax,eax 267 | stosw 268 | dec ecx 269 | jz short Convert_LinearRGBPStoRGB64_AVX2_3 270 | inc edx 271 | 272 | vpextrd eax,xmm0,2 273 | mov ax,word ptr[ebx+2*eax] 274 | stosw 275 | vpextrd eax,xmm1,2 276 | mov ax,word ptr[ebx+2*eax] 277 | stosw 278 | vpextrd eax,xmm2,2 279 | mov ax,word ptr[ebx+2*eax] 280 | stosw 281 | xor eax,eax 282 | stosw 283 | dec ecx 284 | jz short Convert_LinearRGBPStoRGB64_AVX2_3 285 | inc edx 286 | 287 | vpextrd eax,xmm0,3 288 | mov ax,word ptr[ebx+2*eax] 289 | stosw 290 | vpextrd eax,xmm1,3 291 | mov ax,word ptr[ebx+2*eax] 292 | stosw 293 | vpextrd eax,xmm2,3 294 | mov ax,word ptr[ebx+2*eax] 295 | stosw 296 | xor eax,eax 297 | stosw 298 | dec ecx 299 | 300 | Convert_LinearRGBPStoRGB64_AVX2_3: 301 | inc edx 302 | shl edx,2 303 | add src_B,edx 304 | add src_G,edx 305 | add src_R,edx 306 | or ecx,ecx 307 | jnz Convert_LinearRGBPStoRGB64_AVX2_2 308 | 309 | mov eax,dst_modulo 310 | mov edx,src_modulo_B 311 | add edi,eax 312 | add src_B,edx 313 | mov eax,src_modulo_G 314 | mov edx,src_modulo_R 315 | add src_G,eax 316 | add src_R,edx 317 | dec h 318 | jnz Convert_LinearRGBPStoRGB64_AVX2_1 319 | 320 | vzeroupper 321 | 322 | pop ebx 323 | pop edi 324 | pop esi 325 | 326 | ret 327 | 328 | JPSDR_HDRTools_Convert_LinearRGBPStoRGB64_AVX2 endp 329 | 330 | 331 | JPSDR_HDRTools_Convert_RGBPStoRGB64_AVX2 proc src_R:dword,src_G:dword,src_B:dword,dst:dword,w:dword,h:dword, 332 | src_pitch_R:dword,src_pitch_G:dword,src_pitch_B:dword,dst_pitch:dword 333 | 334 | public JPSDR_HDRTools_Convert_RGBPStoRGB64_AVX2 335 | 336 | push esi 337 | push edi 
338 | push ebx 339 | 340 | vmovaps ymm3,YMMWORD ptr data_f_65535 341 | vpxor xmm4,xmm4,xmm4 342 | 343 | mov esi,src_B 344 | mov ebx,src_G 345 | mov edx,src_R 346 | mov edi,dst 347 | 348 | Convert_RGBPStoRGB64_AVX2_1: 349 | mov ecx,w 350 | xor eax,eax 351 | shr ecx,3 352 | jz Convert_RGBPStoRGB64_AVX2_3 353 | 354 | Convert_RGBPStoRGB64_AVX2_2: 355 | vmulps ymm0,ymm3,YMMWORD ptr[esi+4*eax] 356 | vmulps ymm1,ymm3,YMMWORD ptr[edx+4*eax] 357 | vmulps ymm2,ymm3,YMMWORD ptr[ebx+4*eax] 358 | vcvtps2dq ymm0,ymm0 359 | vcvtps2dq ymm1,ymm1 360 | vcvtps2dq ymm2,ymm2 361 | 362 | vextracti128 xmm5,ymm0,1 363 | vextracti128 xmm6,ymm1,1 364 | vextracti128 xmm7,ymm2,1 365 | 366 | vpackusdw xmm0,xmm0,xmm0 ;0000B4B3B2B1 (VEX form: legacy packusdw here would mix SSE with dirty upper YMM state and stall on every iteration; upper halves are not read afterwards) 367 | vpackusdw xmm1,xmm1,xmm1 ;0000R4R3R2R1 368 | vpackusdw xmm2,xmm2,xmm2 ;0000G4G3G2G1 369 | 370 | vpunpcklwd xmm0,xmm0,xmm1 ;R4B4R3B3R2B2R1B1 371 | vpunpcklwd xmm2,xmm2,xmm4 ;0G40G30G20G1 372 | vpunpckhwd xmm1,xmm0,xmm2 ;0R4G4B40R3G3B3 373 | vpunpcklwd xmm0,xmm0,xmm2 ;0R2G2B20R1G1B1 374 | 375 | vpackusdw xmm5,xmm5,xmm5 ;0000B8B7B6B5 376 | vpackusdw xmm6,xmm6,xmm6 ;0000R8R7R6R5 377 | vpackusdw xmm7,xmm7,xmm7 ;0000G8G7G6G5 378 | 379 | vpunpcklwd xmm5,xmm5,xmm6 ;R8B8R7B7R6B6R5B5 380 | vpunpcklwd xmm7,xmm7,xmm4 ;0G80G70G60G5 381 | vpunpckhwd xmm6,xmm5,xmm7 ;0R8G8B80R7G7B7 382 | vpunpcklwd xmm5,xmm5,xmm7 ;0R6G6B60R5G5B5 383 | 384 | vmovdqa XMMWORD ptr[edi+8*eax],xmm0 385 | vmovdqa XMMWORD ptr[edi+8*eax+16],xmm1 386 | vmovdqa XMMWORD ptr[edi+8*eax+32],xmm5 387 | vmovdqa XMMWORD ptr[edi+8*eax+48],xmm6 388 | add eax,8 389 | dec ecx 390 | jnz Convert_RGBPStoRGB64_AVX2_2 391 | 392 | Convert_RGBPStoRGB64_AVX2_3: 393 | mov ecx,w 394 | and ecx,7 395 | jz Convert_RGBPStoRGB64_AVX2_7 396 | 397 | vmulps ymm0,ymm3,YMMWORD ptr[esi+4*eax] 398 | vmulps ymm1,ymm3,YMMWORD ptr[edx+4*eax] 399 | vmulps ymm2,ymm3,YMMWORD ptr[ebx+4*eax] 400 | vcvtps2dq ymm0,ymm0 401 | vcvtps2dq ymm1,ymm1 402 | vcvtps2dq ymm2,ymm2 403 | 404 | vextracti128 xmm5,ymm0,1 405 | vextracti128 xmm6,ymm1,1 406 | vextracti128 xmm7,ymm2,1 407 | 408 | 
vpackusdw xmm0,xmm0,xmm0 ;0000B4B3B2B1 (VEX form, same reason as in the main loop: avoids the SSE/AVX transition) 409 | vpackusdw xmm1,xmm1,xmm1 ;0000R4R3R2R1 410 | vpackusdw xmm2,xmm2,xmm2 ;0000G4G3G2G1 411 | 412 | vpunpcklwd xmm0,xmm0,xmm1 ;R4B4R3B3R2B2R1B1 413 | vpunpcklwd xmm2,xmm2,xmm4 ;0G40G30G20G1 414 | vpunpckhwd xmm1,xmm0,xmm2 ;0R4G4B40R3G3B3 415 | vpunpcklwd xmm0,xmm0,xmm2 ;0R2G2B20R1G1B1 416 | 417 | vpackusdw xmm5,xmm5,xmm5 ;0000B8B7B6B5 418 | vpackusdw xmm6,xmm6,xmm6 ;0000R8R7R6R5 419 | vpackusdw xmm7,xmm7,xmm7 ;0000G8G7G6G5 420 | 421 | vpunpcklwd xmm5,xmm5,xmm6 ;R8B8R7B7R6B6R5B5 422 | vpunpcklwd xmm7,xmm7,xmm4 ;0G80G70G60G5 423 | vpunpckhwd xmm6,xmm5,xmm7 ;0R8G8B80R7G7B7 424 | vpunpcklwd xmm5,xmm5,xmm7 ;0R6G6B60R5G5B5 425 | 426 | test ecx,4 427 | jnz short Convert_RGBPStoRGB64_AVX2_5 428 | test ecx,2 429 | jnz short Convert_RGBPStoRGB64_AVX2_4 430 | vmovq qword ptr[edi+8*eax],xmm0 431 | jmp short Convert_RGBPStoRGB64_AVX2_7 432 | 433 | Convert_RGBPStoRGB64_AVX2_4: 434 | vmovdqa XMMWORD ptr[edi+8*eax],xmm0 435 | test ecx,1 436 | jz short Convert_RGBPStoRGB64_AVX2_7 437 | vmovq qword ptr[edi+8*eax+16],xmm1 438 | jmp short Convert_RGBPStoRGB64_AVX2_7 439 | 440 | Convert_RGBPStoRGB64_AVX2_5: 441 | vmovdqa XMMWORD ptr[edi+8*eax],xmm0 442 | vmovdqa XMMWORD ptr[edi+8*eax+16],xmm1 443 | test ecx,2 444 | jnz short Convert_RGBPStoRGB64_AVX2_6 445 | test ecx,1 446 | jz short Convert_RGBPStoRGB64_AVX2_7 447 | vmovq qword ptr[edi+8*eax+32],xmm5 448 | jmp short Convert_RGBPStoRGB64_AVX2_7 449 | 450 | Convert_RGBPStoRGB64_AVX2_6: 451 | vmovdqa XMMWORD ptr[edi+8*eax+32],xmm5 452 | test ecx,1 453 | jz short Convert_RGBPStoRGB64_AVX2_7 454 | vmovq qword ptr[edi+8*eax+48],xmm6 455 | 456 | Convert_RGBPStoRGB64_AVX2_7: 457 | add esi,src_pitch_B 458 | add ebx,src_pitch_G 459 | add edx,src_pitch_R 460 | add edi,dst_pitch 461 | dec h 462 | jnz Convert_RGBPStoRGB64_AVX2_1 463 | 464 | vzeroupper 465 | 466 | pop ebx 467 | pop edi 468 | pop esi 469 | 470 | ret 471 | 472 | JPSDR_HDRTools_Convert_RGBPStoRGB64_AVX2 endp 473 | 474 | 475 | JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 
proc src1:dword,src2:dword,dst:dword,w32:dword,h:dword,src_pitch2:dword,dst_pitch:dword 476 | 477 | public JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 478 | 479 | push esi 480 | push edi 481 | push ebx 482 | 483 | mov edi,dst 484 | mov esi,src1 485 | mov edx,src2 486 | mov ebx,32 487 | 488 | Convert_Planar422_to_Planar420_8_AVX2_1: 489 | xor eax,eax 490 | mov ecx,w32 491 | 492 | Convert_Planar422_to_Planar420_8_AVX2_2: 493 | vmovdqa ymm0,YMMWORD ptr[esi+eax] 494 | vpavgb ymm0,ymm0,YMMWORD ptr[edx+eax] 495 | 496 | vmovdqa YMMWORD ptr[edi+eax],ymm0 497 | add eax,ebx 498 | loop Convert_Planar422_to_Planar420_8_AVX2_2 499 | 500 | add esi,src_pitch2 501 | add edx,src_pitch2 502 | add edi,dst_pitch 503 | dec h 504 | jnz short Convert_Planar422_to_Planar420_8_AVX2_1 505 | 506 | vzeroupper 507 | 508 | pop ebx 509 | pop edi 510 | pop esi 511 | 512 | ret 513 | 514 | JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 endp 515 | 516 | 517 | JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 proc src1:dword,src2:dword,dst:dword,w16:dword,h:dword,src_pitch2:dword,dst_pitch:dword 518 | 519 | public JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 520 | 521 | push esi 522 | push edi 523 | push ebx 524 | 525 | mov edi,dst 526 | mov esi,src1 527 | mov edx,src2 528 | mov ebx,32 529 | 530 | Convert_Planar422_to_Planar420_16_AVX2_1: 531 | xor eax,eax 532 | mov ecx,w16 533 | 534 | Convert_Planar422_to_Planar420_16_AVX2_2: 535 | vmovdqa ymm0,YMMWORD ptr[esi+eax] 536 | vpavgw ymm0,ymm0,YMMWORD ptr[edx+eax] 537 | 538 | vmovdqa YMMWORD ptr[edi+eax],ymm0 539 | add eax,ebx 540 | loop Convert_Planar422_to_Planar420_16_AVX2_2 541 | 542 | add esi,src_pitch2 543 | add edx,src_pitch2 544 | add edi,dst_pitch 545 | dec h 546 | jnz short Convert_Planar422_to_Planar420_16_AVX2_1 547 | 548 | vzeroupper 549 | 550 | pop ebx 551 | pop edi 552 | pop esi 553 | 554 | ret 555 | 556 | JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 endp 557 | 558 | 559 | 
;*************************************************** 560 | ;** XYZ/RGB functions ** 561 | ;*************************************************** 562 | 563 | 564 | ;*************************************************** 565 | ;** HLG functions ** 566 | ;*************************************************** 567 | 568 | 569 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 proc src:dword,dst:dword,w:dword,h:dword, 570 | src_pitch:dword,dst_pitch:dword 571 | 572 | public JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 573 | 574 | push esi 575 | push edi 576 | push ebx 577 | 578 | vmovdqa ymm4,YMMWORD ptr data_w_128 579 | 580 | mov esi,src 581 | mov edi,dst 582 | mov ebx,w 583 | shr ebx,2 584 | mov edx,128 585 | 586 | Convert_RGB64_16toRGB64_8_AVX2_loop_1: 587 | mov ecx,ebx 588 | xor eax,eax 589 | 590 | shr ecx,2 591 | jz short Convert_RGB64_16toRGB64_8_AVX2_3 592 | 593 | Convert_RGB64_16toRGB64_8_AVX2_loop_2: 594 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax] 595 | vpaddusw ymm1,ymm4,YMMWORD ptr[esi+eax+32] 596 | vpaddusw ymm2,ymm4,YMMWORD ptr[esi+eax+64] 597 | vpaddusw ymm3,ymm4,YMMWORD ptr[esi+eax+96] 598 | vpsrlw ymm0,ymm0,8 599 | vpsrlw ymm1,ymm1,8 600 | vpsrlw ymm2,ymm2,8 601 | vpsrlw ymm3,ymm3,8 602 | vmovdqa YMMWORD ptr[edi+eax],ymm0 603 | vmovdqa YMMWORD ptr[edi+eax+32],ymm1 604 | vmovdqa YMMWORD ptr[edi+eax+64],ymm2 605 | vmovdqa YMMWORD ptr[edi+eax+96],ymm3 606 | add eax,edx 607 | loop Convert_RGB64_16toRGB64_8_AVX2_loop_2 608 | 609 | Convert_RGB64_16toRGB64_8_AVX2_3: 610 | test ebx,2 611 | jz short Convert_RGB64_16toRGB64_8_AVX2_4 612 | 613 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax] 614 | vpaddusw ymm1,ymm4,YMMWORD ptr[esi+eax+32] 615 | vpsrlw ymm0,ymm0,8 616 | vpsrlw ymm1,ymm1,8 617 | vmovdqa YMMWORD ptr[edi+eax],ymm0 618 | vmovdqa YMMWORD ptr[edi+eax+32],ymm1 619 | add eax,64 620 | 621 | Convert_RGB64_16toRGB64_8_AVX2_4: 622 | test ebx,1 623 | jz short Convert_RGB64_16toRGB64_8_AVX2_5 624 | 625 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax] 626 | vpsrlw ymm0,ymm0,8 627 | 
vmovdqa YMMWORD ptr[edi+eax],ymm0 628 | add eax,32 629 | 630 | Convert_RGB64_16toRGB64_8_AVX2_5: 631 | test w,2 632 | jz short Convert_RGB64_16toRGB64_8_AVX2_6 633 | 634 | vpaddusw xmm0,xmm4,XMMWORD ptr[esi+eax] 635 | vpsrlw xmm0,xmm0,8 636 | vmovdqa XMMWORD ptr[edi+eax],xmm0 637 | 638 | add eax,16 639 | 640 | Convert_RGB64_16toRGB64_8_AVX2_6: 641 | test w,1 642 | jz short Convert_RGB64_16toRGB64_8_AVX2_7 643 | 644 | vmovq xmm0,qword ptr[esi+eax] 645 | vpaddusw xmm0,xmm0,xmm4 646 | vpsrlw xmm0,xmm0,8 647 | vmovq qword ptr[edi+eax],xmm0 648 | 649 | Convert_RGB64_16toRGB64_8_AVX2_7: 650 | add esi,src_pitch 651 | add edi,dst_pitch 652 | dec h 653 | jnz Convert_RGB64_16toRGB64_8_AVX2_loop_1 654 | 655 | vzeroupper 656 | 657 | pop ebx 658 | pop edi 659 | pop esi 660 | 661 | ret 662 | 663 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 endp 664 | 665 | 666 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 proc src:dword,dst:dword,w:dword,h:dword, 667 | src_pitch:dword,dst_pitch:dword 668 | 669 | public JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 670 | 671 | push esi 672 | push edi 673 | push ebx 674 | 675 | vmovdqa ymm4,YMMWORD ptr data_w_32 676 | 677 | mov esi,src 678 | mov edi,dst 679 | mov ebx,w 680 | shr ebx,2 681 | mov edx,128 682 | 683 | Convert_RGB64_16toRGB64_10_AVX2_loop_1: 684 | mov ecx,ebx 685 | xor eax,eax 686 | 687 | shr ecx,2 688 | jz short Convert_RGB64_16toRGB64_10_AVX2_3 689 | 690 | Convert_RGB64_16toRGB64_10_AVX2_loop_2: 691 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax] 692 | vpaddusw ymm1,ymm4,YMMWORD ptr[esi+eax+32] 693 | vpaddusw ymm2,ymm4,YMMWORD ptr[esi+eax+64] 694 | vpaddusw ymm3,ymm4,YMMWORD ptr[esi+eax+96] 695 | vpsrlw ymm0,ymm0,6 696 | vpsrlw ymm1,ymm1,6 697 | vpsrlw ymm2,ymm2,6 698 | vpsrlw ymm3,ymm3,6 699 | vmovdqa YMMWORD ptr[edi+eax],ymm0 700 | vmovdqa YMMWORD ptr[edi+eax+32],ymm1 701 | vmovdqa YMMWORD ptr[edi+eax+64],ymm2 702 | vmovdqa YMMWORD ptr[edi+eax+96],ymm3 703 | add eax,edx 704 | loop Convert_RGB64_16toRGB64_10_AVX2_loop_2 705 | 706 | 
Convert_RGB64_16toRGB64_10_AVX2_3: 707 | test ebx,2 708 | jz short Convert_RGB64_16toRGB64_10_AVX2_4 709 | 710 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax] 711 | vpaddusw ymm1,ymm4,YMMWORD ptr[esi+eax+32] 712 | vpsrlw ymm0,ymm0,6 713 | vpsrlw ymm1,ymm1,6 714 | vmovdqa YMMWORD ptr[edi+eax],ymm0 715 | vmovdqa YMMWORD ptr[edi+eax+32],ymm1 716 | add eax,64 717 | 718 | Convert_RGB64_16toRGB64_10_AVX2_4: 719 | test ebx,1 720 | jz short Convert_RGB64_16toRGB64_10_AVX2_5 721 | 722 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax] 723 | vpsrlw ymm0,ymm0,6 724 | vmovdqa YMMWORD ptr[edi+eax],ymm0 725 | add eax,32 726 | 727 | Convert_RGB64_16toRGB64_10_AVX2_5: 728 | test w,2 729 | jz short Convert_RGB64_16toRGB64_10_AVX2_6 730 | 731 | vpaddusw xmm0,xmm4,XMMWORD ptr[esi+eax] 732 | vpsrlw xmm0,xmm0,6 733 | vmovdqa XMMWORD ptr[edi+eax],xmm0 734 | 735 | add eax,16 736 | 737 | Convert_RGB64_16toRGB64_10_AVX2_6: 738 | test w,1 739 | jz short Convert_RGB64_16toRGB64_10_AVX2_7 740 | 741 | vmovq xmm0,qword ptr[esi+eax] 742 | vpaddusw xmm0,xmm0,xmm4 743 | vpsrlw xmm0,xmm0,6 744 | vmovq qword ptr[edi+eax],xmm0 745 | 746 | Convert_RGB64_16toRGB64_10_AVX2_7: 747 | add esi,src_pitch 748 | add edi,dst_pitch 749 | dec h 750 | jnz Convert_RGB64_16toRGB64_10_AVX2_loop_1 751 | 752 | vzeroupper 753 | 754 | pop ebx 755 | pop edi 756 | pop esi 757 | 758 | ret 759 | 760 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 endp 761 | 762 | 763 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 proc src:dword,dst:dword,w:dword,h:dword, 764 | src_pitch:dword,dst_pitch:dword 765 | 766 | public JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 767 | 768 | push esi 769 | push edi 770 | push ebx 771 | 772 | vmovdqa ymm4,YMMWORD ptr data_w_8 773 | 774 | mov esi,src 775 | mov edi,dst 776 | mov ebx,w 777 | shr ebx,2 778 | mov edx,128 779 | 780 | Convert_RGB64_16toRGB64_12_AVX2_loop_1: 781 | mov ecx,ebx 782 | xor eax,eax 783 | 784 | shr ecx,2 785 | jz short Convert_RGB64_16toRGB64_12_AVX2_3 786 | 787 | 
Convert_RGB64_16toRGB64_12_AVX2_loop_2: 788 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax] 789 | vpaddusw ymm1,ymm4,YMMWORD ptr[esi+eax+32] 790 | vpaddusw ymm2,ymm4,YMMWORD ptr[esi+eax+64] 791 | vpaddusw ymm3,ymm4,YMMWORD ptr[esi+eax+96] 792 | vpsrlw ymm0,ymm0,4 793 | vpsrlw ymm1,ymm1,4 794 | vpsrlw ymm2,ymm2,4 795 | vpsrlw ymm3,ymm3,4 796 | vmovdqa YMMWORD ptr[edi+eax],ymm0 797 | vmovdqa YMMWORD ptr[edi+eax+32],ymm1 798 | vmovdqa YMMWORD ptr[edi+eax+64],ymm2 799 | vmovdqa YMMWORD ptr[edi+eax+96],ymm3 800 | add eax,edx 801 | loop Convert_RGB64_16toRGB64_12_AVX2_loop_2 802 | 803 | Convert_RGB64_16toRGB64_12_AVX2_3: 804 | test ebx,2 805 | jz short Convert_RGB64_16toRGB64_12_AVX2_4 806 | 807 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax] 808 | vpaddusw ymm1,ymm4,YMMWORD ptr[esi+eax+32] 809 | vpsrlw ymm0,ymm0,4 810 | vpsrlw ymm1,ymm1,4 811 | vmovdqa YMMWORD ptr[edi+eax],ymm0 812 | vmovdqa YMMWORD ptr[edi+eax+32],ymm1 813 | add eax,64 814 | 815 | Convert_RGB64_16toRGB64_12_AVX2_4: 816 | test ebx,1 817 | jz short Convert_RGB64_16toRGB64_12_AVX2_5 818 | 819 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax] 820 | vpsrlw ymm0,ymm0,4 821 | vmovdqa YMMWORD ptr[edi+eax],ymm0 822 | add eax,32 823 | 824 | Convert_RGB64_16toRGB64_12_AVX2_5: 825 | test w,2 826 | jz short Convert_RGB64_16toRGB64_12_AVX2_6 827 | 828 | vpaddusw xmm0,xmm4,XMMWORD ptr[esi+eax] 829 | vpsrlw xmm0,xmm0,4 830 | vmovdqa XMMWORD ptr[edi+eax],xmm0 831 | 832 | add eax,16 833 | 834 | Convert_RGB64_16toRGB64_12_AVX2_6: 835 | test w,1 836 | jz short Convert_RGB64_16toRGB64_12_AVX2_7 837 | 838 | vmovq xmm0,qword ptr[esi+eax] 839 | vpaddusw xmm0,xmm0,xmm4 840 | vpsrlw xmm0,xmm0,4 841 | vmovq qword ptr[edi+eax],xmm0 842 | 843 | Convert_RGB64_16toRGB64_12_AVX2_7: 844 | add esi,src_pitch 845 | add edi,dst_pitch 846 | dec h 847 | jnz Convert_RGB64_16toRGB64_12_AVX2_loop_1 848 | 849 | vzeroupper 850 | 851 | pop ebx 852 | pop edi 853 | pop esi 854 | 855 | ret 856 | 857 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 endp 858 | 859 | 860 
;---------------------------------------------------------------------------
; JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2
;
; In-place scaling of 16-bit data by a per-group float factor.
; Each 8-byte group of four unsigned 16-bit words at dst (presumably one
; RGB64 pixel, going by the procedure name) is zero-extended to 32-bit,
; converted to float, multiplied by the matching 32-bit float read from
; srcY, converted back to integer (vcvtps2dq, current MXCSR rounding mode)
; and stored back with unsigned saturation to 16 bits (vpackusdw).
;
; Parameters (dwords of the MASM proc frame):
;   dst        - buffer that is read and overwritten in place
;   srcY       - buffer of 32-bit floats, one float per 8-byte group of dst
;   w          - number of 8-byte groups per row
;   h          - number of rows; must be >= 1 (the row counter is tested
;                after the row body, so 0 would wrap around)
;   dst_pitch  - value added to the dst pointer after each row
;   src_pitchY - value added to the srcY pointer after each row
;
; Notes:
;   * The main loop accesses dst with vmovdqa (16 bytes), so each dst row
;     must be 16-byte aligned.
;   * The stack copy of h is decremented in place.
;   * esi, edi and ebx are saved/restored; eax, ecx, edx and xmm0-xmm4
;     are clobbered.
;---------------------------------------------------------------------------
JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 proc dst:dword,srcY:dword,w:dword,h:dword,dst_pitch:dword,src_pitchY:dword

	public JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2

	push esi
	push edi
	push ebx

	mov ebx,w
	shr ebx,1							; ebx = w/2 : iteration count of the 2-group loop
	mov esi,srcY
	mov edi,dst
	mov edx,8							; per-iteration step of eax: 2 floats of srcY
	vpxor xmm4,xmm4,xmm4				; xmm4 = 0, used to zero-extend words to dwords

; ---- start of one row ----
Convert_16_RGB64_HLG_OOTF_AVX2_1:
	mov ecx,ebx
	xor eax,eax							; eax = byte offset into srcY (dst offset is 2*eax)
	or ecx,ecx
	jz short Convert_16_RGB64_HLG_OOTF_AVX2_3	; w < 2 : skip the 2-group loop

; ---- two 8-byte groups per iteration ----
Convert_16_RGB64_HLG_OOTF_AVX2_2:
	vbroadcastss xmm0,dword ptr[esi+eax]		; factor of the first group in all 4 lanes
	vbroadcastss xmm1,dword ptr[esi+eax+4]		; factor of the second group in all 4 lanes
	vmovdqa xmm2,XMMWORD ptr[edi+2*eax]			; 8 words = two groups (aligned load)
	vinsertf128 ymm0,ymm0,xmm1,1				; ymm0 = [factor0 x4 | factor1 x4]
	vpunpckhwd xmm3,xmm2,xmm4					; second group -> 4 dwords
	vpunpcklwd xmm2,xmm2,xmm4					; first group  -> 4 dwords
	vinserti128 ymm2,ymm2,xmm3,1				; ymm2 = [group0 | group1] as dwords
	vcvtdq2ps ymm2,ymm2
	vmulps ymm2,ymm2,ymm0
	vcvtps2dq ymm2,ymm2
	vextracti128 xmm3,ymm2,1
	vpackusdw xmm2,xmm2,xmm3					; back to 8 words, unsigned saturation
	vmovdqa XMMWORD ptr[edi+2*eax],xmm2

	add eax,edx
	loop Convert_16_RGB64_HLG_OOTF_AVX2_2

; ---- remaining single group when w is odd ----
Convert_16_RGB64_HLG_OOTF_AVX2_3:
	test w,1
	jz short Convert_16_RGB64_HLG_OOTF_AVX2_4

	vbroadcastss xmm0,dword ptr[esi+eax]
	vmovq xmm2,qword ptr[edi+2*eax]				; 4 words = one group
	vpunpcklwd xmm2,xmm2,xmm4
	vcvtdq2ps xmm2,xmm2
	vmulps xmm2,xmm2,xmm0
	vcvtps2dq xmm2,xmm2
	vpackusdw xmm2,xmm2,xmm2					; only the low 4 words are stored below
	vmovq qword ptr[edi+2*eax],xmm2

; ---- advance to the next row ----
Convert_16_RGB64_HLG_OOTF_AVX2_4:
	add edi,dst_pitch
	add esi,src_pitchY
	dec h
	jnz Convert_16_RGB64_HLG_OOTF_AVX2_1

	vzeroupper							; clear upper YMM halves before returning to the caller

	pop ebx
	pop edi
	pop esi

	ret

JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 endp


;***************************************************
;**           XYZ/HDR/SDR functions               **
;***************************************************


;---------------------------------------------------------------------------
; JPSDR_HDRTools_Scale_20_XYZ_AVX2
;
; Converts rows of 32-bit floats to clamped 32-bit integers:
;   dst[i] = max( min( cvt( (src[i] + *ValMin) * (*Coeff * data_f_1048575) ),
;                      data_dw_1048575 ), data_dw_0 )
; where cvt is vcvtps2dq (current MXCSR rounding mode).
; data_f_1048575, data_dw_1048575 and data_dw_0 are defined outside this
; excerpt; going by their names they presumably hold 1048575.0, 1048575
; (2^20-1) and 0 in every lane - confirm against the data section.
;
; Parameters:
;   src        - source rows of floats
;   dst        - destination rows of dwords; written with vmovdqa, so each
;                row must be 32-byte aligned
;   w8         - number of 32-byte groups (8 values) per row; must be >= 1,
;                because the inner loop uses 'loop' without a zero check
;   h          - number of rows; must be >= 1
;   src_pitch  - value added to the src pointer after each row
;   dst_pitch  - value added to the dst pointer after each row
;   ValMin     - pointer to one float, added to every source value
;   Coeff      - pointer to one float, scale factor
;
; Notes:
;   * A conversion that overflows int32 makes vcvtps2dq return 80000000h,
;     which the signed min/max pair then clamps to data_dw_0.
;   * esi, edi and ebx are saved/restored; eax, ecx, edx and ymm0-ymm4
;     are clobbered. The stack copy of h is decremented in place.
;---------------------------------------------------------------------------
JPSDR_HDRTools_Scale_20_XYZ_AVX2 proc src:dword,dst:dword,w8:dword,h:dword,src_pitch:dword,dst_pitch:dword,
	ValMin:dword,Coeff:dword

	public JPSDR_HDRTools_Scale_20_XYZ_AVX2

	push esi
	push edi
	push ebx

	mov esi,ValMin
	vbroadcastss ymm1,dword ptr[esi]			; ymm1 = *ValMin in all lanes
	mov esi,Coeff
	vbroadcastss ymm2,dword ptr[esi]			; ymm2 = *Coeff in all lanes

	vmovdqa ymm3,YMMWORD ptr data_dw_1048575	; upper clamp
	vmovdqa ymm4,YMMWORD ptr data_dw_0			; lower clamp
	vmulps ymm2,ymm2,YMMWORD ptr data_f_1048575	; fold the output scale into the coefficient once

	mov esi,src
	mov edi,dst
	mov ebx,w8
	mov edx,32							; byte step per iteration (one YMM register)

; ---- start of one row ----
Scale_20_XYZ_AVX2_1:
	xor eax,eax							; eax = byte offset inside the row
	mov ecx,ebx
; ---- 8 values per iteration ----
Scale_20_XYZ_AVX2_2:
	vaddps ymm0,ymm1,YMMWORD ptr[esi+eax]
	vmulps ymm0,ymm0,ymm2
	vcvtps2dq ymm0,ymm0
	vpminsd ymm0,ymm0,ymm3
	vpmaxsd ymm0,ymm0,ymm4
	vmovdqa YMMWORD ptr[edi+eax],ymm0

	add eax,edx
	loop Scale_20_XYZ_AVX2_2

	add esi,src_pitch
	add edi,dst_pitch
	dec h
	jnz short Scale_20_XYZ_AVX2_1

	vzeroupper

	pop ebx
	pop edi
	pop esi

	ret

JPSDR_HDRTools_Scale_20_XYZ_AVX2 endp


;---------------------------------------------------------------------------
; JPSDR_HDRTools_Scale_20_RGB_AVX2
;
; Converts rows of 32-bit floats to clamped 32-bit integers:
;   dst[i] = max( min( cvt( src[i] * data_f_1048575 ), data_dw_1048575 ),
;                 data_dw_0 )
; where cvt is vcvtps2dq (current MXCSR rounding mode). The three data_*
; constants are defined outside this excerpt (presumably 1048575.0, 1048575
; and 0 per lane, going by their names - confirm against the data section).
;
; Parameters:
;   src        - source rows of floats
;   dst        - destination rows of dwords; written with vmovdqa, so each
;                row must be 32-byte aligned
;   w8         - number of 32-byte groups (8 values) per row; must be >= 1
;   h          - number of rows; must be >= 1
;   src_pitch  - value added to the src pointer after each row
;   dst_pitch  - value added to the dst pointer after each row
;
; esi, edi and ebx are saved/restored; eax, ecx, edx and ymm0-ymm3 are
; clobbered. The stack copy of h is decremented in place.
;---------------------------------------------------------------------------
JPSDR_HDRTools_Scale_20_RGB_AVX2 proc src:dword,dst:dword,w8:dword,h:dword,src_pitch:dword,dst_pitch:dword

	public JPSDR_HDRTools_Scale_20_RGB_AVX2

	push esi
	push edi
	push ebx

	vmovaps ymm1,YMMWORD ptr data_f_1048575		; scale factor
	vmovdqa ymm2,YMMWORD ptr data_dw_1048575	; upper clamp
	vmovdqa ymm3,YMMWORD ptr data_dw_0			; lower clamp

	mov esi,src
	mov edi,dst
	mov ebx,w8
	mov edx,32							; byte step per iteration (one YMM register)

; ---- start of one row ----
Scale_20_RGB_AVX2_1:
	xor eax,eax							; eax = byte offset inside the row
	mov ecx,ebx
; ---- 8 values per iteration ----
Scale_20_RGB_AVX2_2:
	vmulps ymm0,ymm1,YMMWORD ptr[esi+eax]
	vcvtps2dq ymm0,ymm0
	vpminsd ymm0,ymm0,ymm2
	vpmaxsd ymm0,ymm0,ymm3
	vmovdqa YMMWORD ptr[edi+eax],ymm0

	add eax,edx
	loop Scale_20_RGB_AVX2_2

	add esi,src_pitch
	add edi,dst_pitch
	dec h
	jnz short Scale_20_RGB_AVX2_1

	vzeroupper

	pop ebx
	pop edi
	pop esi

	ret

JPSDR_HDRTools_Scale_20_RGB_AVX2 endp


;---------------------------------------------------------------------------
; JPSDR_HDRTools_BT2446C_16_XYZ_AVX2
;
; Processes two float buffers in place against a common float source and
; overwrites them with clamped 32-bit integers:
;   dst1[i] = clamp( cvt( (dst1[i]*src[i] + *ValMinX) * (*CoeffX * data_f_65535) ) )
;   dst2[i] = clamp( cvt( (dst2[i]*src[i] + *ValMinZ) * (*CoeffZ * data_f_65535) ) )
; with clamp(v) = max( min(v, data_dw_65535), data_dw_0 ) and cvt being
; vcvtps2dq (current MXCSR rounding mode). The data_* constants are defined
; outside this excerpt (presumably 65535.0, 65535 and 0 per lane, going by
; their names - confirm against the data section).
;
; Parameters:
;   src         - source rows of floats (multiplier)
;   dst1, dst2  - buffers read as floats (vmovaps) and written as dwords
;                 (vmovdqa); each row must be 32-byte aligned
;   w8          - number of 32-byte groups (8 values) per row; must be >= 1
;   h           - number of rows; must be >= 1
;   src_pitch   - value added to the src pointer after each row
;   dst_pitch1  - value added to the dst1 pointer after each row
;   dst_pitch2  - value added to the dst2 pointer after each row
;   ValMinX, CoeffX - pointers to the float offset / scale used for dst1
;   ValMinZ, CoeffZ - pointers to the float offset / scale used for dst2
;
; esi, edi and ebx are saved/restored; eax, ecx, edx and ymm0-ymm7 are
; clobbered. The stack copy of h is decremented in place.
;---------------------------------------------------------------------------
JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 proc src:dword,dst1:dword,dst2:dword,w8:dword,h:dword,src_pitch:dword,
	dst_pitch1:dword,dst_pitch2:dword,ValMinX:dword,CoeffX:dword,ValMinZ:dword,CoeffZ:dword

	public JPSDR_HDRTools_BT2446C_16_XYZ_AVX2

	push esi
	push edi
	push ebx

	mov esi,ValMinX
	vbroadcastss ymm2,dword ptr[esi]			; ymm2 = *ValMinX
	mov esi,CoeffX
	vbroadcastss ymm3,dword ptr[esi]			; ymm3 = *CoeffX
	mov esi,ValMinZ
	vbroadcastss ymm4,dword ptr[esi]			; ymm4 = *ValMinZ
	mov esi,CoeffZ
	vbroadcastss ymm5,dword ptr[esi]			; ymm5 = *CoeffZ

	vmovdqa ymm6,YMMWORD ptr data_dw_65535		; upper clamp
	vmovdqa ymm7,YMMWORD ptr data_dw_0			; lower clamp
	vmulps ymm3,ymm3,YMMWORD ptr data_f_65535	; fold the output scale into both coefficients
	vmulps ymm5,ymm5,YMMWORD ptr data_f_65535

	mov esi,src
	mov edi,dst1
	mov edx,dst2						; edx is used as the dst2 pointer here
	mov ebx,32							; byte step per iteration (one YMM register)

; ---- start of one row ----
BT2446C_16_XYZ_AVX2_1:
	xor eax,eax							; eax = byte offset inside the row
	mov ecx,w8
; ---- 8 values of each destination per iteration ----
BT2446C_16_XYZ_AVX2_2:
	vmovaps ymm0,YMMWORD ptr[edi+eax]
	vmovaps ymm1,YMMWORD ptr[edx+eax]
	vmulps ymm0,ymm0,YMMWORD ptr[esi+eax]
	vmulps ymm1,ymm1,YMMWORD ptr[esi+eax]
	vaddps ymm0,ymm0,ymm2
	vaddps ymm1,ymm1,ymm4
	vmulps ymm0,ymm0,ymm3
	vmulps ymm1,ymm1,ymm5
	vcvtps2dq ymm0,ymm0
	vcvtps2dq ymm1,ymm1
	vpminsd ymm0,ymm0,ymm6
	vpminsd ymm1,ymm1,ymm6
	vpmaxsd ymm0,ymm0,ymm7
	vpmaxsd ymm1,ymm1,ymm7
	vmovdqa YMMWORD ptr[edi+eax],ymm0
	vmovdqa YMMWORD ptr[edx+eax],ymm1

	add eax,ebx
	loop BT2446C_16_XYZ_AVX2_2

	add esi,src_pitch
	add edi,dst_pitch1
	add edx,dst_pitch2
	dec h
	jnz short BT2446C_16_XYZ_AVX2_1

	vzeroupper

	pop ebx
	pop edi
	pop esi

	ret

JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 endp


end






--------------------------------------------------------------------------------
/HDRTools/HDRTools.h:
--------------------------------------------------------------------------------
/*
 * HDRTools()
 *
 * Several functions for working on HDR data, and linear to non-linear conversions.
 * Copyright (C) 2018 JPSDR
 *
 * HDRTools is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * HDRTools is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Make; see the file COPYING. If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20 | * 21 | */ 22 | 23 | #include "./avisynth.h" 24 | #include "./ThreadPoolInterface.h" 25 | 26 | #define HDRTOOLS_VERSION "HDRTools 1.2.3 JPSDR" 27 | 28 | 29 | typedef struct _dataLookUp 30 | { 31 | uint16_t Min_Y,Max_Y,Min_U,Max_U,Min_V,Max_V; 32 | int32_t Offset_Y,Offset_U,Offset_V,Offset_R,Offset_G,Offset_B; 33 | double Coeff_Y,Coeff_U,Coeff_V; 34 | } dataLookUp; 35 | 36 | typedef struct _MT_Data_Info_HDRTools 37 | { 38 | void *src1,*src2,*src3,*src4; 39 | void *dst1,*dst2,*dst3,*dst4; 40 | ptrdiff_t src_pitch1,src_pitch2,src_pitch3,src_pitch4; 41 | ptrdiff_t dst_pitch1,dst_pitch2,dst_pitch3,dst_pitch4; 42 | ptrdiff_t src_modulo1,src_modulo2,src_modulo3,src_modulo4; 43 | ptrdiff_t dst_modulo1,dst_modulo2,dst_modulo3,dst_modulo4; 44 | int32_t src_Y_h_min,src_Y_h_max,src_Y_w; 45 | int32_t src_UV_h_min,src_UV_h_max,src_UV_w; 46 | int32_t dst_Y_h_min,dst_Y_h_max,dst_Y_w; 47 | int32_t dst_UV_h_min,dst_UV_h_max,dst_UV_w; 48 | bool top,bottom; 49 | bool moveY8to16; 50 | } MT_Data_Info_HDRTools; 51 | 52 | 53 | class ConvertYUVtoLinearRGB : public GenericVideoFilter 54 | { 55 | public: 56 | ConvertYUVtoLinearRGB(PClip _child,uint8_t _Color,uint8_t _OutputMode,uint8_t _HDRMode,double _HLG_Lb,double _HLG_Lw, 57 | uint8_t _HLGColor,bool _OOTF,bool _EOTF,bool _fullrange,bool _mpeg2c,uint8_t _threads, bool _sleep, 58 | bool negativePrefetch, IScriptEnvironment* env); 59 | virtual ~ConvertYUVtoLinearRGB(); 60 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); 61 | 62 | int __stdcall SetCacheHints(int cachehints, int frame_range); 63 | 64 | private: 65 | uint8_t Color,OutputMode,HDRMode,HLGColor; 66 | bool OOTF,mpeg2c,fullrange,EOTF; 67 | bool sleep,HLG_Mode; 68 | double HLG_Lb,HLG_Lw; 69 | uint16_t *lookup_Upscale8; 70 | uint32_t *lookup_Upscale16,*lookup_8to16; 71 | int16_t *lookupRGB_8; 72 | int32_t *lookupRGB_16,*lookupHLG_RGB_16; 73 | uint8_t *lookupL_8; 74 | uint16_t *lookupL_16; 75 | float *lookupL_32; 76 | void *lookupHLG_OOTF,*lookupHLG_inv_OOTF; 77 | 
bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; 78 | 79 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; 80 | uint8_t pixelsize; // AVS16 81 | uint8_t bits_per_pixel; 82 | 83 | VideoInfo *vi_original,*vi_422,*vi_444,*vi_RGB64,*vi_PlaneY_HLG; 84 | 85 | dataLookUp dl; 86 | 87 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; 88 | MT_Data_Info_HDRTools MT_Data[3][MAX_MT_THREADS]; 89 | uint8_t threads,threads_number[3],max_threads; 90 | uint32_t UserId; 91 | 92 | ThreadPoolFunction StaticThreadpoolF; 93 | 94 | static void StaticThreadpool(void *ptr); 95 | 96 | void FreeData(void); 97 | }; 98 | 99 | 100 | class ConvertYUVtoXYZ : public GenericVideoFilter 101 | { 102 | public: 103 | ConvertYUVtoXYZ(PClip _child,uint8_t _Color,uint8_t _OutputMode,uint8_t _HDRMode,double _HLG_Lb,double _HLG_Lw, 104 | double _Crosstalk,uint8_t _HLGColor,bool _OOTF,bool _EOTF,bool _fullrange,bool _mpeg2c,float _Rx,float _Ry, 105 | float _Gx,float _Gy,float _Bx,float _By,float _Wx,float _Wy, 106 | uint8_t _threads, bool _sleep, bool negativePrefetch, IScriptEnvironment* env); 107 | virtual ~ConvertYUVtoXYZ(); 108 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); 109 | 110 | int __stdcall SetCacheHints(int cachehints, int frame_range); 111 | 112 | private: 113 | uint8_t Color,OutputMode,HDRMode,HLGColor; 114 | bool OOTF,mpeg2c,fullrange,EOTF; 115 | float Rx,Ry,Gx,Gy,Bx,By,Wx,Wy; 116 | bool sleep,HLG_Mode; 117 | double HLG_Lb,HLG_Lw,Crosstalk; 118 | uint16_t *lookup_Upscale8; 119 | uint32_t *lookup_Upscale16,*lookup_8to16; 120 | int16_t *lookupRGB_8,*lookupXYZ_8,*lookupCrosstalk_8; 121 | int32_t *lookupRGB_16,*lookupXYZ_16,*lookupCrosstalk_16,*lookupHLG_RGB_16; 122 | uint8_t *lookupL_8; 123 | uint16_t *lookupL_16; 124 | float *lookupL_32; 125 | void *lookupHLG_OOTF,*lookupHLG_inv_OOTF; 126 | float Coeff_XYZ[9],*Coeff_XYZ_asm,Coeff_Crosstalk[9],*Coeff_Crosstalk_asm; 127 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; 128 | 
129 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; 130 | uint8_t pixelsize; // AVS16 131 | uint8_t bits_per_pixel; 132 | 133 | VideoInfo *vi_original,*vi_422,*vi_444,*vi_RGB64,*vi_PlaneY_HLG; 134 | 135 | dataLookUp dl; 136 | 137 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; 138 | MT_Data_Info_HDRTools MT_Data[3][MAX_MT_THREADS]; 139 | uint8_t threads,threads_number[3],max_threads; 140 | uint32_t UserId; 141 | 142 | ThreadPoolFunction StaticThreadpoolF; 143 | 144 | static void StaticThreadpool(void *ptr); 145 | 146 | void FreeData(void); 147 | }; 148 | 149 | 150 | class ConvertRGBtoXYZ : public GenericVideoFilter 151 | { 152 | public: 153 | ConvertRGBtoXYZ(PClip _child,uint8_t _Color,uint8_t _OutputMode,uint8_t _HDRMode,double _HLG_Lb,double _HLG_Lw, 154 | double _Crosstalk,uint8_t _HLGColor,bool _OOTF,bool _EOTF,bool _fastmode,float _Rx,float _Ry, 155 | float _Gx,float _Gy,float _Bx,float _By,float _Wx,float _Wy, 156 | uint8_t _threads, bool _sleep, bool negativePrefetch, IScriptEnvironment* env); 157 | virtual ~ConvertRGBtoXYZ(); 158 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); 159 | 160 | int __stdcall SetCacheHints(int cachehints, int frame_range); 161 | 162 | private: 163 | uint8_t Color,OutputMode,HDRMode,HLGColor; 164 | bool OOTF,EOTF,fastmode; 165 | float Rx,Ry,Gx,Gy,Bx,By,Wx,Wy; 166 | bool sleep,HLG_Mode; 167 | double HLG_Lb,HLG_Lw,Crosstalk; 168 | int16_t *lookupXYZ_8,*lookupCrosstalk_8;; 169 | int32_t *lookupXYZ_16,*lookupCrosstalk_16,*lookupHLG_RGB_16; 170 | uint8_t *lookupL_8; 171 | uint16_t *lookupL_16,*lookupL_8to16; 172 | float *lookupL_32,*lookupL_8to32,*lookupL_20; 173 | float Coeff_XYZ[9],*Coeff_XYZ_asm,Coeff_Crosstalk[9],*Coeff_Crosstalk_asm; 174 | void *lookupHLG_OOTF,*lookupHLG_inv_OOTF; 175 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; 176 | 177 | VideoInfo *vi_PlaneY_HLG; 178 | 179 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; 180 | uint8_t pixelsize; // AVS16 
181 | uint8_t bits_per_pixel; 182 | 183 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; 184 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; 185 | uint8_t threads,threads_number; 186 | uint32_t UserId; 187 | 188 | ThreadPoolFunction StaticThreadpoolF; 189 | 190 | static void StaticThreadpool(void *ptr); 191 | 192 | void FreeData(void); 193 | }; 194 | 195 | 196 | class ConvertLinearRGBtoYUV : public GenericVideoFilter 197 | { 198 | public: 199 | ConvertLinearRGBtoYUV(PClip _child,uint8_t _Color,uint8_t _OutputMode,uint8_t _HDRMode,double _HLG_Lb,double _HLG_Lw, 200 | uint8_t _HLGColor,bool _OOTF,bool _EOTF,bool _fullrange,bool _mpeg2c,bool _fastmode,uint8_t _threads, bool _sleep, 201 | bool negativePrefetch,IScriptEnvironment* env); 202 | virtual ~ConvertLinearRGBtoYUV(); 203 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); 204 | 205 | int __stdcall SetCacheHints(int cachehints, int frame_range); 206 | 207 | private: 208 | uint8_t Color,OutputMode,HDRMode,HLGColor; 209 | bool OOTF,mpeg2c,fullrange,fastmode,EOTF; 210 | bool sleep,HLG_Mode; 211 | double HLG_Lb,HLG_Lw; 212 | int16_t *lookupRGB_8; 213 | int32_t *lookupRGB_16,*lookupHLG_RGB_16; 214 | uint8_t *lookupL_8; 215 | uint16_t *lookupL_16,*lookupL_20; 216 | void *lookupHLG_OOTF,*lookupHLG_inv_OOTF; 217 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; 218 | 219 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; 220 | uint8_t pixelsize; // AVS16 221 | uint8_t bits_per_pixel; 222 | 223 | VideoInfo *vi_original,*vi_420,*vi_422,*vi_444,*vi_RGB32,*vi_RGB64,*vi_PlaneY_HLG; 224 | 225 | dataLookUp dl; 226 | 227 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; 228 | MT_Data_Info_HDRTools MT_Data[3][MAX_MT_THREADS]; 229 | uint8_t threads,threads_number[3],max_threads; 230 | uint32_t UserId; 231 | 232 | ThreadPoolFunction StaticThreadpoolF; 233 | 234 | static void StaticThreadpool(void *ptr); 235 | 236 | void FreeData(void); 237 | }; 238 | 239 | 240 | class 
ConvertXYZtoYUV : public GenericVideoFilter 241 | { 242 | public: 243 | ConvertXYZtoYUV(PClip _child,uint8_t _Color,uint8_t _OutputMode,uint8_t _HDRMode,double _HLG_Lb,double _HLG_Lw, 244 | double _Crosstalk,uint8_t _HLGColor,bool _OOTF,bool _EOTF,bool _fullrange,bool _mpeg2c,bool _fastmode, 245 | float _Rx,float _Ry,float _Gx,float _Gy,float _Bx,float _By,float _Wx,float _Wy,float _pRx,float _pRy, 246 | float _pGx,float _pGy,float _pBx,float _pBy,float _pWx,float _pWy, 247 | uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env); 248 | virtual ~ConvertXYZtoYUV(); 249 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); 250 | 251 | int __stdcall SetCacheHints(int cachehints, int frame_range); 252 | 253 | private: 254 | uint8_t Color,OutputMode,HDRMode,HLGColor; 255 | bool OOTF,mpeg2c,fullrange,fastmode,EOTF; 256 | float Rx,Ry,Gx,Gy,Bx,By,Wx,Wy; 257 | float pRx,pRy,pGx,pGy,pBx,pBy,pWx,pWy; 258 | bool sleep,HLG_Mode; 259 | double HLG_Lb,HLG_Lw,Crosstalk; 260 | int16_t *lookupRGB_8,*lookupXYZ_8,*lookupCrosstalk_8; 261 | int32_t *lookupRGB_16,*lookupXYZ_16,*lookupCrosstalk_16,*lookupHLG_RGB_16; 262 | uint8_t *lookupL_8; 263 | uint16_t *lookupL_16,*lookupL_20; 264 | float Coeff_XYZ[9],*Coeff_XYZ_asm,Coeff_Crosstalk[9],*Coeff_Crosstalk_asm; 265 | void *lookupHLG_OOTF,*lookupHLG_inv_OOTF; 266 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; 267 | 268 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; 269 | uint8_t pixelsize; // AVS16 270 | uint8_t bits_per_pixel; 271 | 272 | VideoInfo *vi_original,*vi_420,*vi_422,*vi_444,*vi_RGB32,*vi_RGB64,*vi_PlaneY_HLG; 273 | 274 | dataLookUp dl; 275 | 276 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; 277 | MT_Data_Info_HDRTools MT_Data[3][MAX_MT_THREADS]; 278 | uint8_t threads,threads_number[3],max_threads; 279 | uint32_t UserId; 280 | 281 | ThreadPoolFunction StaticThreadpoolF; 282 | 283 | static void StaticThreadpool(void *ptr); 284 | 285 | void FreeData(void); 
286 | }; 287 | 288 | 289 | class ConvertXYZtoRGB : public GenericVideoFilter 290 | { 291 | public: 292 | ConvertXYZtoRGB(PClip _child,uint8_t _Color,uint8_t _OutputMode,uint8_t _HDRMode,double _HLG_Lb,double _HLG_Lw, 293 | double _Crosstalk,uint8_t _HLGColor,bool _OOTF,bool _EOTF,bool _fastmode,float _Rx,float _Ry, 294 | float _Gx,float _Gy,float _Bx,float _By,float _Wx,float _Wy,float _pRx,float _pRy,float _pGx, 295 | float _pGy,float _pBx,float _pBy,float _pWx,float _pWy, 296 | uint8_t _threads, bool _sleep,bool negativePrefetch,IScriptEnvironment* env); 297 | virtual ~ConvertXYZtoRGB(); 298 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); 299 | 300 | int __stdcall SetCacheHints(int cachehints, int frame_range); 301 | 302 | private: 303 | uint8_t Color,OutputMode,HDRMode,HLGColor; 304 | bool OOTF,fastmode,EOTF; 305 | float Rx,Ry,Gx,Gy,Bx,By,Wx,Wy; 306 | float pRx,pRy,pGx,pGy,pBx,pBy,pWx,pWy; 307 | bool sleep,HLG_Mode; 308 | double HLG_Lb,HLG_Lw,Crosstalk; 309 | int16_t *lookupXYZ_8,*lookupCrosstalk_8; 310 | int32_t *lookupXYZ_16,*lookupCrosstalk_16,*lookupHLG_RGB_16; 311 | uint8_t *lookupL_8; 312 | uint16_t *lookupL_16,*lookupL_20; 313 | float Coeff_XYZ[9],*Coeff_XYZ_asm,Coeff_Crosstalk[9],*Coeff_Crosstalk_asm,*lookupL_32; 314 | void *lookupHLG_OOTF,*lookupHLG_inv_OOTF; 315 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; 316 | 317 | VideoInfo *vi_PlaneY_HLG; 318 | 319 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; 320 | uint8_t pixelsize; // AVS16 321 | uint8_t bits_per_pixel; 322 | 323 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; 324 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; 325 | uint8_t threads,threads_number; 326 | uint32_t UserId; 327 | 328 | ThreadPoolFunction StaticThreadpoolF; 329 | 330 | static void StaticThreadpool(void *ptr); 331 | 332 | void FreeData(void); 333 | }; 334 | 335 | 336 | class ConvertXYZ_Scale_HDRtoSDR : public GenericVideoFilter 337 | { 338 | public: 339 | 
ConvertXYZ_Scale_HDRtoSDR(PClip _child,float _Coeff_X,float _Coeff_Y,float _Coeff_Z,uint8_t _threads, 340 | bool _sleep,bool negativePrefetch,IScriptEnvironment* env); 341 | virtual ~ConvertXYZ_Scale_HDRtoSDR(); 342 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); 343 | 344 | int __stdcall SetCacheHints(int cachehints, int frame_range); 345 | 346 | private: 347 | bool sleep; 348 | float Coeff_X,Coeff_Y,Coeff_Z; 349 | uint16_t *lookupX_16,*lookupY_16,*lookupZ_16; 350 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; 351 | 352 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; 353 | uint8_t pixelsize; // AVS16 354 | uint8_t bits_per_pixel; 355 | 356 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; 357 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; 358 | uint8_t threads,threads_number; 359 | uint32_t UserId; 360 | 361 | ThreadPoolFunction StaticThreadpoolF; 362 | 363 | static void StaticThreadpool(void *ptr); 364 | 365 | void FreeData(void); 366 | }; 367 | 368 | 369 | class ConvertXYZ_Scale_SDRtoHDR : public GenericVideoFilter 370 | { 371 | public: 372 | ConvertXYZ_Scale_SDRtoHDR(PClip _child,float _Coeff_X,float _Coeff_Y,float _Coeff_Z, 373 | uint8_t _threads, bool _sleep,bool negativePrefetch,IScriptEnvironment* env); 374 | virtual ~ConvertXYZ_Scale_SDRtoHDR(); 375 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); 376 | 377 | int __stdcall SetCacheHints(int cachehints, int frame_range); 378 | 379 | private: 380 | float MinMastering,MaxMastering; 381 | bool sleep; 382 | float Coeff_X,Coeff_Y,Coeff_Z; 383 | uint16_t *lookupX_16,*lookupY_16,*lookupZ_16; 384 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; 385 | 386 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; 387 | uint8_t pixelsize; // AVS16 388 | uint8_t bits_per_pixel; 389 | 390 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; 391 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; 392 | uint8_t 
threads,threads_number; 393 | uint32_t UserId; 394 | 395 | ThreadPoolFunction StaticThreadpoolF; 396 | 397 | static void StaticThreadpool(void *ptr); 398 | 399 | void FreeData(void); 400 | }; 401 | 402 | 403 | class ConvertXYZ_Hable_HDRtoSDR : public GenericVideoFilter 404 | { 405 | public: 406 | ConvertXYZ_Hable_HDRtoSDR(PClip _child,double _exp_X,double _w_X,double _a_X,double _b_X,double _c_X, 407 | double _d_X,double _e_X,double _f_X,double _exp_Y,double _w_Y,double _a_Y,double _b_Y,double _c_Y, 408 | double _d_Y,double _e_Y,double _f_Y,double _exp_Z,double _w_Z,double _a_Z,double _b_Z,double _c_Z, 409 | double _d_Z,double _e_Z,double _f_Z, 410 | float _pRx,float _pRy,float _pGx,float _pGy,float _pBx,float _pBy,float _pWx,float _pWy, 411 | bool _fastmode, uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env); 412 | virtual ~ConvertXYZ_Hable_HDRtoSDR(); 413 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); 414 | 415 | int __stdcall SetCacheHints(int cachehints, int frame_range); 416 | 417 | private: 418 | bool sleep,fastmode; 419 | double exp_X,w_X,a_X,b_X,c_X,d_X,e_X,f_X; 420 | double exp_Y,w_Y,a_Y,b_Y,c_Y,d_Y,e_Y,f_Y; 421 | double exp_Z,w_Z,a_Z,b_Z,c_Z,d_Z,e_Z,f_Z; 422 | float pRx,pRy,pGx,pGy,pBx,pBy,pWx,pWy; 423 | uint16_t *lookupX_16,*lookupY_16,*lookupZ_16; 424 | float *lookupX_32,*lookupY_32,*lookupZ_32; 425 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; 426 | 427 | double Xmin,Ymin,Zmin,CoeffX,CoeffY,CoeffZ; 428 | 429 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; 430 | uint8_t pixelsize; // AVS16 431 | uint8_t bits_per_pixel; 432 | 433 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; 434 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; 435 | uint8_t threads,threads_number; 436 | uint32_t UserId; 437 | 438 | ThreadPoolFunction StaticThreadpoolF; 439 | 440 | static void StaticThreadpool(void *ptr); 441 | 442 | void FreeData(void); 443 | }; 444 | 445 | 446 | class 
ConvertRGB_Hable_HDRtoSDR : public GenericVideoFilter 447 | { 448 | public: 449 | ConvertRGB_Hable_HDRtoSDR(PClip _child,double _exp_R,double _w_R,double _a_R,double _b_R,double _c_R, 450 | double _d_R,double _e_R,double _f_R,double _exp_G,double _w_G,double _a_G,double _b_G,double _c_G, 451 | double _d_G,double _e_G,double _f_G,double _exp_B,double _w_B,double _a_B,double _b_B,double _c_B, 452 | double _d_B,double _e_B,double _f_B, 453 | bool _fastmode, uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env); 454 | virtual ~ConvertRGB_Hable_HDRtoSDR(); 455 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); 456 | 457 | int __stdcall SetCacheHints(int cachehints, int frame_range); 458 | 459 | private: 460 | bool sleep,fastmode; 461 | double exp_R,w_R,a_R,b_R,c_R,d_R,e_R,f_R; 462 | double exp_G,w_G,a_G,b_G,c_G,d_G,e_G,f_G; 463 | double exp_B,w_B,a_B,b_B,c_B,d_B,e_B,f_B; 464 | uint16_t *lookupR_16,*lookupG_16,*lookupB_16; 465 | float *lookupR_32,*lookupG_32,*lookupB_32; 466 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; 467 | 468 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; 469 | uint8_t pixelsize; // AVS16 470 | uint8_t bits_per_pixel; 471 | 472 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; 473 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; 474 | uint8_t threads,threads_number; 475 | uint32_t UserId; 476 | 477 | ThreadPoolFunction StaticThreadpoolF; 478 | 479 | static void StaticThreadpool(void *ptr); 480 | 481 | void FreeData(void); 482 | }; 483 | 484 | 485 | class ConvertXYZ_Mobius_HDRtoSDR : public GenericVideoFilter 486 | { 487 | public: 488 | ConvertXYZ_Mobius_HDRtoSDR(PClip _child,double _exp_X,double _trans_X,double _peak_X, 489 | double _exp_Y,double _trans_Y,double _peak_Y,double _exp_Z,double _trans_Z,double _peak_Z, 490 | float _pRx,float _pRy,float _pGx,float _pGy,float _pBx,float _pBy,float _pWx,float _pWy, 491 | bool _fastmode, uint8_t _threads,bool _sleep,bool 
negativePrefetch,IScriptEnvironment* env); 492 | virtual ~ConvertXYZ_Mobius_HDRtoSDR(); 493 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); 494 | 495 | int __stdcall SetCacheHints(int cachehints, int frame_range); 496 | 497 | private: 498 | bool sleep,fastmode; 499 | double exp_X,trans_X,peak_X; 500 | double exp_Y,trans_Y,peak_Y; 501 | double exp_Z,trans_Z,peak_Z; 502 | float pRx,pRy,pGx,pGy,pBx,pBy,pWx,pWy; 503 | uint16_t *lookupX_16,*lookupY_16,*lookupZ_16; 504 | float *lookupX_32,*lookupY_32,*lookupZ_32; 505 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; 506 | 507 | double Xmin,Ymin,Zmin,CoeffX,CoeffY,CoeffZ; 508 | 509 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; 510 | uint8_t pixelsize; // AVS16 511 | uint8_t bits_per_pixel; 512 | 513 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; 514 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; 515 | uint8_t threads,threads_number; 516 | uint32_t UserId; 517 | 518 | ThreadPoolFunction StaticThreadpoolF; 519 | 520 | static void StaticThreadpool(void *ptr); 521 | 522 | void FreeData(void); 523 | }; 524 | 525 | 526 | class ConvertRGB_Mobius_HDRtoSDR : public GenericVideoFilter 527 | { 528 | public: 529 | ConvertRGB_Mobius_HDRtoSDR(PClip _child,double _exp_R,double _trans_R,double _peak_R, 530 | double _exp_G,double _trans_G,double _peak_G,double _exp_B,double _trans_B,double _peak_B, 531 | bool _fastmode, uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env); 532 | virtual ~ConvertRGB_Mobius_HDRtoSDR(); 533 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); 534 | 535 | int __stdcall SetCacheHints(int cachehints, int frame_range); 536 | 537 | private: 538 | bool sleep,fastmode; 539 | double exp_R,trans_R,peak_R; 540 | double exp_G,trans_G,peak_G; 541 | double exp_B,trans_B,peak_B; 542 | uint16_t *lookupR_16,*lookupG_16,*lookupB_16; 543 | float *lookupR_32,*lookupG_32,*lookupB_32; 544 | bool 
SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable;

	bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
	uint8_t pixelsize; // AVS16
	uint8_t bits_per_pixel;

	Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
	MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS];
	uint8_t threads,threads_number;
	uint32_t UserId;

	ThreadPoolFunction StaticThreadpoolF;

	static void StaticThreadpool(void *ptr);

	void FreeData(void);
};


// Declaration of the ConvertXYZ_Reinhard_HDRtoSDR filter (derives from GenericVideoFilter).
// Only the declaration lives here; every member function is defined elsewhere.
// The constructor takes one (exp, contr, peak) triple per X/Y/Z channel, eight float
// chromaticity-style values (pRx..pWy), and the fastmode/threads/sleep/negativePrefetch options.
// NOTE(review): the meaning of exp/contr/peak and of pRx..pWy (presumably red/green/blue/white
// x,y coordinates) is inferred from the names only - confirm against the implementation.
class ConvertXYZ_Reinhard_HDRtoSDR : public GenericVideoFilter
{
public:
	ConvertXYZ_Reinhard_HDRtoSDR(PClip _child,double _exp_X,double _contr_X,double _peak_X,
		double _exp_Y,double _contr_Y,double _peak_Y,double _exp_Z,double _contr_Z,double _peak_Z,
		float _pRx,float _pRy,float _pGx,float _pGy,float _pBx,float _pBy,float _pWx,float _pWy,
		bool _fastmode, uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
	virtual ~ConvertXYZ_Reinhard_HDRtoSDR();
	PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env);

	int __stdcall SetCacheHints(int cachehints, int frame_range);

private:
	// Copies of the constructor arguments of the same name (without the leading underscore).
	bool sleep,fastmode;
	double exp_X,contr_X,peak_X;
	double exp_Y,contr_Y,peak_Y;
	double exp_Z,contr_Z,peak_Z;
	float pRx,pRy,pGx,pGy,pBx,pBy,pWx,pWy;
	// Raw pointers to per-channel tables, one uint16_t set and one float set.
	// NOTE(review): presumably allocated by the constructor and released by FreeData() - confirm.
	uint16_t *lookupX_16,*lookupY_16,*lookupZ_16;
	float *lookupX_32,*lookupY_32,*lookupZ_32;
	// One flag per instruction-set extension.
	bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable;

	double Xmin,Ymin,Zmin,CoeffX,CoeffY,CoeffZ;

	bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
	uint8_t pixelsize; // AVS16
	uint8_t bits_per_pixel;

	// Per-thread work descriptors, MAX_MT_THREADS entries each.
	Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
	MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS];
	uint8_t threads,threads_number;
	uint32_t UserId;

	ThreadPoolFunction StaticThreadpoolF;

	// Static entry point taking an untyped pointer (no implicit 'this').
	static void StaticThreadpool(void *ptr);

	void FreeData(void);
};


// Declaration of the ConvertRGB_Reinhard_HDRtoSDR filter (derives from GenericVideoFilter).
// Same layout as ConvertXYZ_Reinhard_HDRtoSDR but with R/G/B-named parameters and tables,
// and without the chromaticity values or the min/coeff members.
// NOTE(review): parameter semantics are inferred from the names only - confirm.
class ConvertRGB_Reinhard_HDRtoSDR : public GenericVideoFilter
{
public:
	ConvertRGB_Reinhard_HDRtoSDR(PClip _child,double _exp_R,double _contr_R,double _peak_R,
		double _exp_G,double _contr_G,double _peak_G,double _exp_B,double _contr_B,double _peak_B,
		bool _fastmode, uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
	virtual ~ConvertRGB_Reinhard_HDRtoSDR();
	PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env);

	int __stdcall SetCacheHints(int cachehints, int frame_range);

private:
	// Copies of the constructor arguments of the same name (without the leading underscore).
	bool sleep,fastmode;
	double exp_R,contr_R,peak_R;
	double exp_G,contr_G,peak_G;
	double exp_B,contr_B,peak_B;
	// Raw pointers to per-channel tables, one uint16_t set and one float set.
	uint16_t *lookupR_16,*lookupG_16,*lookupB_16;
	float *lookupR_32,*lookupG_32,*lookupB_32;
	bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable;

	bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
	uint8_t pixelsize; // AVS16
	uint8_t bits_per_pixel;

	// Per-thread work descriptors, MAX_MT_THREADS entries each.
	Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
	MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS];
	uint8_t threads,threads_number;
	uint32_t UserId;

	ThreadPoolFunction StaticThreadpoolF;

	static void StaticThreadpool(void *ptr);

	void FreeData(void);
};


// Declaration of the ConvertLinearRGBtoYUV_BT2446_A_HDRtoSDR filter (derives from GenericVideoFilter).
// The constructor takes three doubles (Lhdr, Lsdr, CoeffAdj) plus the usual
// fastmode/threads/sleep/negativePrefetch options.
// NOTE(review): Lhdr/Lsdr look like HDR/SDR luminance levels - units are not visible here; confirm.
class ConvertLinearRGBtoYUV_BT2446_A_HDRtoSDR : public GenericVideoFilter
{
public:
	ConvertLinearRGBtoYUV_BT2446_A_HDRtoSDR(PClip _child,double _Lhdr,double _Lsdr,double _CoeffAdj,
		bool _fastmode,uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
	virtual ~ConvertLinearRGBtoYUV_BT2446_A_HDRtoSDR();
	PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env);

	int __stdcall SetCacheHints(int cachehints, int frame_range);

private:
	bool sleep,fastmode;
	double Lhdr,Lsdr,CoeffAdj;
	// Two families of raw table pointers, suffixed _16 and _32. Note the element types:
	// the integer tables are uint16_t in the _16 family but uint32_t in the _32 family,
	// while the Y1/Y2/BY/RY tables are float in both.
	uint16_t *lookupEOTF_16,*lookupR_16,*lookupG_16,*lookupB_16;
	float *lookupY1_16,*lookupY2_16,*lookupBY_16,*lookupRY_16;
	uint32_t *lookupEOTF_32,*lookupR_32,*lookupG_32,*lookupB_32;
	float *lookupY1_32,*lookupY2_32,*lookupBY_32,*lookupRY_32;
	bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable;

	bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
	uint8_t pixelsize; // AVS16
	uint8_t bits_per_pixel;

	// Per-thread work descriptors, MAX_MT_THREADS entries each.
	Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
	MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS];
	uint8_t threads,threads_number;
	uint32_t UserId;

	ThreadPoolFunction StaticThreadpoolF;

	static void StaticThreadpool(void *ptr);

	void FreeData(void);
};


// Declaration of the ConverXYZ_BT2446_C_HDRtoSDR filter (derives from GenericVideoFilter).
// NOTE(review): the class name is spelled "Conver" (no 't'), unlike the sibling Convert* classes.
// It is left untouched here because code outside this declaration refers to it by this name.
// The constructor takes float arguments for values that are stored as double members below
// (Lhdr, Lsdr, pct_*), so they are widened on storage.
class ConverXYZ_BT2446_C_HDRtoSDR : public GenericVideoFilter
{
public:
	ConverXYZ_BT2446_C_HDRtoSDR(PClip _child,bool _ChromaC,bool _PQMode,float _Lhdr,float _Lsdr,
		float _pct_ref,float _pct_ip,float _pct_wp,float _pct_sdr_skin,float _pct_hdr_skin,float _WhiteShift,
		float _pRx,float _pRy,float _pGx,float _pGy,float _pBx,float _pBy,float _pWx,float _pWy,
		bool _fastmode,uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
	virtual ~ConverXYZ_BT2446_C_HDRtoSDR();
	PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env);

	int __stdcall SetCacheHints(int cachehints, int frame_range);

private:
	bool sleep,fastmode;
	// Raw table pointers. Only lookupY_16 is an integer table; all others are float,
	// including the remaining *_16 ones.
	uint16_t *lookupY_16;
	float *lookupX_16,*lookupiY_16,*lookupZ_16;
	float *lookup2X_16,*lookup2Y_16,*lookup2Z_16;
	float *lookupY_32,*lookupiY_32;
	float *lookup2X_32,*lookup2Y_32,*lookup2Z_32;
	double pct_ref,pct_ip,pct_wp;
	double pct_sdr_skin,pct_hdr_skin;
	double Yhdr_ip,Ysdr_ip,Ysdr_wp,Yhdr_ref;
	double coeff_k[4],Lhdr,Lsdr;
	bool ChromaC,PQMode;
	float WhiteShift;
	bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable;

	float pRx,pRy,pGx,pGy,pBx,pBy,pWx,pWy;
	double Xmin,Ymin,Zmin,CoeffX,CoeffY,CoeffZ;
	double Xn,Yn,Zn;

	bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
	uint8_t pixelsize; // AVS16
	uint8_t bits_per_pixel;

	// Raw pointer to a VideoInfo.
	// NOTE(review): ownership is not visible in this declaration - confirm who frees it.
	VideoInfo *vi_RGBPS;

	// Per-thread work descriptors, MAX_MT_THREADS entries each.
	Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
	MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS];
	uint8_t threads,threads_number;
	uint32_t UserId;

	// Private numeric helpers; dicotomie() reports success through its bool result and
	// returns a value through the k3 reference.
	// NOTE(review): presumably a bisection ("dichotomy") root search over fdico() - confirm.
	double fdico(double a,double b,double k1,double x);
	bool dicotomie(double k1,double &k3);

	ThreadPoolFunction StaticThreadpoolF;

	static void StaticThreadpool(void *ptr);

	void FreeData(void);
};


// Declaration of the ConvertXYZ_ACES_HDRtoSDR filter (derives from GenericVideoFilter).
// The constructor takes five coefficients (a..e) and one exp value per X/Y/Z channel,
// eight float chromaticity-style values (pRx..pWy), and the usual options.
// NOTE(review): the formula the a..e coefficients feed is not visible here - see the implementation.
class ConvertXYZ_ACES_HDRtoSDR : public GenericVideoFilter
{
public:
	ConvertXYZ_ACES_HDRtoSDR(PClip _child,double _a_X,double _b_X,double _c_X,double _d_X,double _e_X,
		double _a_Y,double _b_Y,double _c_Y,double _d_Y,double _e_Y,double _a_Z,double _b_Z,double _c_Z,
		double _d_Z,double _e_Z,double _exp_X,double _exp_Y,double _exp_Z,
		float _pRx,float _pRy,float _pGx,float _pGy,float _pBx,float _pBy,float _pWx,float _pWy,
		bool _fastmode, uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
	virtual ~ConvertXYZ_ACES_HDRtoSDR();
	PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env);

	int __stdcall SetCacheHints(int cachehints, int frame_range);

private:
	// Copies of the constructor arguments of the same name (without the leading underscore).
	bool sleep,fastmode;
	double a_X,b_X,c_X,d_X,e_X;
	double a_Y,b_Y,c_Y,d_Y,e_Y;
	double a_Z,b_Z,c_Z,d_Z,e_Z;
	double exp_X,exp_Y,exp_Z;
	float pRx,pRy,pGx,pGy,pBx,pBy,pWx,pWy;
	// Raw pointers to per-channel tables, one uint16_t set and one float set.
	uint16_t *lookupX_16,*lookupY_16,*lookupZ_16;
	float *lookupX_32,*lookupY_32,*lookupZ_32;
	bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable;

	double Xmin,Ymin,Zmin,CoeffX,CoeffY,CoeffZ;

	bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
	uint8_t pixelsize; // AVS16
	uint8_t bits_per_pixel;

	// Per-thread work descriptors, MAX_MT_THREADS entries each.
	Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
	MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS];
	uint8_t threads,threads_number;
	uint32_t UserId;

	ThreadPoolFunction StaticThreadpoolF;

	static void StaticThreadpool(void *ptr);

	void FreeData(void);
};


// Declaration of the ConvertRGB_ACES_HDRtoSDR filter (derives from GenericVideoFilter).
// Same layout as ConvertXYZ_ACES_HDRtoSDR but with R/G/B-named coefficients and tables,
// and without the chromaticity values or the min/coeff members.
class ConvertRGB_ACES_HDRtoSDR : public GenericVideoFilter
{
public:
	ConvertRGB_ACES_HDRtoSDR(PClip _child,double _a_R,double _b_R,double _c_R,
		double _d_R,double _e_R,double _a_G,double _b_G,double _c_G,double _d_G,
		double _e_G,double _a_B,double _b_B,double _c_B,double _d_B,double _e_B,
		double _exp_R,double _exp_G,double _exp_B,
		bool _fastmode, uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
	virtual ~ConvertRGB_ACES_HDRtoSDR();
	PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env);

	int __stdcall SetCacheHints(int cachehints, int frame_range);

private:
	// Copies of the constructor arguments of the same name (without the leading underscore).
	bool sleep,fastmode;
	double a_R,b_R,c_R,d_R,e_R;
	double a_G,b_G,c_G,d_G,e_G;
	double a_B,b_B,c_B,d_B,e_B;
	double exp_R,exp_G,exp_B;
	// Raw pointers to per-channel tables, one uint16_t set and one float set.
	uint16_t *lookupR_16,*lookupG_16,*lookupB_16;
	float *lookupR_32,*lookupG_32,*lookupB_32;
	bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable;

	bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
	uint8_t pixelsize; // AVS16
	uint8_t bits_per_pixel;

	// Per-thread work descriptors, MAX_MT_THREADS entries each.
	Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
	MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS];
	uint8_t threads,threads_number;
	uint32_t UserId;

	ThreadPoolFunction StaticThreadpoolF;

	static void StaticThreadpool(void *ptr);

	void FreeData(void);
};

-------------------------------------------------------------------------------- /HDRTools/HDRTools_AVX2_asm_x64.asm: -------------------------------------------------------------------------------- 1 | ; 2 | ; HDRTools() 3 | ; 4 | ; Several functions for working on HDR data, and linear to non-linear conversions. 5 | ; Copyright (C) 2018 JPSDR 6 | ; 7 | ; HDRTools is free software; you can redistribute it and/or modify 8 | ; it under the terms of the GNU General Public License as published by 9 | ; the Free Software Foundation; either version 2, or (at your option) 10 | ; any later version. 11 | ; 12 | ; HDRTools is distributed in the hope that it will be useful, 13 | ; but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | ; GNU General Public License for more details. 16 | ; 17 | ; You should have received a copy of the GNU General Public License 18 | ; along with GNU Make; see the file COPYING. If not, write to 19 | ; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
;
;

.data

align 16

; Constant pool. The segment is declared with 32-byte alignment because the routines
; below load these constants with aligned 256-bit moves (vmovaps/vmovdqa YMMWORD).
; Every entry is exactly 32 bytes: 8 x real4, 8 x dword or 16 x word.
data segment align(32)

data_f_0 real4 8 dup(0.0)
data_f_1 real4 8 dup(1.0)

data_f_1048575 real4 8 dup(1048575.0)
data_f_65535 real4 8 dup(65535.0)
data_dw_1048575 dword 8 dup(1048575)
data_dw_65535 dword 8 dup(65535)
data_dw_0 dword 8 dup(0)

data_w_128 word 16 dup(128)
data_w_32 word 16 dup(32)
data_w_8 word 16 dup(8)

.code

;***************************************************
;**           YUV to RGB functions                **
;***************************************************

;JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 proc src1:dword,src2:dword,dst:dword,w:dword
; src1 = rcx
; src2 = rdx
; dst = r8
; w = r9d
;
; Blends two rows of bytes into dst, 32 bytes per iteration, for w iterations:
;   dst = pavgb(src1, floor((src1+src2)/2))        (roughly (3*src1+src2)/4)
; The floor average is obtained by complementing both inputs, taking the rounding-up
; average (vpavgb) and complementing the result again.
; All three pointers are accessed with vmovdqa, so they must be 32-byte aligned.
; w is a count of 32-byte blocks and must be non-zero: 'loop' decrements rcx before testing it.

JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 proc public frame

	.endprolog

	vpcmpeqb ymm3,ymm3,ymm3			; ymm3 = all ones (mask for bitwise NOT)

	mov r10,rcx				; r10=src1
	xor rax,rax				; rax = byte offset within the row
	mov ecx,r9d				; rcx = loop counter (zero-extended w)
	mov r11,32				; byte step per iteration

Convert_Planar420_to_Planar422_8_AVX2_1:
	vmovdqa ymm0,YMMWORD ptr[r10+rax]
	vmovdqa ymm1,YMMWORD ptr[rdx+rax]
	vpxor ymm2,ymm0,ymm3			; ~src1
	vpxor ymm1,ymm1,ymm3			; ~src2
	vpavgb ymm2,ymm2,ymm1
	vpxor ymm2,ymm2,ymm3			; floor((src1+src2)/2)
	vpavgb ymm2,ymm2,ymm0			; rounding average with src1

	vmovdqa YMMWORD ptr[r8+rax],ymm2
	add rax,r11
	loop Convert_Planar420_to_Planar422_8_AVX2_1

	vzeroupper

	ret

JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 endp


;JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 proc src1:dword,src2:dword,dst:dword,w:dword
; src1 = rcx
; src2 = rdx
; dst = r8
; w = r9d
;
; Same as the _8 variant above, but the averages are taken on 16-bit words (vpavgw).
; Each iteration still handles 32 bytes (16 words); w is the number of iterations.

JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 proc public frame

	.endprolog

	vpcmpeqb ymm3,ymm3,ymm3			; ymm3 = all ones (mask for bitwise NOT)

	mov r10,rcx				; r10=src1
	xor rax,rax				; rax = byte offset within the row
	mov ecx,r9d				; rcx = loop counter (zero-extended w)
	mov r11,32				; byte step per iteration

Convert_Planar420_to_Planar422_16_AVX2_1:
	vmovdqa ymm0,YMMWORD ptr[r10+rax]
	vmovdqa ymm1,YMMWORD ptr[rdx+rax]
	vpxor ymm2,ymm0,ymm3			; ~src1
	vpxor ymm1,ymm1,ymm3			; ~src2
	vpavgw ymm2,ymm2,ymm1
	vpxor ymm2,ymm2,ymm3			; floor((src1+src2)/2)
	vpavgw ymm2,ymm2,ymm0			; rounding average with src1

	vmovdqa YMMWORD ptr[r8+rax],ymm2
	add rax,r11
	loop Convert_Planar420_to_Planar422_16_AVX2_1

	vzeroupper

	ret

JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 endp


;***************************************************
;**           RGB to YUV functions                **
;***************************************************


;JPSDR_HDRTools_Convert_LinearRGBPStoRGB64_AVX2 proc src_R:dword,src_G:dword,src_B:dword,dst:dword,w:dword,h:dword,lookup:dword,
;	src_modulo_R:dword,src_modulo_G:dword,src_modulo_B:dword,dst_modulo:dword
; src_R = rcx
; src_G = rdx
; src_B = r8
; dst = r9
;
; Converts three planes of 32-bit floats into packed 4 x 16-bit pixels through a table.
; For every sample: clamp to [0.0,1.0], multiply by 1048575.0, convert to int32 and use the
; result as an index into 'lookup', a table of 16-bit words (indices 0..1048575).
; Each output pixel is written as four words in the order B, G, R, 0.
; w is the number of pixels per row and h the number of rows; after each row the
; *_modulo byte counts are added to the corresponding pointers.
; Floats are always loaded 8 at a time, even when fewer than 8 pixels remain in the row.
; NOTE(review): this presumably relies on row padding making the over-read safe - confirm.
; The stack slot holding h is used directly as the row counter (dec h).

JPSDR_HDRTools_Convert_LinearRGBPStoRGB64_AVX2 proc public frame

w equ dword ptr[rbp+48]
h equ dword ptr[rbp+56]
lookup equ qword ptr[rbp+64]
src_modulo_R equ qword ptr[rbp+72]
src_modulo_G equ qword ptr[rbp+80]
src_modulo_B equ qword ptr[rbp+88]
dst_modulo equ qword ptr[rbp+96]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rdi
	.pushreg rdi
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	push r13
	.pushreg r13
	push r14
	.pushreg r14
	push r15
	.pushreg r15
	.endprolog

	vmovaps ymm3,YMMWORD ptr data_f_1048575	; scale factor
	vmovaps ymm4,YMMWORD ptr data_f_0	; lower clamp
	vmovaps ymm5,YMMWORD ptr data_f_1	; upper clamp

	cld					; stosw must move rdi forward
	mov rdi,r9				; rdi = dst (stosw destination)
	mov r9,rcx
	mov r10,rdx				; src_B=r8,src_G=r10,src_R=r9
	mov rbx,lookup
	mov r11d,w
	mov r12,src_modulo_R
	mov r13,src_modulo_G
	mov r14,src_modulo_B
	mov r15,dst_modulo
	xor rax,rax

Convert_LinearRGBPStoRGB64_AVX2_1:
	mov ecx,r11d				; ecx = pixels left in this row
Convert_LinearRGBPStoRGB64_AVX2_2:
	xor rdx,rdx				; rdx = pixels consumed from this group of 8, minus one
	vmaxps ymm0,ymm4,YMMWORD ptr[r8]
	vmaxps ymm1,ymm4,YMMWORD ptr[r10]
	vmaxps ymm2,ymm4,YMMWORD ptr[r9]
	vminps ymm0,ymm0,ymm5
	vminps ymm1,ymm1,ymm5
	vminps ymm2,ymm2,ymm5
	vmulps ymm0,ymm0,ymm3
	vmulps ymm1,ymm1,ymm3
	vmulps ymm2,ymm2,ymm3
	vcvtps2dq ymm0,ymm0			; ymm0 = B indices
	vcvtps2dq ymm1,ymm1			; ymm1 = G indices
	vcvtps2dq ymm2,ymm2			; ymm2 = R indices

	; Pixel 1 of the group (vpextrd zero-extends into rax, so rax is a clean index)
	vpextrd eax,xmm0,0
	mov ax,word ptr[rbx+2*rax]
	stosw
	vpextrd eax,xmm1,0
	mov ax,word ptr[rbx+2*rax]
	stosw
	vpextrd eax,xmm2,0
	mov ax,word ptr[rbx+2*rax]
	stosw
	xor eax,eax
	stosw					; fourth word of the pixel = 0
	dec ecx
	jz Convert_LinearRGBPStoRGB64_AVX2_3
	inc rdx

	; Pixel 2
	vpextrd eax,xmm0,1
	mov ax,word ptr[rbx+2*rax]
	stosw
	vpextrd eax,xmm1,1
	mov ax,word ptr[rbx+2*rax]
	stosw
	vpextrd eax,xmm2,1
	mov ax,word ptr[rbx+2*rax]
	stosw
	xor eax,eax
	stosw
	dec ecx
	jz Convert_LinearRGBPStoRGB64_AVX2_3
	inc rdx

	; Pixel 3
	vpextrd eax,xmm0,2
	mov ax,word ptr[rbx+2*rax]
	stosw
	vpextrd eax,xmm1,2
	mov ax,word ptr[rbx+2*rax]
	stosw
	vpextrd eax,xmm2,2
	mov ax,word ptr[rbx+2*rax]
	stosw
	xor eax,eax
	stosw
	dec ecx
	jz Convert_LinearRGBPStoRGB64_AVX2_3
	inc rdx

	; Pixel 4
	vpextrd eax,xmm0,3
	mov ax,word ptr[rbx+2*rax]
	stosw
	vpextrd eax,xmm1,3
	mov ax,word ptr[rbx+2*rax]
	stosw
	vpextrd eax,xmm2,3
	mov ax,word ptr[rbx+2*rax]
	stosw
	xor eax,eax
	stosw
	dec ecx
	jz Convert_LinearRGBPStoRGB64_AVX2_3
	inc rdx

	; Bring the upper four indices of each channel down into the xmm registers
	vextracti128 xmm0,ymm0,1
	vextracti128 xmm1,ymm1,1
	vextracti128 xmm2,ymm2,1

	; Pixel 5
	vpextrd eax,xmm0,0
	mov ax,word ptr[rbx+2*rax]
	stosw
	vpextrd eax,xmm1,0
	mov ax,word ptr[rbx+2*rax]
	stosw
	vpextrd eax,xmm2,0
	mov ax,word ptr[rbx+2*rax]
	stosw
	xor eax,eax
	stosw
	dec ecx
	jz Convert_LinearRGBPStoRGB64_AVX2_3
	inc rdx

	; Pixel 6
	vpextrd eax,xmm0,1
	mov ax,word ptr[rbx+2*rax]
	stosw
	vpextrd eax,xmm1,1
	mov ax,word ptr[rbx+2*rax]
	stosw
	vpextrd eax,xmm2,1
	mov ax,word ptr[rbx+2*rax]
	stosw
	xor eax,eax
	stosw
	dec ecx
	jz short Convert_LinearRGBPStoRGB64_AVX2_3
	inc rdx

	; Pixel 7
	vpextrd eax,xmm0,2
	mov ax,word ptr[rbx+2*rax]
	stosw
	vpextrd eax,xmm1,2
	mov ax,word ptr[rbx+2*rax]
	stosw
	vpextrd eax,xmm2,2
	mov ax,word ptr[rbx+2*rax]
	stosw
	xor eax,eax
	stosw
	dec ecx
	jz short Convert_LinearRGBPStoRGB64_AVX2_3
	inc rdx

	; Pixel 8
	vpextrd eax,xmm0,3
	mov ax,word ptr[rbx+2*rax]
	stosw
	vpextrd eax,xmm1,3
	mov ax,word ptr[rbx+2*rax]
	stosw
	vpextrd eax,xmm2,3
	mov ax,word ptr[rbx+2*rax]
	stosw
	xor eax,eax
	stosw
	dec ecx

Convert_LinearRGBPStoRGB64_AVX2_3:
	inc rdx					; rdx = number of pixels actually consumed (1..8)
	shl rdx,2				; 4 bytes per float sample
	add r8,rdx
	add r10,rdx
	add r9,rdx
	or ecx,ecx
	jnz Convert_LinearRGBPStoRGB64_AVX2_2

	add rdi,r15
	add r8,r14
	add r10,r13
	add r9,r12
	dec h
	jnz Convert_LinearRGBPStoRGB64_AVX2_1

	vzeroupper

	pop r15
	pop r14
	pop r13
	pop r12
	pop rbx
	pop rsi
	pop rdi
	pop rbp

	ret

JPSDR_HDRTools_Convert_LinearRGBPStoRGB64_AVX2 endp


;JPSDR_HDRTools_Convert_RGBPStoRGB64_AVX2 proc src_R:dword,src_G:dword,src_B:dword,dst:dword,w:dword,h:dword,
;	src_pitch_R:dword,src_pitch_G:dword,src_pitch_B:dword,dst_pitch:dword
; src_R = rcx
; src_G = rdx
; src_B = r8
; dst = r9
;
; Converts three planes of 32-bit floats into packed 4 x 16-bit pixels without a table.
; Every sample is multiplied by 65535.0, converted to int32 and packed to a word with
; unsigned saturation; each pixel is stored as four words in the order B, G, R, 0.
; w is the number of pixels per row, h the number of rows; the pitches are byte strides
; added to the row pointers after each row.
; Full groups of 8 pixels are stored with vmovdqa, so dst rows must be 16-byte aligned.
; The tail (w & 7) still loads 8 floats per plane but stores only the remaining pixels.
; NOTE(review): the tail over-read presumably relies on row padding - confirm.
;
; The packs use the VEX form (vpackusdw) like the rest of the routine: a legacy-SSE
; packusdw executed while the upper YMM halves are in use costs SSE/AVX state transitions.
; The low 128-bit result is identical, and the upper halves are no longer needed at
; that point (they have already been extracted into xmm5-xmm7).

JPSDR_HDRTools_Convert_RGBPStoRGB64_AVX2 proc public frame

w equ dword ptr[rbp+48]
h equ dword ptr[rbp+56]
src_pitch_R equ qword ptr[rbp+64]
src_pitch_G equ qword ptr[rbp+72]
src_pitch_B equ qword ptr[rbp+80]
dst_pitch equ qword ptr[rbp+88]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	push r13
	.pushreg r13
	push r14
	.pushreg r14
	push r15
	.pushreg r15
	sub rsp,32
	.allocstack 32
	vmovdqa XMMWORD ptr[rsp],xmm6		; xmm6/xmm7 are saved here and restored before returning
	.savexmm128 xmm6,0
	vmovdqa XMMWORD ptr[rsp+16],xmm7
	.savexmm128 xmm7,16
	.endprolog

	vmovaps ymm3,YMMWORD ptr data_f_65535	; scale factor
	vpxor xmm4,xmm4,xmm4			; zero, interleaved as the fourth word

	mov rsi,rcx				; src_B=r8,src_G=rdx,src_R=rsi

	mov r11d,w
	mov r12,src_pitch_R
	mov r13,src_pitch_G
	mov r14,src_pitch_B
	mov r15,dst_pitch
	mov rbx,8				; loaded but not read below: the loops step rax by an immediate
	mov r10d,h

Convert_RGBPStoRGB64_AVX2_1:
	mov ecx,r11d
	xor rax,rax				; rax = pixel index within the row
	shr ecx,3				; ecx = number of full groups of 8 pixels
	jz Convert_RGBPStoRGB64_AVX2_3
Convert_RGBPStoRGB64_AVX2_2:
	vmulps ymm0,ymm3,YMMWORD ptr[r8+4*rax]
	vmulps ymm1,ymm3,YMMWORD ptr[rsi+4*rax]
	vmulps ymm2,ymm3,YMMWORD ptr[rdx+4*rax]
	vcvtps2dq ymm0,ymm0
	vcvtps2dq ymm1,ymm1
	vcvtps2dq ymm2,ymm2

	vextracti128 xmm5,ymm0,1
	vextracti128 xmm6,ymm1,1
	vextracti128 xmm7,ymm2,1

	vpackusdw xmm0,xmm0,xmm0			;0000B4B3B2B1
	vpackusdw xmm1,xmm1,xmm1			;0000R4R3R2R1
	vpackusdw xmm2,xmm2,xmm2			;0000G4G3G2G1

	vpunpcklwd xmm0,xmm0,xmm1		;R4B4R3B3R2B2R1B1
	vpunpcklwd xmm2,xmm2,xmm4		;0G40G30G20G1
	vpunpckhwd xmm1,xmm0,xmm2		;0R4G4B40R3G3B3
	vpunpcklwd xmm0,xmm0,xmm2		;0R2G2B20R1G1B1

	vpackusdw xmm5,xmm5,xmm5			;0000B8B7B6B5
	vpackusdw xmm6,xmm6,xmm6			;0000R8R7R6R5
	vpackusdw xmm7,xmm7,xmm7			;0000G8G7G6G5

	vpunpcklwd xmm5,xmm5,xmm6		;R8B8R7B7R6B6R5B5
	vpunpcklwd xmm7,xmm7,xmm4		;0G80G70G60G5
	vpunpckhwd xmm6,xmm5,xmm7		;0R8G8B80R7G7B7
	vpunpcklwd xmm5,xmm5,xmm7		;0R6G6B60R5G5B5

	vmovdqa XMMWORD ptr[r9+8*rax],xmm0
	vmovdqa XMMWORD ptr[r9+8*rax+16],xmm1
	vmovdqa XMMWORD ptr[r9+8*rax+32],xmm5
	vmovdqa XMMWORD ptr[r9+8*rax+48],xmm6
	add rax,8
	dec ecx
	jnz Convert_RGBPStoRGB64_AVX2_2

Convert_RGBPStoRGB64_AVX2_3:
	mov ecx,r11d
	and ecx,7				; ecx = leftover pixels (0..7)
	jz Convert_RGBPStoRGB64_AVX2_7

	vmulps ymm0,ymm3,YMMWORD ptr[r8+4*rax]
	vmulps ymm1,ymm3,YMMWORD ptr[rsi+4*rax]
	vmulps ymm2,ymm3,YMMWORD ptr[rdx+4*rax]
	vcvtps2dq ymm0,ymm0
	vcvtps2dq ymm1,ymm1
	vcvtps2dq ymm2,ymm2

	vextracti128 xmm5,ymm0,1
	vextracti128 xmm6,ymm1,1
	vextracti128 xmm7,ymm2,1

	vpackusdw xmm0,xmm0,xmm0			;0000B4B3B2B1
	vpackusdw xmm1,xmm1,xmm1			;0000R4R3R2R1
	vpackusdw xmm2,xmm2,xmm2			;0000G4G3G2G1

	vpunpcklwd xmm0,xmm0,xmm1		;R4B4R3B3R2B2R1B1
	vpunpcklwd xmm2,xmm2,xmm4		;0G40G30G20G1
	vpunpckhwd xmm1,xmm0,xmm2		;0R4G4B40R3G3B3
	vpunpcklwd xmm0,xmm0,xmm2		;0R2G2B20R1G1B1

	vpackusdw xmm5,xmm5,xmm5			;0000B8B7B6B5
	vpackusdw xmm6,xmm6,xmm6			;0000R8R7R6R5
	vpackusdw xmm7,xmm7,xmm7			;0000G8G7G6G5

	vpunpcklwd xmm5,xmm5,xmm6		;R8B8R7B7R6B6R5B5
	vpunpcklwd xmm7,xmm7,xmm4		;0G80G70G60G5
	vpunpckhwd xmm6,xmm5,xmm7		;0R8G8B80R7G7B7
	vpunpcklwd xmm5,xmm5,xmm7		;0R6G6B60R5G5B5

	; Store exactly ecx pixels: bit 2 => 4 pixels, bit 1 => 2 pixels, bit 0 => 1 pixel
	test ecx,4
	jnz short Convert_RGBPStoRGB64_AVX2_5
	test ecx,2
	jnz short Convert_RGBPStoRGB64_AVX2_4
	vmovq qword ptr[r9+8*rax],xmm0		; 1 pixel left
	jmp short Convert_RGBPStoRGB64_AVX2_7

Convert_RGBPStoRGB64_AVX2_4:
	vmovdqa XMMWORD ptr[r9+8*rax],xmm0	; 2 or 3 pixels left
	test ecx,1
	jz short Convert_RGBPStoRGB64_AVX2_7
	vmovq qword ptr[r9+8*rax+16],xmm1
	jmp short Convert_RGBPStoRGB64_AVX2_7

Convert_RGBPStoRGB64_AVX2_5:
	vmovdqa XMMWORD ptr[r9+8*rax],xmm0	; 4 to 7 pixels left
	vmovdqa XMMWORD ptr[r9+8*rax+16],xmm1
	test ecx,2
	jnz short Convert_RGBPStoRGB64_AVX2_6
	test ecx,1
	jz short Convert_RGBPStoRGB64_AVX2_7
	vmovq qword ptr[r9+8*rax+32],xmm5
	jmp short Convert_RGBPStoRGB64_AVX2_7

Convert_RGBPStoRGB64_AVX2_6:
	vmovdqa XMMWORD ptr[r9+8*rax+32],xmm5	; 6 or 7 pixels left
	test ecx,1
	jz short Convert_RGBPStoRGB64_AVX2_7
	vmovq qword ptr[r9+8*rax+48],xmm6

Convert_RGBPStoRGB64_AVX2_7:
	add rsi,r12
	add rdx,r13
	add r8,r14
	add r9,r15
	dec r10d
	jnz Convert_RGBPStoRGB64_AVX2_1

	vmovdqa xmm7,XMMWORD ptr[rsp+16]
	vmovdqa xmm6,XMMWORD ptr[rsp]
	add rsp,32

	vzeroupper

	pop r15
	pop r14
	pop r13
	pop r12
	pop rbx
	pop rsi
	pop rbp

	ret

JPSDR_HDRTools_Convert_RGBPStoRGB64_AVX2 endp


;JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 proc src1:dword,src2:dword,dst:dword,w32:dword,h:dword,src_pitch2:dword,dst_pitch:dword
; src1 = rcx
; src2 = rdx
; dst = r8
; w32 = r9d
;
; Writes to dst the byte-wise rounding average (vpavgb) of two source rows.
; w32 is the number of 32-byte blocks per row and h the number of output rows.
; After each row both source pointers advance by src_pitch2 and dst by dst_pitch.
; src1 and dst are accessed with vmovdqa and must be 32-byte aligned.
; w32 must be non-zero: 'loop' decrements rcx before testing it.

JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 proc public frame

h equ dword ptr[rbp+48]
src_pitch2 equ qword ptr[rbp+56]
dst_pitch equ qword ptr[rbp+64]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	.endprolog

	mov rsi,rcx				; rsi = src1
	mov r10d,h				; r10d = rows left
	mov rbx,32				; byte step per iteration
	mov r11,src_pitch2
	mov r12,dst_pitch

Convert_Planar422_to_Planar420_8_AVX2_1:
	xor rax,rax				; rax = byte offset within the row
	mov ecx,r9d

Convert_Planar422_to_Planar420_8_AVX2_2:
	vmovdqa ymm0,YMMWORD ptr[rsi+rax]
	vpavgb ymm0,ymm0,YMMWORD ptr[rdx+rax]

	vmovdqa YMMWORD ptr[r8+rax],ymm0
	add rax,rbx
	loop Convert_Planar422_to_Planar420_8_AVX2_2

	add rsi,r11
	add rdx,r11
	add r8,r12
	dec r10d
	jnz short Convert_Planar422_to_Planar420_8_AVX2_1

	vzeroupper

	pop r12
	pop rbx
	pop rsi
	pop rbp

	ret

JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 endp


;JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 proc src1:dword,src2:dword,dst:dword,w16:dword,h:dword,src_pitch2:dword,dst_pitch:dword
; src1 = rcx
; src2 = rdx
; dst = r8
; w16 = r9d
;
; Same as the _8 variant above, but the average is taken on 16-bit words (vpavgw).
; Each iteration handles 32 bytes (16 words); w16 is the number of iterations per row.

JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 proc public frame

h equ dword ptr[rbp+48]
src_pitch2 equ qword ptr[rbp+56]
dst_pitch equ qword ptr[rbp+64]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	.endprolog

	mov rsi,rcx				; rsi = src1
	mov r10d,h				; r10d = rows left
	mov rbx,32				; byte step per iteration
	mov r11,src_pitch2
	mov r12,dst_pitch

Convert_Planar422_to_Planar420_16_AVX2_1:
	xor rax,rax				; rax = byte offset within the row
	mov ecx,r9d

Convert_Planar422_to_Planar420_16_AVX2_2:
	vmovdqa ymm0,YMMWORD ptr[rsi+rax]
	vpavgw ymm0,ymm0,YMMWORD ptr[rdx+rax]

	vmovdqa YMMWORD ptr[r8+rax],ymm0
	add rax,rbx
	loop Convert_Planar422_to_Planar420_16_AVX2_2

	add rsi,r11
	add rdx,r11
	add r8,r12
	dec r10d
	jnz short Convert_Planar422_to_Planar420_16_AVX2_1

	vzeroupper

	pop r12
	pop rbx
	pop rsi
	pop rbp

	ret

JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 endp


;***************************************************
;**             XYZ/RGB functions                 **
;***************************************************


;***************************************************
;**               HLG functions                   **
;***************************************************


;JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 proc src:dword,dst:dword,w:dword,h:dword,
;	src_pitch:dword,dst_pitch:dword
; src = rcx
; dst = rdx
; w = r8d
; h = r9d
; Reduces every 16-bit word of a packed 4 x 16-bit image to 8 significant bits:
;   out = (in + 128, with unsigned saturation) >> 8
; The result is still stored as 16-bit words. w is the number of pixels per row
; (8 bytes each), h the number of rows; the pitches are byte strides.
; Each row is processed in blocks of 16, 8, 4, 2 and finally 1 pixel.
; All stores except the last single pixel use vmovdqa, so dst rows must be suitably aligned.
JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 proc public frame

src_pitch equ qword ptr[rbp+48]
dst_pitch equ qword ptr[rbp+56]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rdi
	.pushreg rdi
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	.endprolog

	vmovdqa ymm4,YMMWORD ptr data_w_128	; rounding constant = half of 1<<8

	mov rsi,rcx				; rsi = src
	mov rdi,rdx				; rdi = dst
	mov ebx,r8d
	mov r10,src_pitch
	mov r11,dst_pitch
	shr ebx,2				; ebx = number of 4-pixel (32-byte) blocks per row
	mov rdx,128				; byte step of the main loop (4 x 32 bytes)

Convert_RGB64_16toRGB64_8_AVX2_loop_1:
	mov ecx,ebx
	xor rax,rax				; rax = byte offset within the row

	shr ecx,2				; ecx = number of 16-pixel (128-byte) blocks
	jz Convert_RGB64_16toRGB64_8_AVX2_3

Convert_RGB64_16toRGB64_8_AVX2_loop_2:
	vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax]
	vpaddusw ymm1,ymm4,YMMWORD ptr[rsi+rax+32]
	vpaddusw ymm2,ymm4,YMMWORD ptr[rsi+rax+64]
	vpaddusw ymm3,ymm4,YMMWORD ptr[rsi+rax+96]
	vpsrlw ymm0,ymm0,8
	vpsrlw ymm1,ymm1,8
	vpsrlw ymm2,ymm2,8
	vpsrlw ymm3,ymm3,8
	vmovdqa YMMWORD ptr[rdi+rax],ymm0
	vmovdqa YMMWORD ptr[rdi+rax+32],ymm1
	vmovdqa YMMWORD ptr[rdi+rax+64],ymm2
	vmovdqa YMMWORD ptr[rdi+rax+96],ymm3
	add rax,rdx
	loop Convert_RGB64_16toRGB64_8_AVX2_loop_2

Convert_RGB64_16toRGB64_8_AVX2_3:
	test ebx,2				; 8 more pixels (64 bytes)?
	jz short Convert_RGB64_16toRGB64_8_AVX2_4

	vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax]
	vpaddusw ymm1,ymm4,YMMWORD ptr[rsi+rax+32]
	vpsrlw ymm0,ymm0,8
	vpsrlw ymm1,ymm1,8
	vmovdqa YMMWORD ptr[rdi+rax],ymm0
	vmovdqa YMMWORD ptr[rdi+rax+32],ymm1
	add rax,64

Convert_RGB64_16toRGB64_8_AVX2_4:
	test ebx,1				; 4 more pixels (32 bytes)?
	jz short Convert_RGB64_16toRGB64_8_AVX2_5

	vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax]
	vpsrlw ymm0,ymm0,8
	vmovdqa YMMWORD ptr[rdi+rax],ymm0
	add rax,32

Convert_RGB64_16toRGB64_8_AVX2_5:
	test r8d,2				; 2 more pixels (16 bytes)?
	jz short Convert_RGB64_16toRGB64_8_AVX2_6

	vpaddusw xmm0,xmm4,XMMWORD ptr[rsi+rax]
	vpsrlw xmm0,xmm0,8
	vmovdqa XMMWORD ptr[rdi+rax],xmm0
	add rax,16

Convert_RGB64_16toRGB64_8_AVX2_6:
	test r8d,1				; last single pixel (8 bytes)?
	jz short Convert_RGB64_16toRGB64_8_AVX2_7

	vmovq xmm0,qword ptr[rsi+rax]
	vpaddusw xmm0,xmm0,xmm4
	vpsrlw xmm0,xmm0,8
	vmovq qword ptr[rdi+rax],xmm0

Convert_RGB64_16toRGB64_8_AVX2_7:
	add rsi,r10
	add rdi,r11
	dec r9d
	jnz Convert_RGB64_16toRGB64_8_AVX2_loop_1

	vzeroupper

	pop rbx
	pop rsi
	pop rdi
	pop rbp

	ret

JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 endp


;JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 proc src:dword,dst:dword,w:dword,h:dword,
;	src_pitch:dword,dst_pitch:dword
; src = rcx
; dst = rdx
; w = r8d
; h = r9d
;
; Same structure as the _8 variant, reducing every word to 10 significant bits:
;   out = (in + 32, with unsigned saturation) >> 6
JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 proc public frame

src_pitch equ qword ptr[rbp+48]
dst_pitch equ qword ptr[rbp+56]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rdi
	.pushreg rdi
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	.endprolog

	vmovdqa ymm4,YMMWORD ptr data_w_32	; rounding constant = half of 1<<6

	mov rsi,rcx				; rsi = src
	mov rdi,rdx				; rdi = dst
	mov ebx,r8d
	mov r10,src_pitch
	mov r11,dst_pitch
	shr ebx,2				; ebx = number of 4-pixel (32-byte) blocks per row
	mov rdx,128				; byte step of the main loop (4 x 32 bytes)

Convert_RGB64_16toRGB64_10_AVX2_loop_1:
	mov ecx,ebx
	xor rax,rax				; rax = byte offset within the row

	shr ecx,2				; ecx = number of 16-pixel (128-byte) blocks
	jz Convert_RGB64_16toRGB64_10_AVX2_3

Convert_RGB64_16toRGB64_10_AVX2_loop_2:
	vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax]
	vpaddusw ymm1,ymm4,YMMWORD ptr[rsi+rax+32]
	vpaddusw ymm2,ymm4,YMMWORD ptr[rsi+rax+64]
	vpaddusw ymm3,ymm4,YMMWORD ptr[rsi+rax+96]
	vpsrlw ymm0,ymm0,6
	vpsrlw ymm1,ymm1,6
	vpsrlw ymm2,ymm2,6
	vpsrlw ymm3,ymm3,6
	vmovdqa YMMWORD ptr[rdi+rax],ymm0
	vmovdqa YMMWORD ptr[rdi+rax+32],ymm1
	vmovdqa YMMWORD ptr[rdi+rax+64],ymm2
	vmovdqa YMMWORD ptr[rdi+rax+96],ymm3
	add rax,rdx
	loop Convert_RGB64_16toRGB64_10_AVX2_loop_2

Convert_RGB64_16toRGB64_10_AVX2_3:
	test ebx,2				; 8 more pixels (64 bytes)?
	jz short Convert_RGB64_16toRGB64_10_AVX2_4

	vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax]
	vpaddusw ymm1,ymm4,YMMWORD ptr[rsi+rax+32]
	vpsrlw ymm0,ymm0,6
	vpsrlw ymm1,ymm1,6
	vmovdqa YMMWORD ptr[rdi+rax],ymm0
	vmovdqa YMMWORD ptr[rdi+rax+32],ymm1
	add rax,64

Convert_RGB64_16toRGB64_10_AVX2_4:
	test ebx,1				; 4 more pixels (32 bytes)?
	jz short Convert_RGB64_16toRGB64_10_AVX2_5

	vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax]
	vpsrlw ymm0,ymm0,6
	vmovdqa YMMWORD ptr[rdi+rax],ymm0
	add rax,32

Convert_RGB64_16toRGB64_10_AVX2_5:
	test r8d,2				; 2 more pixels (16 bytes)?
	jz short Convert_RGB64_16toRGB64_10_AVX2_6

	vpaddusw xmm0,xmm4,XMMWORD ptr[rsi+rax]
	vpsrlw xmm0,xmm0,6
	vmovdqa XMMWORD ptr[rdi+rax],xmm0
	add rax,16

Convert_RGB64_16toRGB64_10_AVX2_6:
	test r8d,1				; last single pixel (8 bytes)?
	jz short Convert_RGB64_16toRGB64_10_AVX2_7

	vmovq xmm0,qword ptr[rsi+rax]
	vpaddusw xmm0,xmm0,xmm4
	vpsrlw xmm0,xmm0,6
	vmovq qword ptr[rdi+rax],xmm0

Convert_RGB64_16toRGB64_10_AVX2_7:
	add rsi,r10
	add rdi,r11
	dec r9d
	jnz Convert_RGB64_16toRGB64_10_AVX2_loop_1

	vzeroupper

	pop rbx
	pop rsi
	pop rdi
	pop rbp

	ret

JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 endp


;JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 proc src:dword,dst:dword,w:dword,h:dword,
;	src_pitch:dword,dst_pitch:dword
; src = rcx
; dst = rdx
; w = r8d
; h = r9d
;
; Same structure as the _8 variant, reducing every word to 12 significant bits:
;   out = (in + 8, with unsigned saturation) >> 4
JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 proc public frame

src_pitch equ qword ptr[rbp+48]
dst_pitch equ qword ptr[rbp+56]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rdi
	.pushreg rdi
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	.endprolog

	vmovdqa ymm4,YMMWORD ptr data_w_8	; rounding constant = half of 1<<4

	mov rsi,rcx				; rsi = src
	mov rdi,rdx				; rdi = dst
	mov ebx,r8d
	mov r10,src_pitch
	mov r11,dst_pitch
	shr ebx,2				; ebx = number of 4-pixel (32-byte) blocks per row
	mov rdx,128				; byte step of the main loop (4 x 32 bytes)

Convert_RGB64_16toRGB64_12_AVX2_loop_1:
	mov ecx,ebx
	xor rax,rax				; rax = byte offset within the row

	shr ecx,2				; ecx = number of 16-pixel (128-byte) blocks
	jz Convert_RGB64_16toRGB64_12_AVX2_3

Convert_RGB64_16toRGB64_12_AVX2_loop_2:
	vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax]
	vpaddusw ymm1,ymm4,YMMWORD ptr[rsi+rax+32]
	vpaddusw ymm2,ymm4,YMMWORD ptr[rsi+rax+64]
	vpaddusw ymm3,ymm4,YMMWORD ptr[rsi+rax+96]
	vpsrlw ymm0,ymm0,4
	vpsrlw ymm1,ymm1,4
	vpsrlw ymm2,ymm2,4
	vpsrlw ymm3,ymm3,4
	vmovdqa YMMWORD ptr[rdi+rax],ymm0
	vmovdqa YMMWORD ptr[rdi+rax+32],ymm1
	vmovdqa YMMWORD ptr[rdi+rax+64],ymm2
	vmovdqa YMMWORD ptr[rdi+rax+96],ymm3
	add rax,rdx
	loop Convert_RGB64_16toRGB64_12_AVX2_loop_2

Convert_RGB64_16toRGB64_12_AVX2_3:
	test ebx,2				; 8 more pixels (64 bytes)?
	jz short Convert_RGB64_16toRGB64_12_AVX2_4

	vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax]
	vpaddusw ymm1,ymm4,YMMWORD ptr[rsi+rax+32]
	vpsrlw ymm0,ymm0,4
	vpsrlw ymm1,ymm1,4
	vmovdqa YMMWORD ptr[rdi+rax],ymm0
	vmovdqa YMMWORD ptr[rdi+rax+32],ymm1
	add rax,64

Convert_RGB64_16toRGB64_12_AVX2_4:
	test ebx,1				; 4 more pixels (32 bytes)?
	jz short Convert_RGB64_16toRGB64_12_AVX2_5

	vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax]
	vpsrlw ymm0,ymm0,4
	vmovdqa YMMWORD ptr[rdi+rax],ymm0
	add rax,32

Convert_RGB64_16toRGB64_12_AVX2_5:
	test r8d,2				; 2 more pixels (16 bytes)?
	jz short Convert_RGB64_16toRGB64_12_AVX2_6

	vpaddusw xmm0,xmm4,XMMWORD ptr[rsi+rax]
	vpsrlw xmm0,xmm0,4
	vmovdqa XMMWORD ptr[rdi+rax],xmm0
	add rax,16

Convert_RGB64_16toRGB64_12_AVX2_6:
	test r8d,1				; last single pixel (8 bytes)?
	jz short Convert_RGB64_16toRGB64_12_AVX2_7

	vmovq xmm0,qword ptr[rsi+rax]
	vpaddusw xmm0,xmm0,xmm4
	vpsrlw xmm0,xmm0,4
	vmovq qword ptr[rdi+rax],xmm0

Convert_RGB64_16toRGB64_12_AVX2_7:
	add rsi,r10
	add rdi,r11
	dec r9d
	jnz Convert_RGB64_16toRGB64_12_AVX2_loop_1

	vzeroupper

	pop rbx
	pop rsi
	pop rdi
	pop rbp

	ret

JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 endp


;JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 proc dst:dword,srcY:dword,w:dword,h:dword,dst_pitch:dword,src_pitchY:dword
; dst = rcx
; srcY = rdx
; w = r8d
; h = r9d
;
; In-place scaling of a packed 4 x 16-bit image by a per-pixel float factor.
; For pixel i of a row, the four words at dst[i] are widened to int32, converted to float,
; multiplied by the single float srcY[i], converted back to int32 and packed to words with
; unsigned saturation. All four words of the pixel are scaled by the same factor.
; w is the number of pixels per row, h the number of rows; the pitches are byte strides.
; Pixels are handled two at a time with vmovdqa, so dst rows must be 16-byte aligned.
; NOTE(review): srcY presumably holds the luminance-derived OOTF gain - confirm with the caller.

JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 proc public frame

dst_pitch equ qword ptr[rbp+48]
src_pitchY equ qword ptr[rbp+56]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rdi
	.pushreg rdi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	.endprolog

	mov rdi,rcx				; rdi = dst
	mov rsi,rdx				; rsi = srcY
	mov r10d,r8d
	mov r11,dst_pitch
	mov r12,src_pitchY
	mov rdx,8				; byte step in srcY per iteration (2 floats)
	shr r10d,1				; r10d = number of pixel pairs per row
	mov rbx,1				; mask used to test for an odd last pixel
	vpxor xmm4,xmm4,xmm4			; zero, used to widen words to dwords

Convert_16_RGB64_HLG_OOTF_AVX2_1:
	mov ecx,r10d
	xor rax,rax				; rax = byte offset in srcY; dst offset is 2*rax
	or ecx,ecx
	jz short Convert_16_RGB64_HLG_OOTF_AVX2_3

Convert_16_RGB64_HLG_OOTF_AVX2_2:
	vbroadcastss xmm0,dword ptr[rsi+rax]	; factor of the first pixel, 4 copies
	vbroadcastss xmm1,dword ptr[rsi+rax+4]	; factor of the second pixel, 4 copies
	vmovdqa xmm2,XMMWORD ptr[rdi+2*rax]	; two pixels = 8 words
	vinsertf128 ymm0,ymm0,xmm1,1
	vpunpckhwd xmm3,xmm2,xmm4		; second pixel widened to dwords
	vpunpcklwd xmm2,xmm2,xmm4		; first pixel widened to dwords
	vinserti128 ymm2,ymm2,xmm3,1
	vcvtdq2ps ymm2,ymm2
	vmulps ymm2,ymm2,ymm0
	vcvtps2dq ymm2,ymm2
	vextracti128 xmm3,ymm2,1
	vpackusdw xmm2,xmm2,xmm3		; back to 8 words with unsigned saturation
	vmovdqa XMMWORD ptr[rdi+2*rax],xmm2

	add rax,rdx
	loop Convert_16_RGB64_HLG_OOTF_AVX2_2

Convert_16_RGB64_HLG_OOTF_AVX2_3:
	test r8d,ebx				; odd width: one pixel left?
	jz short Convert_16_RGB64_HLG_OOTF_AVX2_4

	vbroadcastss xmm0,dword ptr[rsi+rax]
	vmovq xmm2,qword ptr[rdi+2*rax]
	vpunpcklwd xmm2,xmm2,xmm4
	vcvtdq2ps xmm2,xmm2
	vmulps xmm2,xmm2,xmm0
	vcvtps2dq xmm2,xmm2
	vpackusdw xmm2,xmm2,xmm2
	vmovq qword ptr[rdi+2*rax],xmm2

Convert_16_RGB64_HLG_OOTF_AVX2_4:
	add rdi,r11
	add rsi,r12
	dec r9d
	jnz Convert_16_RGB64_HLG_OOTF_AVX2_1

	vzeroupper

	pop r12
	pop rbx
	pop rdi
	pop rsi
	pop rbp

	ret

JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 endp


;***************************************************
;**           XYZ/HDR/SDR functions               **
;***************************************************


;JPSDR_HDRTools_Scale_20_XYZ_AVX2 proc src:dword,dst:dword,w8:dword,h:dword,src_pitch:dword,dst_pitch:dword,
;	ValMin:dword,Coeff:dword
; src = rcx
; dst = rdx
; w8 = r8d
; h = r9d

JPSDR_HDRTools_Scale_20_XYZ_AVX2 proc public frame

src_pitch equ qword ptr[rbp+48]
dst_pitch equ qword ptr[rbp+56]
ValMin equ qword ptr[rbp+64]
Coeff equ qword ptr[rbp+72]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	.endprolog

	mov rsi,ValMin
	vbroadcastss ymm1,dword ptr[rsi]
	mov rsi,Coeff
	vbroadcastss ymm2,dword ptr[rsi]

	vmovdqa ymm3,YMMWORD ptr data_dw_1048575
	vmovdqa ymm4,YMMWORD ptr data_dw_0
	vmulps ymm2,ymm2,YMMWORD ptr data_f_1048575

	mov rsi,rcx
	mov r10,src_pitch
	mov r11,dst_pitch
	mov rbx,32

Scale_20_XYZ_AVX2_1:
	xor rax,rax
	mov ecx,r8d
Scale_20_XYZ_AVX2_2:
	vaddps ymm0,ymm1,YMMWORD
ptr[rsi+rax] 1137 | vmulps ymm0,ymm0,ymm2 1138 | vcvtps2dq ymm0,ymm0 1139 | vpminsd ymm0,ymm0,ymm3 1140 | vpmaxsd ymm0,ymm0,ymm4 1141 | vmovdqa YMMWORD ptr[rdx+rax],ymm0 1142 | 1143 | add rax,rbx 1144 | loop Scale_20_XYZ_AVX2_2 1145 | 1146 | add rsi,r10 1147 | add rdx,r11 1148 | dec r9d 1149 | jnz short Scale_20_XYZ_AVX2_1 1150 | 1151 | vzeroupper 1152 | 1153 | pop rbx 1154 | pop rsi 1155 | pop rbp 1156 | 1157 | ret 1158 | 1159 | JPSDR_HDRTools_Scale_20_XYZ_AVX2 endp 1160 | 1161 | 1162 | ;JPSDR_HDRTools_Scale_20_RGB_AVX2 proc src:dword,dst:dword,w8:dword,h:dword,src_pitch:dword,dst_pitch:dword 1163 | ; src = rcx 1164 | ; dst = rdx 1165 | ; w8 = r8d 1166 | ; h = r9d 1167 | 1168 | JPSDR_HDRTools_Scale_20_RGB_AVX2 proc public frame 1169 | 1170 | src_pitch equ qword ptr[rbp+48] 1171 | dst_pitch equ qword ptr[rbp+56] 1172 | ValMin equ qword ptr[rbp+64] 1173 | Coeff equ qword ptr[rbp+72] 1174 | 1175 | push rbp 1176 | .pushreg rbp 1177 | mov rbp,rsp 1178 | push rsi 1179 | .pushreg rsi 1180 | push rbx 1181 | .pushreg rbx 1182 | .endprolog 1183 | 1184 | vmovaps ymm1,YMMWORD ptr data_f_1048575 1185 | vmovdqa ymm2,YMMWORD ptr data_dw_1048575 1186 | vmovdqa ymm3,YMMWORD ptr data_dw_0 1187 | 1188 | mov rsi,rcx 1189 | mov r10,src_pitch 1190 | mov r11,dst_pitch 1191 | mov rbx,32 1192 | 1193 | Scale_20_RGB_AVX2_1: 1194 | xor rax,rax 1195 | mov ecx,r8d 1196 | Scale_20_RGB_AVX2_2: 1197 | vmulps ymm0,ymm1,YMMWORD ptr[rsi+rax] 1198 | vcvtps2dq ymm0,ymm0 1199 | vpminsd ymm0,ymm0,ymm2 1200 | vpmaxsd ymm0,ymm0,ymm3 1201 | vmovdqa YMMWORD ptr[rdx+rax],ymm0 1202 | 1203 | add rax,rbx 1204 | loop Scale_20_RGB_AVX2_2 1205 | 1206 | add rsi,r10 1207 | add rdx,r11 1208 | dec r9d 1209 | jnz short Scale_20_RGB_AVX2_1 1210 | 1211 | vzeroupper 1212 | 1213 | pop rbx 1214 | pop rsi 1215 | pop rbp 1216 | 1217 | ret 1218 | 1219 | JPSDR_HDRTools_Scale_20_RGB_AVX2 endp 1220 | 1221 | 1222 | ;JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 proc src:dword,dst1:dword,dst2:dword,w8:dword,h:dword,src_pitch:dword, 1223 | ; 
dst_pitch1:dword,dst_pitch2:dword,ValMinX:dword,CoeffX:dword,ValMinZ:dword,CoeffZ:dword 1224 | ; src = rcx 1225 | ; dst1 = rdx 1226 | ; dst2 = r8 1227 | ; w8 = r9d 1228 | 1229 | JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 proc public frame 1230 | 1231 | h equ dword ptr[rbp+48] 1232 | src_pitch equ qword ptr[rbp+56] 1233 | dst_pitch1 equ qword ptr[rbp+64] 1234 | dst_pitch2 equ qword ptr[rbp+72] 1235 | ValMinX equ qword ptr[rbp+80] 1236 | CoeffX equ qword ptr[rbp+88] 1237 | ValMinZ equ qword ptr[rbp+96] 1238 | CoeffZ equ qword ptr[rbp+104] 1239 | 1240 | push rbp 1241 | .pushreg rbp 1242 | mov rbp,rsp 1243 | push rsi 1244 | .pushreg rsi 1245 | push rbx 1246 | .pushreg rbx 1247 | push r12 1248 | .pushreg r12 1249 | push r13 1250 | .pushreg r13 1251 | sub rsp,48 1252 | .allocstack 48 1253 | vmovdqa XMMWORD ptr[rsp],xmm6 1254 | .savexmm128 xmm6,0 1255 | vmovdqa XMMWORD ptr[rsp+16],xmm7 1256 | .savexmm128 xmm7,16 1257 | vmovdqa XMMWORD ptr[rsp+32],xmm8 1258 | .savexmm128 xmm8,32 1259 | .endprolog 1260 | 1261 | mov rsi,ValMinX 1262 | vbroadcastss ymm2,dword ptr[rsi] 1263 | mov rsi,CoeffX 1264 | vbroadcastss ymm3,dword ptr[rsi] 1265 | mov rsi,ValMinZ 1266 | vbroadcastss ymm4,dword ptr[rsi] 1267 | mov rsi,CoeffZ 1268 | vbroadcastss ymm5,dword ptr[rsi] 1269 | 1270 | vmovdqa ymm6,YMMWORD ptr data_dw_65535 1271 | vmovdqa ymm7,YMMWORD ptr data_dw_0 1272 | vmulps ymm3,ymm3,YMMWORD ptr data_f_65535 1273 | vmulps ymm5,ymm5,YMMWORD ptr data_f_65535 1274 | 1275 | mov rsi,rcx 1276 | mov r10,src_pitch 1277 | mov r11,dst_pitch1 1278 | mov r12,dst_pitch2 1279 | mov r13d,h 1280 | mov rbx,32 1281 | 1282 | BT2446C_16_XYZ_AVX2_1: 1283 | xor rax,rax 1284 | mov ecx,r9d 1285 | BT2446C_16_XYZ_AVX2_2: 1286 | vmovaps ymm8,YMMWORD ptr[rsi+rax] 1287 | vmulps ymm0,ymm8,YMMWORD ptr[rdx+rax] 1288 | vmulps ymm1,ymm8,YMMWORD ptr[r8+rax] 1289 | vaddps ymm0,ymm0,ymm2 1290 | vaddps ymm1,ymm1,ymm4 1291 | vmulps ymm0,ymm0,ymm3 1292 | vmulps ymm1,ymm1,ymm5 1293 | vcvtps2dq ymm0,ymm0 1294 | vcvtps2dq ymm1,ymm1 1295 | 
vpminsd ymm0,ymm0,ymm6 1296 | vpminsd ymm1,ymm1,ymm6 1297 | vpmaxsd ymm0,ymm0,ymm7 1298 | vpmaxsd ymm1,ymm1,ymm7 1299 | vmovdqa YMMWORD ptr[rdx+rax],ymm0 1300 | vmovdqa YMMWORD ptr[r8+rax],ymm1 1301 | 1302 | add rax,rbx 1303 | loop BT2446C_16_XYZ_AVX2_2 1304 | 1305 | add rsi,r10 1306 | add rdx,r11 1307 | add r8,r12 1308 | dec r13d 1309 | jnz short BT2446C_16_XYZ_AVX2_1 1310 | 1311 | vmovdqa xmm8,XMMWORD ptr[rsp+32] 1312 | vmovdqa xmm7,XMMWORD ptr[rsp+16] 1313 | vmovdqa xmm6,XMMWORD ptr[rsp] 1314 | add rsp,48 1315 | 1316 | vzeroupper 1317 | 1318 | pop r13 1319 | pop r12 1320 | pop rbx 1321 | pop rsi 1322 | pop rbp 1323 | 1324 | ret 1325 | 1326 | JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 endp 1327 | 1328 | 1329 | end 1330 | --------------------------------------------------------------------------------