├── README.md
├── HDRTools - ReadMe.txt
├── HDRTools
│   ├── HDRTools.rc
│   ├── ThreadPool.cpp
│   ├── HDRTools.vcxproj.user
│   ├── avs
│   │   ├── filesystem.h
│   │   ├── minmax.h
│   │   ├── types.h
│   │   ├── win.h
│   │   ├── cpuid.h
│   │   ├── capi.h
│   │   ├── alignment.h
│   │   ├── posix.h
│   │   └── config.h
│   ├── TransferFunctions.h
│   ├── ThreadPoolDef.h
│   ├── TransferFunctions.cpp
│   ├── HDRTools.vcxproj.filters
│   ├── ThreadPool.h
│   ├── ThreadPoolInterface.h
│   ├── MatrixClass.h
│   ├── HDRTools.vcxproj
│   ├── HDRTools_AVX2_asm.asm
│   ├── HDRTools.h
│   └── HDRTools_AVX2_asm_x64.asm
└── HDRTools.sln
/README.md:
--------------------------------------------------------------------------------
1 | # HDRTools
2 | Avisynth HDR Tools plugin
3 |
--------------------------------------------------------------------------------
/HDRTools - ReadMe.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpsdr/HDRTools/HEAD/HDRTools - ReadMe.txt
--------------------------------------------------------------------------------
/HDRTools/HDRTools.rc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpsdr/HDRTools/HEAD/HDRTools/HDRTools.rc
--------------------------------------------------------------------------------
/HDRTools/ThreadPool.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpsdr/HDRTools/HEAD/HDRTools/ThreadPool.cpp
--------------------------------------------------------------------------------
/HDRTools/HDRTools.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/HDRTools/avs/filesystem.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | // Snippet copied from filesystem/README.md
4 |
// Select a filesystem implementation and expose it under the alias `fs`.
// The C++17 standard library is used when the compiler reports C++17 and
// <filesystem> can be found; otherwise ghc::filesystem is used instead.
// The __has_include test sits on its own line so that a preprocessor without
// __has_include never has to parse it.
#if defined(__cplusplus) && __cplusplus >= 201703L && defined(__has_include)
#if __has_include(<filesystem>)
#define GHC_USE_STD_FS
#include <filesystem>
namespace fs = std::filesystem;
#endif
#endif
#ifndef GHC_USE_STD_FS
#include <ghc/filesystem.hpp>
namespace fs = ghc::filesystem;
#endif
16 |
--------------------------------------------------------------------------------
/HDRTools/TransferFunctions.h:
--------------------------------------------------------------------------------
1 | /*
2 | * TransferFunctions
3 | *
4 | * OOTF,EOTF,OETF, etc... HDR and SDR core functions.
5 | * Copyright (C) 2019 JPSDR
6 | *
7 | * HDRTools is free software; you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation; either version 2, or (at your option)
10 | * any later version.
11 | *
12 | * HDRTools is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with GNU Make; see the file COPYING. If not, write to
19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20 | *
21 | */
22 |
// ---- HLG -------------------------------------------------------------------

// Recomputes the exponents used by HLG_OOTF() and HLG_inv_OOTF() from lw.
void Set_l_HLG(double lw);

// HLG OETF: sqrt(3x) up to x = 1/12, logarithmic segment above it.
double HLG_OETF(double x);

// Inverse of HLG_OETF(): x*x/3 up to x = 0.5, exponential segment above it.
double HLG_inv_OETF(double x);

// HLG OOTF and its inverse, using the exponents last set by Set_l_HLG().
double HLG_OOTF(double x);
double HLG_inv_OOTF(double x);

// ---- OETF / EOTF -----------------------------------------------------------

// Inverse OETF: linear segment near zero, power segment above it.
double inv_OETF(double x);

// NOTE(review): presumably the forward counterparts of inv_OETF(); their
// definitions live in TransferFunctions.cpp.
double OETF(double x);
double EOTF(double x);

// ---- PQ --------------------------------------------------------------------

// NOTE(review): presumably the PQ transfer functions; their definitions live
// in TransferFunctions.cpp.
double PQ_OOTF(double x);
double PQ_OOTF_Inv(double x);
double PQ_EOTF(double x);
double PQ_inv_EOTF(double x);
35 |
--------------------------------------------------------------------------------
/HDRTools.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 11.00
3 | # Visual Studio 2010
4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "HDRTools", "HDRTools\HDRTools.vcxproj", "{1820278E-F1C3-48E8-A951-EE5E95079370}"
5 | EndProject
6 | Global
7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
8 | Debug|Win32 = Debug|Win32
9 | Debug|x64 = Debug|x64
10 | Release|Win32 = Release|Win32
11 | Release|x64 = Release|x64
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {1820278E-F1C3-48E8-A951-EE5E95079370}.Debug|Win32.ActiveCfg = Debug|Win32
15 | {1820278E-F1C3-48E8-A951-EE5E95079370}.Debug|Win32.Build.0 = Debug|Win32
16 | {1820278E-F1C3-48E8-A951-EE5E95079370}.Debug|x64.ActiveCfg = Debug|x64
17 | {1820278E-F1C3-48E8-A951-EE5E95079370}.Debug|x64.Build.0 = Debug|x64
18 | {1820278E-F1C3-48E8-A951-EE5E95079370}.Release|Win32.ActiveCfg = Release|Win32
19 | {1820278E-F1C3-48E8-A951-EE5E95079370}.Release|Win32.Build.0 = Release|Win32
20 | {1820278E-F1C3-48E8-A951-EE5E95079370}.Release|x64.ActiveCfg = Release|x64
21 | {1820278E-F1C3-48E8-A951-EE5E95079370}.Release|x64.Build.0 = Release|x64
22 | EndGlobalSection
23 | GlobalSection(SolutionProperties) = preSolution
24 | HideSolutionNode = FALSE
25 | EndGlobalSection
26 | EndGlobal
27 |
--------------------------------------------------------------------------------
/HDRTools/ThreadPoolDef.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Threadpool
3 | *
4 | * Create and manage a threadpool.
5 | * Copyright (C) 2016 JPSDR
6 | *
7 | * Threadpool is free software; you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation; either version 2, or (at your option)
10 | * any later version.
11 | *
12 | * Threadpool is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with GNU Make; see the file COPYING. If not, write to
19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20 | *
21 | */
22 |
#ifndef __ThreadPoolDef_H__
#define __ThreadPoolDef_H__

#include <stdint.h>	// uint8_t

// Compile-time limits of the thread pool.
#define MAX_MT_THREADS 128 // Maximum possible 255
#define MAX_THREAD_POOL 64 // Maximum possible 127

// Job callback type: takes a single untyped pointer and returns nothing.
typedef void (*ThreadPoolFunction)(void *ptr);

// Named thread levels. The enumerators are consecutive, starting at
// NoneThreadLevel == 0 and ending at CriticalThreadLevel == 7.
enum ThreadLevelName {NoneThreadLevel,IdleThreadLevel,LowestThreadLevel,BelowThreadLevel,
	NormalThreadLevel,AboveThreadLevel,HighestThreadLevel,CriticalThreadLevel};

// Per-thread job description exchanged with the thread pool.
typedef struct _Public_MT_Data_Thread
{
	ThreadPoolFunction pFunc;	// callback for this thread
	void *pClass;			// untyped pointer; NOTE(review): presumably the owning object - confirm
	uint8_t f_process,thread_Id;	// NOTE(review): presumably a job selector and the thread index - confirm
	void *pData;			// untyped pointer; NOTE(review): presumably user data - confirm
} Public_MT_Data_Thread;


#endif // __ThreadPoolDef_H__
46 |
--------------------------------------------------------------------------------
/HDRTools/avs/minmax.h:
--------------------------------------------------------------------------------
1 | // This program is free software; you can redistribute it and/or modify
2 | // it under the terms of the GNU General Public License as published by
3 | // the Free Software Foundation; either version 2 of the License, or
4 | // (at your option) any later version.
5 | //
6 | // This program is distributed in the hope that it will be useful,
7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception. An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 |
32 | #ifndef AVSCORE_MINMAX_H
33 | #define AVSCORE_MINMAX_H
34 |
// Returns the smaller of v1 and v2; returns v2 when neither is smaller.
template<typename T>
T min(T v1, T v2)
{
  return v1 < v2 ? v1 : v2;
}
40 |
// Returns the larger of v1 and v2; returns v2 when neither is larger.
template<typename T>
T max(T v1, T v2)
{
  return v1 > v2 ? v1 : v2;
}
46 |
// Limits n to the range [min, max]. The upper bound is applied first, then the
// lower bound, so min wins if the bounds are passed in the wrong order.
template<typename T>
T clamp(T n, T min, T max)
{
  n = n > max ? max : n;
  return n < min ? min : n;
}
53 |
54 | #endif // AVSCORE_MINMAX_H
55 |
--------------------------------------------------------------------------------
/HDRTools/avs/types.h:
--------------------------------------------------------------------------------
1 | // Avisynth C Interface Version 0.20
2 | // Copyright 2003 Kevin Atkinson
3 |
4 | // This program is free software; you can redistribute it and/or modify
5 | // it under the terms of the GNU General Public License as published by
6 | // the Free Software Foundation; either version 2 of the License, or
7 | // (at your option) any later version.
8 | //
9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program; if not, write to the Free Software
16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
17 | // http://www.gnu.org/copyleft/gpl.html .
18 | //
19 | // As a special exception, I give you permission to link to the
20 | // Avisynth C interface with independent modules that communicate with
21 | // the Avisynth C interface solely through the interfaces defined in
22 | // avisynth_c.h, regardless of the license terms of these independent
23 | // modules, and to copy and distribute the resulting combined work
24 | // under terms of your choice, provided that every copy of the
25 | // combined work is accompanied by a complete copy of the source code
26 | // of the Avisynth C interface and Avisynth itself (with the version
27 | // used to produce the combined work), being distributed under the
28 | // terms of the GNU General Public License plus this exception. An
29 | // independent module is a module which is not derived from or based
30 | // on Avisynth C Interface, such as 3rd-party filters, import and
31 | // export plugins, or graphical user interfaces.
32 |
33 | #ifndef AVS_TYPES_H
34 | #define AVS_TYPES_H
35 |
// Define all types necessary for interfacing with avisynth.dll
// NOTE(review): the name of the first header is a reconstruction (the fixed
// width integer types are what the typedefs below need) - confirm upstream.
#include <stdint.h>
//#include  (disabled include; its target is not recorded here)
#ifdef __cplusplus
  #include <cstddef>
  #include <cstdint>
#else
  #include <stddef.h>
  #include <stdint.h>
#endif

// Raster types used by VirtualDub & Avisynth
typedef uint32_t Pixel32;	// 32-bit unsigned value
typedef uint8_t BYTE;		// 8-bit unsigned value

// Audio Sample information
typedef float SFLOAT;
53 |
54 | #endif //AVS_TYPES_H
55 |
--------------------------------------------------------------------------------
/HDRTools/avs/win.h:
--------------------------------------------------------------------------------
1 | // This program is free software; you can redistribute it and/or modify
2 | // it under the terms of the GNU General Public License as published by
3 | // the Free Software Foundation; either version 2 of the License, or
4 | // (at your option) any later version.
5 | //
6 | // This program is distributed in the hope that it will be useful,
7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception. An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 |
32 | #ifndef AVSCORE_WIN_H
33 | #define AVSCORE_WIN_H
34 |
35 | // Whenever you need windows headers, start by including this file, then the rest.
36 |
37 | // WWUUT? We require XP now?
38 | #if !defined(NTDDI_VERSION) && !defined(_WIN32_WINNT)
39 | #define NTDDI_VERSION 0x05020000
40 | #define _WIN32_WINNT 0x0502
41 | #endif
42 |
43 | #define WIN32_LEAN_AND_MEAN
44 | #define STRICT
45 | #if !defined(NOMINMAX)
46 | #define NOMINMAX
47 | #endif
48 |
49 | #include
50 |
51 | // Provision for UTF-8 max 4 bytes per code point
52 | #define AVS_MAX_PATH MAX_PATH*4
53 |
54 | #endif // AVSCORE_WIN_H
55 |
--------------------------------------------------------------------------------
/HDRTools/TransferFunctions.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * TransferFunctions
3 | *
4 | * OOTF,EOTF,OETF, etc... HDR and SDR core functions.
5 | * Copyright (C) 2019 JPSDR
6 | *
7 | * HDRTools is free software; you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation; either version 2, or (at your option)
10 | * any later version.
11 | *
12 | * HDRTools is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with GNU Make; see the file COPYING. If not, write to
19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20 | *
21 | */
22 |
#include <math.h>

// m1, m2, c1..c3 (and the reciprocals im1, im2) are not referenced by the
// functions in this part of the file.
// NOTE(review): the values match the SMPTE ST 2084 (PQ) constants - confirm.
static const double m1=0.1593017578125,im1=1.0/m1;
static const double m2=78.84375,im2=1.0/m2;
static const double c1=0.8359375;
static const double c2=18.8515625;
static const double c3=18.6875;

// alpha/beta: segment constants of inv_OETF() below.
// alpha2, beta2, ialpha2, coeff_i24 and coeff_i59 are not referenced by the
// functions in this part of the file.
static const double alpha=1.09929682680944,alpham1=alpha-1.0,ialpha=1.0/alpha;
static const double beta=0.018053968510807;
static const double alpha2=267.84,beta2=0.0003024,ialpha2=1.0/alpha2;
static const double coeff_i12=1.0/12.0,coeff_i3=1.0/3.0,coeff_i45=1.0/0.45;
static const double coeff_i24=1.0/2.404,coeff_i59=1.0/59.5208;

// a, b, c: constants of the HLG OETF; ia is the reciprocal of a.
static const double a=0.17883277;
static const double b=1.0-4.0*a,c=0.5-a*log(4.0*a),ia=1.0/a;

// Exponents (minus one) used by HLG_OOTF() / HLG_inv_OOTF().
// They start at the values for an exponent of 1.2 and are overwritten by
// Set_l_HLG(); no synchronization is performed on them here.
static double lm1=1.2-1.0,ilm1=(1.0/1.2)-1.0;

// Recomputes the HLG OOTF exponents from Lw.
// The exponent is 1.2+0.42*log10(Lw/1000); lm1 receives (exponent-1) and ilm1
// receives (1/exponent-1).
// NOTE(review): Lw is presumably the display peak luminance in cd/m2 - confirm.
void Set_l_HLG(double Lw)
{
	const double exponent=1.2+0.42*log10(Lw*0.001);

	lm1=exponent-1.0;
	ilm1=(1.0/exponent)-1.0;
}

// HLG OETF: sqrt(3x) for x<=1/12, a*log(12x-b)+c above.
double HLG_OETF(double x)
{
	if (x<=coeff_i12) return(sqrt(3.0*x));
	else return(a*log(12.0*x-b)+c);
}

// Inverse of HLG_OETF(): x*x/3 for x<=0.5, (exp((x-c)/a)+b)/12 above.
double HLG_inv_OETF(double x)
{
	if (x<=0.5) return(x*x*coeff_i3);
	else return((exp((x-c)*ia)+b)*coeff_i12);
}

// HLG OOTF: x raised to the exponent set by Set_l_HLG(), written as
// x*pow(x,exponent-1).
double HLG_OOTF(double x)
{
	// With a negative lm1, pow(0,lm1) is +inf and 0*inf is NaN, so zero is
	// returned as is (this also keeps the sign of a negative zero).
	if (x==0.0) return(x);
	return(x*pow(x,lm1));
}

// Inverse of HLG_OOTF(): x raised to 1/exponent, written as x*pow(x,1/exponent-1).
double HLG_inv_OOTF(double x)
{
	// ilm1 is negative for any exponent above 1 (including the default 1.2),
	// so the same 0*inf case as in HLG_OOTF() applies here.
	if (x==0.0) return(x);
	return(x*pow(x,ilm1));
}

// Inverse OETF: linear segment below beta*4.5, power segment above it.
double inv_OETF(double x)
{
	// The linear segment has slope 1/4.5: at the threshold x=beta*4.5 it yields
	// beta, which is also what the power segment yields there, so the two
	// segments join continuously.
	if (x<(beta*4.5)) return(x/4.5);
	else return(pow(((x+alpham1))*ialpha,coeff_i45));
}
73 |
74 | double OETF(double x)
75 | {
76 | if (x
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | Fichiers d%27en-tête
20 |
21 |
22 | Fichiers d%27en-tête
23 |
24 |
25 | Fichiers d%27en-tête
26 |
27 |
28 | Fichiers d%27en-tête
29 |
30 |
31 | Fichiers d%27en-tête
32 |
33 |
34 | Fichiers d%27en-tête
35 |
36 |
37 | Fichiers d%27en-tête
38 |
39 |
40 |
41 |
42 | Fichiers sources
43 |
44 |
45 | Fichiers sources
46 |
47 |
48 | Fichiers sources
49 |
50 |
51 | Fichiers sources
52 |
53 |
54 | Fichiers sources
55 |
56 |
57 |
58 |
59 | Fichiers de ressources
60 |
61 |
62 |
63 |
64 | Fichiers sources
65 |
66 |
67 | Fichiers sources
68 |
69 |
70 | Fichiers sources
71 |
72 |
73 | Fichiers sources
74 |
75 |
76 | Fichiers sources
77 |
78 |
79 | Fichiers sources
80 |
81 |
82 | Fichiers sources
83 |
84 |
85 | Fichiers sources
86 |
87 |
88 | Fichiers sources
89 |
90 |
91 | Fichiers sources
92 |
93 |
94 |
--------------------------------------------------------------------------------
/HDRTools/avs/cpuid.h:
--------------------------------------------------------------------------------
1 | // This program is free software; you can redistribute it and/or modify
2 | // it under the terms of the GNU General Public License as published by
3 | // the Free Software Foundation; either version 2 of the License, or
4 | // (at your option) any later version.
5 | //
6 | // This program is distributed in the hope that it will be useful,
7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception. An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 |
32 | #ifndef AVSCORE_CPUID_H
33 | #define AVSCORE_CPUID_H
34 |
35 | // For GetCPUFlags. These are backwards-compatible with those in VirtualDub.
36 | // ending with SSE4_2
37 | // For emulation see https://software.intel.com/en-us/articles/intel-software-development-emulator
// CPU feature flags. Every flag except CPUF_X86_64 is a single bit, so flags
// can be combined with | and tested with &.
enum {
                                   /* oldest CPU to support extension */
  CPUF_FORCE       = 1 << 0,       // N/A
  CPUF_FPU         = 1 << 1,       // 386/486DX
  CPUF_MMX         = 1 << 2,       // P55C, K6, PII
  CPUF_INTEGER_SSE = 1 << 3,       // PIII, Athlon
  CPUF_SSE         = 1 << 4,       // PIII, Athlon XP/MP
  CPUF_SSE2        = 1 << 5,       // PIV, K8
  CPUF_3DNOW       = 1 << 6,       // K6-2
  CPUF_3DNOW_EXT   = 1 << 7,       // Athlon
  // Hammer. Not a bit of its own: the value 0xA0 is the combination of the
  // CPUF_3DNOW_EXT and CPUF_SSE2 bits.
  CPUF_X86_64      = CPUF_3DNOW_EXT | CPUF_SSE2,
  CPUF_SSE3        = 1 << 8,       // PIV+, K8 Venice
  CPUF_SSSE3       = 1 << 9,       // Core 2
  CPUF_SSE4        = 1 << 10,      // same bit as CPUF_SSE4_1
  CPUF_SSE4_1      = 1 << 10,      // Penryn, Wolfdale, Yorkfield
  CPUF_AVX         = 1 << 11,      // Sandy Bridge, Bulldozer
  CPUF_SSE4_2      = 1 << 12,      // Nehalem
  // AVS+
  CPUF_AVX2        = 1 << 13,      // Haswell
  CPUF_FMA3        = 1 << 14,
  CPUF_F16C        = 1 << 15,
  CPUF_MOVBE       = 1 << 16,      // Big Endian move
  CPUF_POPCNT      = 1 << 17,
  CPUF_AES         = 1 << 18,
  CPUF_FMA4        = 1 << 19,

  CPUF_AVX512F     = 1 << 20,      // AVX-512 Foundation.
  CPUF_AVX512DQ    = 1 << 21,      // AVX-512 DQ (Double/Quad granular) Instructions
  CPUF_AVX512PF    = 1 << 22,      // AVX-512 Prefetch
  CPUF_AVX512ER    = 1 << 23,      // AVX-512 Exponential and Reciprocal
  CPUF_AVX512CD    = 1 << 24,      // AVX-512 Conflict Detection
  CPUF_AVX512BW    = 1 << 25,      // AVX-512 BW (Byte/Word granular) Instructions
  CPUF_AVX512VL    = 1 << 26,      // AVX-512 VL (128/256 Vector Length) Extensions
  CPUF_AVX512IFMA  = 1 << 27,      // AVX-512 IFMA integer 52 bit
  CPUF_AVX512VBMI  = 1 << 28,      // AVX-512 VBMI
};

// These prototypes are only declared when BUILDING_AVSCORE is defined.
#ifdef BUILDING_AVSCORE
int GetCPUFlags();
void SetMaxCPU(int new_flags);
#endif
80 |
81 | #endif // AVSCORE_CPUID_H
82 |
--------------------------------------------------------------------------------
/HDRTools/ThreadPool.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Threadpool
3 | *
4 | * Create and manage a threadpool.
5 | * Copyright (C) 2016 JPSDR
6 | *
7 | * Threadpool is free software; you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation; either version 2, or (at your option)
10 | * any later version.
11 | *
12 | * Threadpool is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with GNU Make; see the file COPYING. If not, write to
19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20 | *
21 | */
22 |
23 | #ifndef __ThreadPool_H__
24 | #define __ThreadPool_H__
25 |
26 | #include
27 |
28 | #include "./ThreadPoolDef.h"
29 |
30 | #define THREADPOOL_VERSION "ThreadPool 1.4.5"
31 |
32 | #define MAX_PHYSICAL_CORES 64
33 |
34 | typedef struct _MT_Data_Thread
35 | {
36 | Public_MT_Data_Thread *MTData;
37 | uint8_t f_process,thread_Id;
38 | HANDLE nextJob,jobFinished;
39 | } MT_Data_Thread;
40 |
41 |
42 | typedef struct _Arch_CPU
43 | {
44 | uint8_t NbPhysCore,NbLogicCPU;
45 | uint8_t NbHT[MAX_PHYSICAL_CORES];
46 | ULONG_PTR ProcMask[MAX_PHYSICAL_CORES];
47 | ULONG_PTR FullMask;
48 | } Arch_CPU;
49 |
50 |
51 | class ThreadPool
52 | {
53 | public :
54 | ThreadPool(void);
55 | virtual ~ThreadPool();
56 |
57 | protected :
58 |
59 | Arch_CPU CPU;
60 |
61 | public :
62 |
63 | uint8_t GetThreadNumber(uint8_t thread_number,bool logical);
64 | bool AllocateThreads(uint8_t thread_number,uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,
65 | bool SetAffinity,bool sleep,ThreadLevelName priority);
66 | bool AllocateThreads(uint8_t thread_number,uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,
67 | bool SetAffinity,bool sleep)
68 | {return(AllocateThreads(thread_number,offset_core,offset_ht,UseMaxPhysCore,SetAffinity,sleep,NormalThreadLevel));}
69 | bool DeAllocateThreads(void);
70 | bool ChangeThreadsAffinity(uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,bool SetAffinity);
71 | bool ChangeThreadsLevel(ThreadLevelName priority);
72 | bool RequestThreadPool(uint8_t thread_number,Public_MT_Data_Thread *Data,ThreadLevelName priority);
73 | bool RequestThreadPool(uint8_t thread_number,Public_MT_Data_Thread *Data)
74 | {return(RequestThreadPool(thread_number,Data,NoneThreadLevel));}
75 | bool ReleaseThreadPool(bool sleep);
76 | bool StartThreads(void);
77 | bool WaitThreadsEnd(void);
78 | bool GetThreadPoolStatus(void) {return(Status_Ok);}
79 | uint8_t GetCurrentThreadAllocated(void) {return(CurrentThreadsAllocated);}
80 | uint8_t GetCurrentThreadUsed(void) {return(CurrentThreadsUsed);}
81 | uint8_t GetLogicalCPUNumber(void) {return(CPU.NbLogicCPU);}
82 | uint8_t GetPhysicalCoreNumber(void) {return(CPU.NbPhysCore);}
83 |
84 | protected :
85 |
86 | MT_Data_Thread MT_Thread[MAX_MT_THREADS];
87 | HANDLE nextJob[MAX_MT_THREADS],jobFinished[MAX_MT_THREADS];
88 | HANDLE thds[MAX_MT_THREADS];
89 | DWORD tids[MAX_MT_THREADS];
90 | ULONG_PTR ThreadMask[MAX_MT_THREADS];
91 | bool ThreadSleep[MAX_MT_THREADS];
92 | ThreadLevelName nPriority;
93 |
94 | bool Status_Ok;
95 | uint8_t TotalThreadsRequested,CurrentThreadsAllocated,CurrentThreadsUsed;
96 |
97 | void FreeThreadPool(void);
98 | void DestroyThreadPool(void);
99 | void CreateThreadPool(uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,bool SetAffinity,
100 | bool sleep,ThreadLevelName priority);
101 |
102 | private :
103 |
104 | static DWORD WINAPI StaticThreadpool(LPVOID lpParam);
105 |
106 | ThreadPool (const ThreadPool &other);
107 | ThreadPool& operator = (const ThreadPool &other);
108 | bool operator == (const ThreadPool &other) const;
109 | bool operator != (const ThreadPool &other) const;
110 | };
111 |
112 | #endif // __ThreadPool_H__
113 |
--------------------------------------------------------------------------------
/HDRTools/avs/capi.h:
--------------------------------------------------------------------------------
1 | // Avisynth C Interface Version 0.20
2 | // Copyright 2003 Kevin Atkinson
3 |
4 | // This program is free software; you can redistribute it and/or modify
5 | // it under the terms of the GNU General Public License as published by
6 | // the Free Software Foundation; either version 2 of the License, or
7 | // (at your option) any later version.
8 | //
9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program; if not, write to the Free Software
16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
17 | // http://www.gnu.org/copyleft/gpl.html .
18 | //
19 | // As a special exception, I give you permission to link to the
20 | // Avisynth C interface with independent modules that communicate with
21 | // the Avisynth C interface solely through the interfaces defined in
22 | // avisynth_c.h, regardless of the license terms of these independent
23 | // modules, and to copy and distribute the resulting combined work
24 | // under terms of your choice, provided that every copy of the
25 | // combined work is accompanied by a complete copy of the source code
26 | // of the Avisynth C interface and Avisynth itself (with the version
27 | // used to produce the combined work), being distributed under the
28 | // terms of the GNU General Public License plus this exception. An
29 | // independent module is a module which is not derived from or based
30 | // on Avisynth C Interface, such as 3rd-party filters, import and
31 | // export plugins, or graphical user interfaces.
32 |
33 | #ifndef AVS_CAPI_H
34 | #define AVS_CAPI_H
35 |
36 | #include "config.h"
37 |
38 | #ifdef AVS_POSIX // POSIX builds: make __declspec(...) expand to nothing
39 | // this is also defined in avs/posix.h
40 | #ifndef AVS_HAIKU // the empty definition is skipped when AVS_HAIKU is defined
41 | #define __declspec(x)
42 | #endif
43 | #endif
44 |
45 | #ifdef __cplusplus // EXTERN_C: extern "C" when compiled as C++, empty otherwise
46 | # define EXTERN_C extern "C"
47 | #else
48 | # define EXTERN_C
49 | #endif
50 |
51 | #ifdef AVS_WINDOWS // AVSC_CC: calling-convention keyword used by the AVSC_API macro
52 | #ifdef BUILDING_AVSCORE
53 | # if defined(GCC) && defined(X86_32) // this combination gets no explicit keyword
54 | # define AVSC_CC
55 | # else // MSVC builds and 64-bit GCC
56 | # ifndef AVSC_USE_STDCALL // __cdecl unless AVSC_USE_STDCALL is defined
57 | # define AVSC_CC __cdecl
58 | # else
59 | # define AVSC_CC __stdcall
60 | # endif
61 | # endif
62 | #else // needed for programs that talk to AviSynth+
63 | # ifndef AVSC_WIN32_GCC32 // see comment below
64 | # ifndef AVSC_USE_STDCALL // __cdecl unless AVSC_USE_STDCALL is defined
65 | # define AVSC_CC __cdecl
66 | # else
67 | # define AVSC_CC __stdcall
68 | # endif
69 | # else // AVSC_WIN32_GCC32 defined: no explicit keyword
70 | # define AVSC_CC
71 | # endif
72 | #endif
73 | # else // AVS_WINDOWS not defined: no explicit keyword
74 | # define AVSC_CC
75 | #endif
76 |
77 | // On 64-bit Windows, there's only one calling convention,
78 | // so there is no difference between MSVC and GCC. On 32-bit,
79 | // this isn't true. The convention that GCC needs to use to
80 | // even build AviSynth+ as 32-bit makes anything that uses
81 | // it incompatible with 32-bit MSVC builds of AviSynth+.
82 | // The AVSC_WIN32_GCC32 define is meant to provide a user
83 | // switchable way to make builds of FFmpeg to test 32-bit
84 | // GCC builds of AviSynth+ without having to screw around
85 | // with alternate headers, while still default to the usual
86 | // situation of using 32-bit MSVC builds of AviSynth+.
87 |
88 | // Hopefully, this situation will eventually be resolved
89 | // and a broadly compatible solution will arise so the
90 | // same 32-bit FFmpeg build can handle either MSVC or GCC
91 | // builds of AviSynth+.
92 |
93 | #define AVSC_INLINE static __inline
94 |
95 | #ifdef BUILDING_AVSCORE // building the core: AVSC_API declares the functions themselves
96 | #ifdef AVS_WINDOWS
97 | # ifndef AVS_STATIC_LIB // dllexport unless AVS_STATIC_LIB is defined
98 | # define AVSC_EXPORT __declspec(dllexport)
99 | # else
100 | # define AVSC_EXPORT
101 | # endif
102 | # define AVSC_API(ret, name) EXTERN_C AVSC_EXPORT ret AVSC_CC name
103 | #else // core build, AVS_WINDOWS not defined: AVSC_API does not use AVSC_EXPORT
104 | # define AVSC_EXPORT EXTERN_C
105 | # define AVSC_API(ret, name) EXTERN_C ret AVSC_CC name
106 | #endif
107 | #else // not building the core
108 | # define AVSC_EXPORT EXTERN_C __declspec(dllexport)
109 | # ifndef AVS_STATIC_LIB // dllimport unless AVS_STATIC_LIB is defined
110 | # define AVSC_IMPORT __declspec(dllimport)
111 | # else
112 | # define AVSC_IMPORT
113 | # endif
114 | # ifndef AVSC_NO_DECLSPEC // default: AVSC_API declares an imported function
115 | # define AVSC_API(ret, name) EXTERN_C AVSC_IMPORT ret AVSC_CC name
116 | # else // AVSC_NO_DECLSPEC: AVSC_API declares a function-pointer type named name_func
117 | # define AVSC_API(ret, name) typedef ret (AVSC_CC *name##_func)
118 | # endif
119 | #endif
120 |
121 | #endif //AVS_CAPI_H
122 |
--------------------------------------------------------------------------------
/HDRTools/avs/alignment.h:
--------------------------------------------------------------------------------
1 | // Avisynth C Interface Version 0.20
2 | // Copyright 2003 Kevin Atkinson
3 |
4 | // This program is free software; you can redistribute it and/or modify
5 | // it under the terms of the GNU General Public License as published by
6 | // the Free Software Foundation; either version 2 of the License, or
7 | // (at your option) any later version.
8 | //
9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program; if not, write to the Free Software
16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
17 | // http://www.gnu.org/copyleft/gpl.html .
18 | //
19 | // As a special exception, I give you permission to link to the
20 | // Avisynth C interface with independent modules that communicate with
21 | // the Avisynth C interface solely through the interfaces defined in
22 | // avisynth_c.h, regardless of the license terms of these independent
23 | // modules, and to copy and distribute the resulting combined work
24 | // under terms of your choice, provided that every copy of the
25 | // combined work is accompanied by a complete copy of the source code
26 | // of the Avisynth C interface and Avisynth itself (with the version
27 | // used to produce the combined work), being distributed under the
28 | // terms of the GNU General Public License plus this exception. An
29 | // independent module is a module which is not derived from or based
30 | // on Avisynth C Interface, such as 3rd-party filters, import and
31 | // export plugins, or graphical user interfaces.
32 |
33 | #ifndef AVS_ALIGNMENT_H
34 | #define AVS_ALIGNMENT_H
35 |
36 | // Functions and macros to help work with alignment requirements.
37 |
38 | // Tells if a number is a power of two (zero is rejected). Arguments of these macros are evaluated more than once.
39 | #define IS_POWER2(n) ((n) && !((n) & ((n) - 1)))
40 |
41 | // Tells if the pointer "ptr" is aligned to "align" bytes ("align" must be a power of two).
42 | #define IS_PTR_ALIGNED(ptr, align) (((uintptr_t)(ptr) & ((uintptr_t)((align)-1))) == 0)
43 |
44 | // Rounds up the number "n" to a multiple of "align" (a power of two); "n" is returned unchanged if already a multiple.
45 | #define ALIGN_NUMBER(n, align) (((n) + (align)-1) & (~((align)-1)))
46 |
47 | // Rounds up the pointer address "ptr" to a multiple of "align" (a power of two); the result has type uintptr_t.
48 | #define ALIGN_POINTER(ptr, align) (((uintptr_t)(ptr) + (align)-1) & (~(uintptr_t)((align)-1)))
49 |
50 | #ifdef __cplusplus
51 |
52 | #include <cstdint> // uintptr_t
53 | #include <cstdlib> // malloc, free
54 | #include <cassert> // assert
55 | #include "config.h"
56 |
57 | #if defined(MSVC) && _MSC_VER<1900
58 | // MSVC before VS2015 (_MSC_VER 1900), e.g. VS2013, lacks C++11 'alignas'; use the __declspec form there
59 | #define avs_alignas(x) __declspec(align(x))
60 | #else
61 | // assumes C++11 support
62 | #define avs_alignas(x) alignas(x)
63 | #endif
64 | // Returns true when "ptr" is aligned to "align" bytes; "align" must be a power of two (asserted).
65 | template<typename T>
66 | static bool IsPtrAligned(T* ptr, size_t align)
67 | {
68 | assert(IS_POWER2(align)); // the mask test below is only meaningful for power-of-two alignments
69 | return (bool)IS_PTR_ALIGNED(ptr, align);
70 | }
71 | // Rounds "n" up to a multiple of "align"; "align" must be a power of two (asserted).
72 | template<typename T>
73 | static T AlignNumber(T n, T align)
74 | {
75 | assert(IS_POWER2(align)); // the mask arithmetic below is only meaningful for power-of-two alignments
76 | return ALIGN_NUMBER(n, align);
77 | }
78 | // Rounds the address held in "ptr" up to a multiple of "align"; "align" must be a power of two (asserted).
79 | template<typename T>
80 | static T* AlignPointer(T* ptr, size_t align)
81 | {
82 | assert(IS_POWER2(align)); // the mask arithmetic below is only meaningful for power-of-two alignments
83 | return (T*)ALIGN_POINTER(ptr, align);
84 | }
85 |
86 | extern "C"
87 | {
88 | #else
89 | #include <stdlib.h> // malloc, free, NULL for the C build
90 | #endif // __cplusplus
91 |
92 | // Returns a new buffer that is at least the size "nbytes".
93 | // The buffer will be aligned to "align" bytes ("align" must be a power of two).
94 | // Returns NULL on error: invalid "align", a size that would overflow, or malloc failure.
95 | // On successful allocation, the returned buffer must be freed using "avs_free".
96 | inline void* avs_malloc(size_t nbytes, size_t align)
97 | {
98 | if (!IS_POWER2(align))
99 | return NULL;
100 | // Extra room: one pointer slot for the original malloc() result plus worst-case alignment padding.
101 | size_t offset = sizeof(void*) + align - 1;
102 | if (nbytes > (size_t)-1 - offset) return NULL; // nbytes + offset would wrap around and under-allocate
103 | void *orig = malloc(nbytes + offset);
104 | if (orig == NULL)
105 | return NULL;
106 | // Round up past the pointer slot, then stash the malloc() result just below the aligned address for avs_free.
107 | void **aligned = (void**)(((uintptr_t)orig + (uintptr_t)offset) & (~(uintptr_t)(align-1)));
108 | aligned[-1] = orig;
109 | return aligned;
110 | }
111 |
112 | // Buffers allocated using "avs_malloc" must be freed
113 | // using "avs_free" instead of "free". A NULL "ptr" is ignored.
114 | inline void avs_free(void *ptr)
115 | {
116 | // Mirroring free()'s semantics requires us to accept NULLs
117 | if (ptr == NULL)
118 | return;
119 | // The pointer-sized slot just below "ptr" (written by avs_malloc as aligned[-1]) is what gets handed to free().
120 | free(((void**)ptr)[-1]);
121 | }
122 |
123 | #ifdef __cplusplus
124 | } // extern "C"
125 |
126 | // The point of these undef's is to force using the template functions
127 | // if we are in C++ mode. For C, the user can rely only on the macros.
128 | #undef IS_PTR_ALIGNED
129 | #undef ALIGN_NUMBER
130 | #undef ALIGN_POINTER
131 | // IS_POWER2 is not undefined here, so it remains available in C++ mode too.
132 | #endif // __cplusplus
133 |
134 | #endif //AVS_ALIGNMENT_H
135 |
--------------------------------------------------------------------------------
/HDRTools/avs/posix.h:
--------------------------------------------------------------------------------
1 | // This program is free software; you can redistribute it and/or modify
2 | // it under the terms of the GNU General Public License as published by
3 | // the Free Software Foundation; either version 2 of the License, or
4 | // (at your option) any later version.
5 | //
6 | // This program is distributed in the hope that it will be useful,
7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception. An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 |
32 | #ifdef AVS_POSIX
33 | #ifndef AVSCORE_POSIX_H
34 | #define AVSCORE_POSIX_H
35 |
36 | #ifdef __cplusplus
37 | #include <cstring> // strlen, strcmp, strdup targets of the shims below
38 | #endif
39 | #include <strings.h> // strcasecmp, strncasecmp
40 | #include <unistd.h> // chdir, getcwd
41 |
42 | // Define these MSVC extensions used in Avisynth (they expand to nothing here)
43 | #define __single_inheritance
44 |
45 | // These things don't exist in Linux
46 | #if defined(AVS_HAIKU) // on Haiku an existing __declspec macro is dropped first, then redefined below
47 | #undef __declspec
48 | #endif
49 | #define __declspec(x)
50 | #define lstrlen strlen
51 | #define lstrcmp strcmp
52 | #define lstrcmpi strcasecmp
53 | #define _stricmp strcasecmp
54 | #define _strnicmp strncasecmp
55 | #define _strdup strdup
56 | #define SetCurrentDirectory(x) chdir(x)
57 | #define SetCurrentDirectoryW(x) chdir(x)
58 | #define GetCurrentDirectoryW(x) getcwd(x) // NOTE(review): POSIX getcwd() takes (buf, size); this one-argument mapping looks unusable as written - confirm
59 | #define _putenv putenv
60 | #define _alloca alloca
61 |
62 | // Borrowing some compatibility macros from AvxSynth, slightly modified. Operands are narrowed to 32 bits before the 64-bit multiply.
63 | #define UInt32x32To64(a, b) ((uint64_t)(((uint64_t)((uint32_t)(a))) * ((uint32_t)(b))))
64 | #define Int64ShrlMod32(a, b) ((uint64_t)((uint64_t)(a) >> (b)))
65 | #define Int32x32To64(a, b) ((int64_t)(((int64_t)((int32_t)(a))) * ((int32_t)(b)))) // int32_t, not long: long is 64-bit on LP64 targets
66 |
67 | #define InterlockedIncrement(x) __sync_add_and_fetch((x), 1) // yields the incremented value
68 | #define InterlockedDecrement(x) __sync_sub_and_fetch((x), 1) // yields the decremented value
69 | #define InterlockedExchangeAdd(x, v) __sync_fetch_and_add((x), (v)) // yields the value held *before* the addition, as the Win32 function does
70 |
71 | #define MulDiv(nNumber, nNumerator, nDenominator) (int32_t) (((int64_t) (nNumber) * (int64_t) (nNumerator) + (int64_t) ((nDenominator)/2)) / (int64_t) (nDenominator)) // 64-bit product plus half the denominator, then truncating division; no guard against nDenominator == 0
72 | // TRUE / FALSE are only defined when not already provided.
73 | #ifndef TRUE
74 | #define TRUE true
75 | #endif
76 |
77 | #ifndef FALSE
78 | #define FALSE false
79 | #endif
80 | // Minimal HRESULT-style helpers: FAILED() tests bit 31 (0x80000000) of the code, SUCCEEDED() is its negation.
81 | #define S_FALSE (0x00000001)
82 | #define E_FAIL (0x80004005)
83 | #define FAILED(hr) ((hr) & 0x80000000)
84 | #define SUCCEEDED(hr) (!FAILED(hr))
85 |
86 | // Statuses copied from comments in exception.cpp. Plain numeric constants; nothing else in this header refers to them.
87 | #define STATUS_GUARD_PAGE_VIOLATION 0x80000001
88 | #define STATUS_DATATYPE_MISALIGNMENT 0x80000002
89 | #define STATUS_BREAKPOINT 0x80000003
90 | #define STATUS_SINGLE_STEP 0x80000004
91 | #define STATUS_ACCESS_VIOLATION 0xc0000005
92 | #define STATUS_IN_PAGE_ERROR 0xc0000006
93 | #define STATUS_INVALID_HANDLE 0xc0000008
94 | #define STATUS_NO_MEMORY 0xc0000017
95 | #define STATUS_ILLEGAL_INSTRUCTION 0xc000001d
96 | #define STATUS_NONCONTINUABLE_EXCEPTION 0xc0000025
97 | #define STATUS_INVALID_DISPOSITION 0xc0000026
98 | #define STATUS_ARRAY_BOUNDS_EXCEEDED 0xc000008c
99 | #define STATUS_FLOAT_DENORMAL_OPERAND 0xc000008d
100 | #define STATUS_FLOAT_DIVIDE_BY_ZERO 0xc000008e
101 | #define STATUS_FLOAT_INEXACT_RESULT 0xc000008f
102 | #define STATUS_FLOAT_INVALID_OPERATION 0xc0000090
103 | #define STATUS_FLOAT_OVERFLOW 0xc0000091
104 | #define STATUS_FLOAT_STACK_CHECK 0xc0000092
105 | #define STATUS_FLOAT_UNDERFLOW 0xc0000093
106 | #define STATUS_INTEGER_DIVIDE_BY_ZERO 0xc0000094
107 | #define STATUS_INTEGER_OVERFLOW 0xc0000095
108 | #define STATUS_PRIVILEGED_INSTRUCTION 0xc0000096
109 | #define STATUS_STACK_OVERFLOW 0xc00000fd
110 |
111 | // Calling convention keywords: defined to nothing here, except on Haiku where they are left untouched
112 | #ifndef AVS_HAIKU
113 | #define __stdcall
114 | #define __cdecl
115 | #endif
116 |
117 | // PowerPC OS X is really niche these days, but this painless aliasing
118 | // of the function/macro names used in posix_get_available_memory()
119 | // is all it takes to let it work. The G5 was 64-bit, and if 10.5 Leopard
120 | // can run in native 64-bit, it probably uses the names in that block as-is.
121 | #ifdef AVS_MACOS
122 | #ifdef PPC32 // 32-bit PowerPC only: map each "64"-suffixed name onto its unsuffixed counterpart
123 | #define vm_statistics64_data_t vm_statistics_data_t
124 | #define HOST_VM_INFO64_COUNT HOST_VM_INFO_COUNT
125 | #define HOST_VM_INFO64 HOST_VM_INFO
126 | #define host_statistics64 host_statistics
127 | #endif // PPC32
128 | #endif // AVS_MACOS
129 |
130 | #endif // AVSCORE_POSIX_H
131 | #endif // AVS_POSIX
132 |
--------------------------------------------------------------------------------
/HDRTools/ThreadPoolInterface.h:
--------------------------------------------------------------------------------
1 | /*
2 | * ThreadpoolInterface
3 | *
4 | * Allow to use the threadpool, kind of API.
5 | * Copyright (C) 2017 JPSDR
6 | *
7 | * ThreadpoolInterface is free software; you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation; either version 2, or (at your option)
10 | * any later version.
11 | *
12 | * ThreadpoolInterface is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with GNU Make; see the file COPYING. If not, write to
19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20 | *
21 | */
22 |
23 | #ifndef __ThreadPoolInterface_H__
24 | #define __ThreadPoolInterface_H__
25 |
26 | #include
27 | #include
28 |
29 | #include "./ThreadPoolDef.h"
30 |
31 | #define THREADPOOLINTERFACE_VERSION "ThreadPoolInterface 1.12.1"
32 |
33 | class ThreadPoolInterface;
34 |
35 | class UserData // per-user record; every field is protected and reached by ThreadPoolInterface through the friend declaration
36 | {
37 | friend ThreadPoolInterface;
38 |
39 | public :
40 |
41 | UserData(void);
42 | virtual ~UserData(void);
43 |
44 | protected :
45 | // NOTE(review): the field names parallel ThreadPoolInterface's Enable*/Disable*/Configure* methods; presumably those toggle these fields - confirm in ThreadPool sources.
46 | uint32_t UserId;
47 | bool AllowSeveral;
48 | bool AllowWaiting;
49 | bool AllowTimeOut;
50 | bool AllowRetryMax;
51 | DWORD TimeOut;
52 | uint8_t RetryMax;
53 | int8_t NbrePool;
54 | int8_t UsedPool[MAX_THREAD_POOL]; // fixed-size array, MAX_THREAD_POOL entries
55 | };
56 |
57 |
58 | class ThreadPoolInterface // API front-end to the thread pool; only declarations and a few forwarding overloads live in this header
59 | {
60 | public :
61 | // Instances are obtained through the static Init(); the default constructor is protected and copying is declared private below.
62 | virtual ~ThreadPoolInterface(void);
63 | static ThreadPoolInterface* Init(uint8_t num);
64 |
65 | uint8_t GetThreadNumber(uint8_t thread_number,bool logical);
66 | int16_t AddPool(uint8_t num);
67 | bool CreatePool(uint8_t num);
68 | bool DeletePool(uint8_t num);
69 | bool RemovePool(uint8_t num);
70 | bool AllocateThreads(uint8_t thread_number,uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,
71 | bool SetAffinity,bool sleep,ThreadLevelName priority,int8_t nPool);
72 | bool AllocateThreads(uint8_t thread_number,uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,
73 | bool SetAffinity,bool sleep,int8_t nPool)
74 | {return(AllocateThreads(thread_number,offset_core,offset_ht,UseMaxPhysCore,SetAffinity,sleep,
75 | NormalThreadLevel,nPool));} // convenience overload: forwards with priority = NormalThreadLevel
76 | bool GetUserId(uint32_t &UserId);
77 | bool RemoveUserId(uint32_t UserId);
78 | bool ChangeThreadsAffinity(uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,bool SetAffinity,int8_t nPool);
79 | bool ChangeThreadsLevel(ThreadLevelName priority,int8_t nPool);
80 | bool DeAllocateUserThreads(uint32_t UserId,bool check);
81 | bool DeAllocatePoolThreads(uint8_t nPool,bool check);
82 | bool DeAllocateAllThreads(bool check);
83 | bool RequestThreadPool(uint32_t UserId,uint8_t thread_number,Public_MT_Data_Thread *Data,
84 | ThreadLevelName priority,int8_t nPool,bool Exclusive);
85 | bool RequestThreadPool(uint32_t UserId,int8_t &idxPool,uint8_t thread_number,Public_MT_Data_Thread *Data,
86 | ThreadLevelName priority,int8_t &nPool,bool Exclusive);
87 | bool RequestThreadPool(uint32_t UserId,int8_t &idxPool,uint8_t thread_number,Public_MT_Data_Thread *Data);
88 | bool RequestThreadPool(uint32_t UserId,int8_t &idxPool,uint8_t thread_number,Public_MT_Data_Thread *Data,
89 | ThreadLevelName priority);
90 | bool RequestThreadPool(uint32_t UserId,uint8_t thread_number,Public_MT_Data_Thread *Data,
91 | int8_t nPool,bool Exclusive)
92 | {return(RequestThreadPool(UserId,thread_number,Data,NoneThreadLevel,nPool,Exclusive));} // forwards with priority = NoneThreadLevel
93 | bool RequestThreadPool(uint32_t UserId,uint8_t thread_number,Public_MT_Data_Thread *Data)
94 | {return(RequestThreadPool(UserId,thread_number,Data,NoneThreadLevel,-1,false));} // forwards with NoneThreadLevel, nPool = -1, Exclusive = false
95 | bool RequestThreadPool(uint32_t UserId,uint8_t thread_number,Public_MT_Data_Thread *Data,
96 | ThreadLevelName priority)
97 | {return(RequestThreadPool(UserId,thread_number,Data,priority,-1,false));} // forwards with nPool = -1, Exclusive = false
98 | bool ReleaseThreadPool(uint32_t UserId,bool sleep);
99 | bool ReleaseThreadPool(uint32_t UserId,bool sleep,int8_t idxPool);
100 | bool StartThreads(uint32_t UserId);
101 | bool StartThreads(uint32_t UserId,int8_t idxPool);
102 | bool WaitThreadsEnd(uint32_t UserId);
103 | bool WaitThreadsEnd(uint32_t UserId,int8_t idxPool);
104 | bool GetThreadPoolStatus(uint32_t UserId,int8_t idxPool,int8_t nPool);
105 | uint8_t GetCurrentThreadAllocated(uint32_t UserId,int8_t idxPool,int8_t nPool);
106 | uint8_t GetCurrentThreadUsed(uint32_t UserId,int8_t idxPool,int8_t nPool);
107 | bool EnableAllowSeveral(uint32_t UserId);
108 | bool DisableAllowSeveral(uint32_t UserId);
109 | bool IsAllowedSeveral(uint32_t UserId);
110 | bool EnableWaitonRequest(uint32_t UserId);
111 | bool DisableWaitonRequest(uint32_t UserId);
112 | bool EnableTimeOutonRequest(uint32_t UserId);
113 | bool DisableTimeOutonRequest(uint32_t UserId);
114 | bool EnableRetryMaxonRequest(uint32_t UserId);
115 | bool DisableRetryMaxonRequest(uint32_t UserId);
116 | bool ConfigureTimeOutValue(uint32_t UserId, DWORD dwMilliseconds);
117 | bool ConfigureRetryMaxValue(uint32_t UserId, uint8_t NbreMax);
118 | int8_t GetPoolAllocated(uint32_t UserId);
119 | int8_t GetPoolNumber(uint32_t UserId,int8_t idxPool);
120 | int8_t GetPoolIndex(uint32_t UserId,int8_t nPool);
121 | uint8_t GetLogicalCPUNumber(void);
122 | uint8_t GetPhysicalCoreNumber(void);
123 |
124 | protected :
125 |
126 | bool Status_Ok;
127 | uint8_t NbrePool;
128 |
129 | public :
130 |
131 | bool GetThreadPoolInterfaceStatus(void) {return(Status_Ok);}
132 | int8_t GetCurrentPoolCreated(void) {return((Status_Ok) ? NbrePool:-1);} // -1 when Status_Ok is false; otherwise NbrePool (a uint8_t) converted to int8_t
133 |
134 | protected :
135 |
136 | ThreadPoolInterface(void);
137 |
138 | CRITICAL_SECTION CriticalSection;
139 | HANDLE ghMutexResources;
140 | BOOL CSectionOk;
141 | HANDLE JobsEnded[MAX_THREAD_POOL],ThreadPoolFree[MAX_THREAD_POOL];
142 | std::vector TabId; // NOTE(review): no element type is visible here; the template argument appears to have been lost - confirm against the original header
143 | HANDLE EndExclusive;
144 | bool Error_Occured;
145 | // Arrays below hold one entry per pool slot (MAX_THREAD_POOL entries each).
146 | bool ThreadPoolRequested[MAX_THREAD_POOL],JobsRunning[MAX_THREAD_POOL];
147 | bool ThreadPoolReleased[MAX_THREAD_POOL],ThreadWaitEnd[MAX_THREAD_POOL];
148 | bool ThreadPoolWaitFree[MAX_THREAD_POOL];
149 | uint32_t ThreadPoolUserId[MAX_THREAD_POOL];
150 | bool ExclusiveMode;
151 | uint8_t NbrePoolEvent;
152 |
153 | bool CreatePoolEvent(uint8_t num);
154 | void FreeData(void);
155 | void FreePool(void);
156 | void FreePool(int8_t nPool);
157 | bool EnterCS(void);
158 | void LeaveCS(void);
159 | bool GetMutex(void);
160 | void FreeMutex(void);
161 | int32_t GetUserIdIndex(uint32_t UserId);
162 | bool ReleaseThreadPoolCore(uint32_t UserId,int32_t index,bool sleep,int8_t nPool,int8_t idxPool);
163 | bool StartThreadsCore(int8_t nPool);
164 | bool WaitThreadsEndCore(uint32_t UserId,int8_t nPool,int8_t idxPool);
165 |
166 | private :
167 | // Copy construction, assignment and comparison are declared private, so outside code cannot use them.
168 | ThreadPoolInterface (const ThreadPoolInterface &other);
169 | ThreadPoolInterface& operator = (const ThreadPoolInterface &other);
170 | bool operator == (const ThreadPoolInterface &other) const;
171 | bool operator != (const ThreadPoolInterface &other) const;
172 | };
173 |
174 | #endif // __ThreadPoolInterface_H__
175 |
--------------------------------------------------------------------------------
/HDRTools/avs/config.h:
--------------------------------------------------------------------------------
1 | // Avisynth C Interface Version 0.20
2 | // Copyright 2003 Kevin Atkinson
3 |
4 | // This program is free software; you can redistribute it and/or modify
5 | // it under the terms of the GNU General Public License as published by
6 | // the Free Software Foundation; either version 2 of the License, or
7 | // (at your option) any later version.
8 | //
9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program; if not, write to the Free Software
16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
17 | // http://www.gnu.org/copyleft/gpl.html .
18 | //
19 | // As a special exception, I give you permission to link to the
20 | // Avisynth C interface with independent modules that communicate with
21 | // the Avisynth C interface solely through the interfaces defined in
22 | // avisynth_c.h, regardless of the license terms of these independent
23 | // modules, and to copy and distribute the resulting combined work
24 | // under terms of your choice, provided that every copy of the
25 | // combined work is accompanied by a complete copy of the source code
26 | // of the Avisynth C interface and Avisynth itself (with the version
27 | // used to produce the combined work), being distributed under the
28 | // terms of the GNU General Public License plus this exception. An
29 | // independent module is a module which is not derived from or based
30 | // on Avisynth C Interface, such as 3rd-party filters, import and
31 | // export plugins, or graphical user interfaces.
32 |
33 | #ifndef AVS_CONFIG_H
34 | #define AVS_CONFIG_H
35 |
36 | // Undefine this to get cdecl calling convention
37 | #define AVSC_USE_STDCALL 1
38 |
39 | // NOTE TO PLUGIN AUTHORS:
40 | // Because FRAME_ALIGN can be substantially higher than the alignment
41 | // a plugin actually needs, plugins should not use FRAME_ALIGN to check for
42 | // alignment. They should always request the exact alignment value they need.
43 | // This is to make sure that plugins work over the widest range of AviSynth
44 | // builds possible.
45 | #define FRAME_ALIGN 64
46 | // CPU architecture detection: the first matching branch defines one architecture macro; otherwise compilation stops with #error.
47 | #if defined(_M_AMD64) || defined(__x86_64)
48 | # define X86_64
49 | #elif defined(_M_IX86) || defined(__i386__)
50 | # define X86_32
51 | // VS2017 introduced _M_ARM64
52 | #elif defined(_M_ARM64) || defined(__aarch64__)
53 | # define ARM64
54 | #elif defined(_M_ARM) || defined(__arm__)
55 | # define ARM32
56 | #elif defined(__PPC64__) // tested before the generic PowerPC macros on the next branch
57 | # define PPC64
58 | #elif defined(_M_PPC) || defined(__PPC__) || defined(__POWERPC__)
59 | # define PPC32
60 | #elif defined(__riscv)
61 | # define RISCV
62 | #elif defined(__loongarch__)
63 | # define LOONGARCH
64 | #elif defined(__sparc_v9__)
65 | # define SPARC
66 | #elif defined(__mips__)
67 | # define MIPS
68 | #else
69 | # error Unsupported CPU architecture.
70 | #endif
71 |
72 | //              VC++   LLVM-Clang-cl   MinGW-Gnu
73 | // MSVC          x           x
74 | // MSVC_PURE     x
75 | // CLANG                     x
76 | // GCC                                     x
77 | // Each branch below also defines AVS_FORCEINLINE for that compiler.
78 | #if defined(__clang__)
79 | // Check clang first. clang-cl also defines _MSC_VER
80 | // We set MSVC because they are mostly compatible
81 | # define CLANG
82 | #if defined(_MSC_VER)
83 | # define MSVC
84 | # define AVS_FORCEINLINE __attribute__((always_inline))
85 | #else
86 | # define AVS_FORCEINLINE __attribute__((always_inline)) inline
87 | #endif
88 | #elif defined(_MSC_VER)
89 | # define MSVC
90 | # define MSVC_PURE
91 | # define AVS_FORCEINLINE __forceinline
92 | #elif defined(__GNUC__)
93 | # define GCC
94 | # define AVS_FORCEINLINE __attribute__((always_inline)) inline
95 | #elif defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
96 | // Intel C++ Compilers with the MSVC command line interface do not end up here; they match the _MSC_VER branch
97 | # define AVS_FORCEINLINE inline
98 | # undef __forceinline
99 | # define __forceinline inline
100 | #else
101 | # error Unsupported compiler.
102 | # define AVS_FORCEINLINE inline
103 | # undef __forceinline
104 | # define __forceinline inline
105 | #endif
106 |
107 | #if defined(_WIN32) // operating system detection; every non-Windows branch also defines AVS_POSIX
108 | # define AVS_WINDOWS
109 | #elif defined(__linux__)
110 | # define AVS_LINUX
111 | # define AVS_POSIX
112 | #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
113 | # define AVS_BSD
114 | # define AVS_POSIX
115 | #elif defined(__APPLE__)
116 | # define AVS_MACOS
117 | # define AVS_POSIX
118 | #elif defined(__HAIKU__)
119 | # define AVS_HAIKU
120 | # define AVS_POSIX
121 | #else
122 | # error Operating system unsupported.
123 | #endif
124 | // On Windows, additionally record the architecture family (x86 or ARM) from the macros set by the CPU detection.
125 | #if defined(AVS_WINDOWS)
126 | # if defined(X86_32) || defined(X86_64)
127 | # define AVS_WINDOWS_X86
128 | # elif defined(ARM64) || defined(ARM32)
129 | # define AVS_WINDOWS_ARM
130 | # endif
131 | #endif
132 | // NOTE(review): MSVC (which includes clang-cl) on Windows ARM reaches this #error although AVS_WINDOWS_ARM is defined above - confirm that is intended.
133 | #if defined(MSVC) && !defined(AVS_WINDOWS_X86)
134 | # error Unsupported combination of compiler, operating system, and machine architecture.
135 | #endif
136 |
137 | // useful warning-disabling macros for supported compilers
138 | // MSVC takes a warning number, GCC/Clang take a -W name; other compilers get empty PUSH/POP and named macros.
139 | #if defined(_MSC_VER)
140 | #define DISABLE_WARNING_PUSH __pragma(warning( push ))
141 | #define DISABLE_WARNING_POP __pragma(warning( pop ))
142 | #define DISABLE_WARNING(warningNumber) __pragma(warning( disable : warningNumber ))
143 |
144 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE DISABLE_WARNING(4101)
145 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION DISABLE_WARNING(4505)
146 | // other warnings you want to deactivate...
147 |
148 | #elif defined(__GNUC__) || defined(__clang__)
149 | #define DO_PRAGMA(X) _Pragma(#X)
150 | #define DISABLE_WARNING_PUSH DO_PRAGMA(GCC diagnostic push)
151 | #define DISABLE_WARNING_POP DO_PRAGMA(GCC diagnostic pop)
152 | #define DISABLE_WARNING(warningName) DO_PRAGMA(GCC diagnostic ignored #warningName)
153 |
154 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE DISABLE_WARNING(-Wunused-variable)
155 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION DISABLE_WARNING(-Wunused-function)
156 | // other warnings you want to deactivate...
157 |
158 | #else
159 | #define DISABLE_WARNING_PUSH
160 | #define DISABLE_WARNING_POP
161 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE
162 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION
163 | // other warnings you want to deactivate...
164 |
165 | #endif
166 |
167 | #if defined(AVS_WINDOWS) && defined(_USING_V110_SDK71_)
168 | // Windows XP does not have proper initialization for
169 | // thread local variables.
170 | // Use a workaround instead of __declspec(thread)
171 | #define XP_TLS
172 | #endif
173 | // AVS_ENDIANNESS is a string literal: "little", "big" or "middle".
174 | #ifndef MSVC
175 | // GCC and Clang can be used on big endian systems, MSVC can't.
176 | # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
177 | # define AVS_ENDIANNESS "little"
178 | # elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
179 | # define AVS_ENDIANNESS "big"
180 | # else
181 | # define AVS_ENDIANNESS "middle"
182 | # endif
183 | #else
184 | #define AVS_ENDIANNESS "little"
185 | #endif
186 |
187 | #endif //AVS_CONFIG_H
188 |
--------------------------------------------------------------------------------
/HDRTools/MatrixClass.h:
--------------------------------------------------------------------------------
1 | /*
2 | * MatrixClass
3 | *
4 | * Matrix and vector class allowing several operations.
5 | * Copyright (C) 2017 JPSDR
6 | *
7 | * MatrixClass is free software; you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation; either version 2, or (at your option)
10 | * any later version.
11 | *
12 | * MatrixClass is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with GNU Make; see the file COPYING. If not, write to
19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20 | *
21 | */
22 |
23 | #ifndef _MATRIX_CLASS_H
24 | #define _MATRIX_CLASS_H
25 |
26 | #include
27 | #include
28 | #include
29 |
30 | typedef enum MTRXCL_COEFF_DATA_TYPE_ { // coefficient element type tag used by Vector and Matrix; MTRXCL_DATA_NONE comes first (value 0)
31 | MTRXCL_DATA_NONE,MTRXCL_DATA_DOUBLE,MTRXCL_DATA_FLOAT,MTRXCL_DATA_UINT64,MTRXCL_DATA_INT64,
32 | MTRXCL_DATA_UINT32,MTRXCL_DATA_INT32,MTRXCL_DATA_UINT16,MTRXCL_DATA_INT16,
33 | MTRXCL_DATA_UINT8,MTRXCL_DATA_INT8} MTRXCL_COEFF_DATA_TYPE;
34 |
35 | // NOTE(review): presumably sets the library-wide SIMD capability flags - confirm in the implementation file.
36 | void SetCPUMatrixClass(const bool SSE2,const bool AVX,const bool AVX2,const bool AVX512);
37 |
38 |
39 | class Vector // coefficient buffer (Coeff) with its length, data size and element type tag
40 | {
41 | public :
42 | Vector(void);
43 | Vector(const uint16_t l,const MTRXCL_COEFF_DATA_TYPE data);
44 | Vector(const Vector &x);
45 | virtual ~Vector(void);
46 | // AllocCheck() reports whether Coeff is non-null.
47 | inline bool AllocCheck(void) const {return(Coeff!=nullptr);}
48 | bool Create(void);
49 | bool Create(const uint16_t l,const MTRXCL_COEFF_DATA_TYPE data);
50 | bool Create(const Vector &x);
51 | bool CopyStrict(const Vector &x);
52 | bool CopyRaw(const void *ptr);
53 | bool CopyRaw(const void *ptr,const uint16_t lgth);
54 | bool ExportRaw(void *ptr);
55 | bool ExportRaw(void *ptr,const uint16_t lgth);
56 | void Destroy(void);
57 | bool FillD(const double data);
58 | bool FillF(const float data);
59 | bool FillZero(void);
60 | inline MTRXCL_COEFF_DATA_TYPE GetDataType(void) const {return(data_type);}
61 | bool SetInfo(const uint16_t l,const MTRXCL_COEFF_DATA_TYPE data);
62 | void GetInfo(uint16_t &l, MTRXCL_COEFF_DATA_TYPE &data) const;
63 | inline uint16_t GetLength(void) const {return(length);}
64 | inline void* GetPtrVector(void) const {return(Coeff);}
65 | inline size_t GetDataSize(void) const {return(size);}
66 | inline double GetD(const uint16_t i) const {return(((double *)Coeff)[i]);} // GetD/GetF/SetD/SetF index Coeff directly: no null, bounds or element-type check
67 | inline float GetF(const uint16_t i) const {return(((float *)Coeff)[i]);}
68 | inline void SetD(const uint16_t i,const double d) {((double *)Coeff)[i]=d;}
69 | inline void SetF(const uint16_t i,const float d) {((float *)Coeff)[i]=d;}
70 | bool GetSafeD(const uint16_t i,double &d) const ; // NOTE(review): the *Safe* variants report success via bool; presumably they validate index/type - confirm in the implementation
71 | bool SetSafeD(const uint16_t i,const double d);
72 | bool GetSafeF(const uint16_t i,float &d) const ;
73 | bool SetSafeF(const uint16_t i,const float d);
74 |
75 | protected :
76 | void *Coeff;
77 | uint16_t length;
78 | size_t size;
79 | MTRXCL_COEFF_DATA_TYPE data_type;
80 |
81 | private : // assignment and comparison operators are declared private, so outside code cannot use them
82 | Vector& operator = (const Vector &other);
83 | bool operator == (const Vector &other) const;
84 | bool operator != (const Vector &other) const;
85 | };
86 |
87 | class Matrix;
88 |
89 | class Vector_Compute : public Vector // Vector extended with arithmetic, matrix-product and norm/distance operations
90 | {
91 | protected :
92 | bool SSE2_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable;
93 |
94 | public :
95 | Vector_Compute(void);
96 | Vector_Compute(const uint16_t l,const MTRXCL_COEFF_DATA_TYPE data);
97 | Vector_Compute(const Vector_Compute &x);
98 | virtual ~Vector_Compute(void);
99 | // Enabling a level also enables every lower level; disabling a level leaves the other flags unchanged.
100 | inline void SetSSE2(const bool val) {SSE2_Enable=val;}
101 | inline void SetAVX(const bool val) {AVX_Enable=val; if (val) {SSE2_Enable=true;}}
102 | inline void SetAVX2(const bool val) {AVX2_Enable=val; if (val) {AVX_Enable=true; SSE2_Enable=true;}}
103 | inline void SetAVX512(const bool val) {AVX512_Enable=val; if (val) {AVX2_Enable=true; AVX_Enable=true; SSE2_Enable=true;}}
104 | // Public operations return bool; their exact success/failure conditions are defined in the implementation file.
105 | bool Mult(const double coef,const Vector &x);
106 | bool Mult(const double coef);
107 | bool Add(const double coef,const Vector &x);
108 | bool Add(const double coef);
109 | bool Sub(const double coef,const Vector &x);
110 | bool Sub(const double coef);
111 | bool Add_X(const Vector &x,const Vector &y);
112 | bool Add_X(const Vector &x);
113 | bool Sub_X(const Vector &x,const Vector &y);
114 | bool Sub_X(const Vector &x);
115 | bool InvSub_X(const Vector &x);
116 | bool Mult_X(const Vector &x,const Vector &y);
117 | bool Mult_X(const Vector &x);
118 |
119 | bool Product_AX(const Matrix &ma,const Vector &x);
120 | bool Product_AX(const Matrix &ma);
121 | bool Product_tAX(const Matrix &ma,const Vector &x);
122 | bool Product_tAX(const Matrix &ma);
123 |
124 | bool Norme2(double &result);
125 | bool Distance2(const Vector &x,double &result);
126 | bool Norme1(double &result);
127 | bool Distance1(const Vector &x,double &result);
128 |
129 | protected :
130 | // Float variants: same names as the public operations with an F suffix; they return void or the double result directly
131 | void MultF(const double coef,const Vector &x);
132 | void MultF(const double coef);
133 | void AddF(const double coef,const Vector &x);
134 | void AddF(const double coef);
135 | void SubF(const double coef,const Vector &x);
136 | void SubF(const double coef);
137 | void AddF_X(const Vector &x,const Vector &y);
138 | void AddF_X(const Vector &x);
139 | void SubF_X(const Vector &x,const Vector &y);
140 | void SubF_X(const Vector &x);
141 | void InvSubF_X(const Vector &x);
142 | void MultF_X(const Vector &x,const Vector &y);
143 | void MultF_X(const Vector &x);
144 |
145 | void ProductF_AX(const Matrix &ma,const Vector &x);
146 | void ProductF_tAX(const Matrix &ma,const Vector &x);
147 |
148 | double Norme2F(void);
149 | double Distance2F(const Vector &x);
150 | double Norme1F(void);
151 | double Distance1F(const Vector &x);
152 |
153 | // Double variants: same set as above with a D suffix
154 | void MultD(const double coef,const Vector &x);
155 | void MultD(const double coef);
156 | void AddD(const double coef,const Vector &x);
157 | void AddD(const double coef);
158 | void SubD(const double coef,const Vector &x);
159 | void SubD(const double coef);
160 | void AddD_X(const Vector &x,const Vector &y);
161 | void AddD_X(const Vector &x);
162 | void SubD_X(const Vector &x,const Vector &y);
163 | void SubD_X(const Vector &x);
164 | void InvSubD_X(const Vector &x);
165 | void MultD_X(const Vector &x,const Vector &y);
166 | void MultD_X(const Vector &x);
167 |
168 | void ProductD_AX(const Matrix &ma,const Vector &x);
169 | void ProductD_tAX(const Matrix &ma,const Vector &x);
170 |
171 | double Norme2D(void);
172 | double Distance2D(const Vector &x);
173 | double Norme1D(void);
174 | double Distance1D(const Vector &x);
175 |
176 | private : // assignment and comparison operators are declared private, so outside code cannot use them
177 | Vector_Compute& operator = (const Vector_Compute &other);
178 | bool operator == (const Vector_Compute &other) const;
179 | bool operator != (const Vector_Compute &other) const;
180 | };
181 |
182 |
183 | class Matrix // Coefficient buffer addressed by line/column through a byte pitch, with an element-type tag.
184 | {
185 | public :
186 | Matrix(void);
187 | Matrix(const uint16_t l,const uint16_t c,const MTRXCL_COEFF_DATA_TYPE data);
188 | Matrix(const Matrix &m);
189 | virtual ~Matrix(void);
190 | // Storage management, raw import/export and accessors. Only the inline members are defined in this header.
191 | inline bool AllocCheck(void) const {return(Coeff!=nullptr);} // true when Coeff is non-null
192 | bool Create(void);
193 | bool Create(const uint16_t l,const uint16_t c,const MTRXCL_COEFF_DATA_TYPE data);
194 | bool Create(const Matrix &m);
195 | bool CopyStrict(const Matrix &m);
196 | bool CopyRaw(const void *ptr);
197 | bool CopyRaw(const void *ptr,const ptrdiff_t ptr_pitch);
198 | bool CopyRaw(const void *ptr,const ptrdiff_t ptr_pitch,const uint16_t ln,const uint16_t co);
199 | bool ExportRaw(void *ptr);
200 | bool ExportRaw(void *ptr,const ptrdiff_t ptr_pitch);
201 | bool ExportRaw(void *ptr,const ptrdiff_t ptr_pitch,const uint16_t ln,const uint16_t co);
202 | void Destroy(void);
203 | bool FillD(const double data);
204 | bool FillF(const float data);
205 | bool FillZero(void);
206 | inline MTRXCL_COEFF_DATA_TYPE GetDataType(void) const {return(data_type);}
207 | bool SetInfo(const uint16_t l,const uint16_t c,const MTRXCL_COEFF_DATA_TYPE data);
208 | void GetInfo(uint16_t &l,uint16_t &c, MTRXCL_COEFF_DATA_TYPE &data) const;
209 | inline uint16_t GetLines(void) const {return(lines);}
210 | inline uint16_t GetColumns(void) const {return(columns);}
211 | inline void* GetPtrMatrix(void) const {return(Coeff);} // hands out the mutable storage pointer even from a const object
212 | inline void* GetPtrMatrixLine(const uint16_t i) const {return((void *)((uint8_t *)Coeff+i*pitch));} // line i starts at Coeff + i*pitch bytes; i is not range-checked
213 | inline ptrdiff_t GetPitch(void) const {return(pitch);} // byte distance between the starts of consecutive lines
214 | inline size_t GetDataSize(void) const {return(size);}
215 | inline double GetD(const uint16_t i,const uint16_t j) const {return(((double *)((uint8_t *)Coeff+(ptrdiff_t)i*pitch))[j]);} // unchecked: no bounds, null or data_type test
216 | inline float GetF(const uint16_t i,const uint16_t j) const {return(((float *)((uint8_t *)Coeff+(ptrdiff_t)i*pitch))[j]);} // unchecked, reads line i as float[]
217 | inline void SetD(const uint16_t i,const uint16_t j,const double d) {((double *)((uint8_t *)Coeff+(ptrdiff_t)i*pitch))[j]=d;} // unchecked write, line i treated as double[]
218 | inline void SetF(const uint16_t i,const uint16_t j,const float d) {((float *)((uint8_t *)Coeff+(ptrdiff_t)i*pitch))[j]=d;} // unchecked write, line i treated as float[]
219 | bool GetSafeD(const uint16_t i,const uint16_t j,double &d) const ; // NOTE(review): presumably the validated counterparts of GetD/SetD/GetF/SetF - confirm in the implementation
220 | bool SetSafeD(const uint16_t i,const uint16_t j,const double d);
221 | bool GetSafeF(const uint16_t i,const uint16_t j,float &d) const ;
222 | bool SetSafeF(const uint16_t i,const uint16_t j,const float d);
223 |
224 | protected :
225 | void *Coeff; // untyped storage; GetD/SetD view it as double, GetF/SetF as float
226 | uint16_t columns,lines;
227 | size_t size; // value returned by GetDataSize()
228 | ptrdiff_t pitch; // line stride in bytes (see GetPtrMatrixLine)
229 | MTRXCL_COEFF_DATA_TYPE data_type;
230 | // Assignment copies nothing: it only returns *this, and being protected it is unavailable to client code.
231 | Matrix& operator=(const Matrix&){return(*this);}
232 |
233 | private :
234 | bool operator == (const Matrix &other) const; // private, so client code cannot compare matrices with ==
235 | bool operator != (const Matrix &other) const; // private, so client code cannot compare matrices with !=
236 | };
237 |
238 |
239 | class Matrix_Compute : public Matrix // Matrix extended with the computation API declared below and per-instance SIMD enable flags.
240 | {
241 | protected :
242 | double zero_value; // SetZeroValue() stores fabs(z) here
243 | bool SSE2_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // written by SetSSE2/SetAVX/SetAVX2/SetAVX512
244 |
245 | public :
246 | Matrix_Compute(void);
247 | Matrix_Compute(const uint16_t l,const uint16_t c,const MTRXCL_COEFF_DATA_TYPE data);
248 | Matrix_Compute(const Matrix_Compute &m);
249 | virtual ~Matrix_Compute(void);
250 | // Enabling a level also enables every lower level; passing false clears only the named flag.
251 | inline void SetSSE2(const bool val) {SSE2_Enable=val;}
252 | inline void SetAVX(const bool val) {AVX_Enable=val; if (val) {SSE2_Enable=true;} }
253 | inline void SetAVX2(const bool val) {AVX2_Enable=val; if (val) {AVX_Enable=true; SSE2_Enable=true;}}
254 | inline void SetAVX512(const bool val) {AVX512_Enable=val; if (val) {AVX2_Enable=true; AVX_Enable=true; SSE2_Enable=true;}}
255 |
256 | bool CreateTranspose(const Matrix &m);
257 | bool CopyStrict(const Matrix_Compute &m); // different parameter type from Matrix::CopyStrict(const Matrix&), so this hides the base overload
258 | inline void SetZeroValue(const double z) {zero_value=fabs(z);} // sign of z is discarded
259 | inline double GetZeroValue(void) const {return(zero_value);}
260 |
261 | bool Transpose(void);
262 | bool Transpose(const Matrix &ma);
263 | // NOTE(review): the bool-returning entry points below presumably validate operands and dispatch to the protected F/D workers - confirm in the .cpp.
264 | bool Mult(const double coef,const Matrix &ma);
265 | bool Mult(const double coef);
266 | bool Add(const double coef,const Matrix &ma);
267 | bool Add(const double coef);
268 | bool Sub(const double coef,const Matrix &ma);
269 | bool Sub(const double coef);
270 | bool Add_A(const Matrix &ma,const Matrix &mb);
271 | bool Add_A(const Matrix &ma);
272 | bool Sub_A(const Matrix &ma,const Matrix &mb);
273 | bool Sub_A(const Matrix &ma);
274 | bool InvSub_A(const Matrix &ma);
275 | bool Mult_A(const Matrix &ma,const Matrix &mb);
276 | bool Mult_A(const Matrix &ma);
277 |
278 | bool Product_AB(const Matrix &ma,const Matrix &mb);
279 | bool Product_AtB(const Matrix &ma,const Matrix &mb);
280 | bool Product_tAA(const Matrix &ma);
281 | bool Product_tAA(void);
282 |
283 | bool Inverse(const Matrix &ma);
284 | bool Inverse(void);
285 | int8_t InverseSafe(const Matrix_Compute &ma); // NOTE(review): meaning of the int8_t status values is not visible in this header
286 | int8_t InverseSafe(void);
287 |
288 | bool Norme2(double &result); // result is delivered through the reference; the bool is a separate status
289 | bool Distance2(const Matrix &ma,double &result);
290 | bool Norme1(double &result);
291 | bool Distance1(const Matrix &ma,double &result);
292 |
293 | protected :
294 | // Float
295 | void TransposeF(const Matrix &ma);
296 |
297 | void MultF(const double coef,const Matrix &ma);
298 | void MultF(const double coef);
299 | void AddF(const double coef,const Matrix &ma);
300 | void AddF(const double coef);
301 | void SubF(const double coef,const Matrix &ma);
302 | void SubF(const double coef);
303 | void AddF_A(const Matrix &ma,const Matrix &mb);
304 | void AddF_A(const Matrix &ma);
305 | void SubF_A(const Matrix &ma,const Matrix &mb);
306 | void SubF_A(const Matrix &ma);
307 | void InvSubF_A(const Matrix &ma);
308 | void MultF_A(const Matrix &ma,const Matrix &mb);
309 | void MultF_A(const Matrix &ma);
310 |
311 | void ProductF_AB(const Matrix &ma,const Matrix &mb);
312 | void ProductF_AtB(const Matrix &ma,const Matrix &mb);
313 |
314 | bool InverseF(const Matrix &ma);
315 | int8_t InverseSafeF(const Matrix_Compute &ma);
316 |
317 | double Norme2F(void);
318 | double Distance2F(const Matrix &ma);
319 | double Norme1F(void);
320 | double Distance1F(const Matrix &ma);
321 |
322 | // Double
323 | void MultD(const double coef,const Matrix &ma);
324 | void MultD(const double coef);
325 | void AddD(const double coef,const Matrix &ma);
326 | void AddD(const double coef);
327 | void SubD(const double coef,const Matrix &ma);
328 | void SubD(const double coef);
329 | void AddD_A(const Matrix &ma,const Matrix &mb);
330 | void AddD_A(const Matrix &ma);
331 | void SubD_A(const Matrix &ma,const Matrix &mb);
332 | void SubD_A(const Matrix &ma);
333 | void InvSubD_A(const Matrix &ma);
334 | void MultD_A(const Matrix &ma,const Matrix &mb);
335 | void MultD_A(const Matrix &ma);
336 |
337 | void TransposeD(const Matrix &ma);
338 |
339 | void ProductD_AB(const Matrix &ma,const Matrix &mb);
340 | void ProductD_AtB(const Matrix &ma,const Matrix &mb);
341 |
342 | bool InverseD(const Matrix &ma);
343 | int8_t InverseSafeD(const Matrix_Compute &ma);
344 |
345 | double Norme2D(void);
346 | double Distance2D(const Matrix &ma);
347 | double Norme1D(void);
348 | double Distance1D(const Matrix &ma);
349 | // The integer element types below only have a transpose worker declared in this class.
350 | // U64
351 | void TransposeU64(const Matrix &ma);
352 |
353 | // I64
354 | void TransposeI64(const Matrix &ma);
355 |
356 | // U32
357 | void TransposeU32(const Matrix &ma);
358 |
359 | // I32
360 | void TransposeI32(const Matrix &ma);
361 |
362 | // U16
363 | void TransposeU16(const Matrix &ma);
364 |
365 | // I16
366 | void TransposeI16(const Matrix &ma);
367 |
368 | // U8
369 | void TransposeU8(const Matrix &ma);
370 |
371 | // I8
372 | void TransposeI8(const Matrix &ma);
373 | // Assignment copies nothing: it only returns *this, and being protected it is unavailable to client code.
374 | Matrix_Compute& operator=(const Matrix_Compute&){return(*this);}
375 |
376 | private :
377 | bool operator == (const Matrix_Compute &other) const; // private, so client code cannot compare with ==
378 | bool operator != (const Matrix_Compute &other) const; // private, so client code cannot compare with !=
379 | };
380 |
381 | #endif
382 |
--------------------------------------------------------------------------------
/HDRTools/HDRTools.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | Win32
7 |
8 |
9 | Debug
10 | x64
11 |
12 |
13 | Release
14 | Win32
15 |
16 |
17 | Release
18 | x64
19 |
20 |
21 |
22 | {1820278E-F1C3-48E8-A951-EE5E95079370}
23 | Win32Proj
24 | HDRTools
25 |
26 |
27 |
28 | DynamicLibrary
29 | true
30 | MultiByte
31 |
32 |
33 | DynamicLibrary
34 | true
35 | MultiByte
36 |
37 |
38 | DynamicLibrary
39 | false
40 | true
41 | MultiByte
42 |
43 |
44 | DynamicLibrary
45 | false
46 | true
47 | MultiByte
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 | true
68 |
69 |
70 | true
71 |
72 |
73 | false
74 |
75 |
76 | false
77 |
78 |
79 |
80 | NotUsing
81 | Level3
82 | Disabled
83 | WIN32;_DEBUG;_WINDOWS;_USRDLL;HDRTOOLS_EXPORTS;%(PreprocessorDefinitions)
84 |
85 |
86 | Windows
87 | true
88 |
89 |
90 |
91 |
92 | NotUsing
93 | Level3
94 | Disabled
95 | WIN32;_DEBUG;_WINDOWS;_USRDLL;HDRTOOLS_EXPORTS;%(PreprocessorDefinitions)
96 |
97 |
98 | Windows
99 | true
100 |
101 |
102 |
103 |
104 | Level3
105 | NotUsing
106 | MaxSpeed
107 | true
108 | true
109 | WIN32;NDEBUG;_WINDOWS;_USRDLL;HDRTOOLS_EXPORTS;%(PreprocessorDefinitions)
110 | AnySuitable
111 | Speed
112 | true
113 | true
114 | true
115 | false
116 | true
117 | true
118 |
119 |
120 | Windows
121 | false
122 | true
123 | true
124 |
125 |
126 |
127 |
128 | Level3
129 | NotUsing
130 | MaxSpeed
131 | true
132 | true
133 | WIN32;NDEBUG;_WINDOWS;_USRDLL;HDRTOOLS_EXPORTS;%(PreprocessorDefinitions)
134 | AnySuitable
135 | Speed
136 | true
137 | true
138 | true
139 | false
140 | true
141 | true
142 |
143 |
144 | Windows
145 | false
146 | true
147 | true
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 | false
172 | false
173 | true
174 | true
175 |
176 |
177 | true
178 | true
179 | false
180 | false
181 |
182 |
183 | false
184 | false
185 | true
186 | true
187 | true
188 | true
189 |
190 |
191 | true
192 | true
193 | false
194 | false
195 | true
196 | true
197 |
198 |
199 | true
200 | false
201 | true
202 | false
203 | true
204 | true
205 | true
206 | true
207 |
208 |
209 | true
210 | false
211 | true
212 | false
213 | true
214 | true
215 | true
216 | true
217 |
218 |
219 | true
220 | false
221 | true
222 | false
223 | true
224 | true
225 | true
226 | true
227 |
228 |
229 | true
230 | false
231 | true
232 | false
233 | true
234 | true
235 | true
236 | true
237 |
238 |
239 | false
240 | false
241 | true
242 | true
243 |
244 |
245 | false
246 | false
247 | true
248 | true
249 |
250 |
251 |
252 |
253 |
254 |
255 |
--------------------------------------------------------------------------------
/HDRTools/HDRTools_AVX2_asm.asm:
--------------------------------------------------------------------------------
1 | ;
2 | ; HDRTools()
3 | ;
4 | ; Several functions for working on HDR data, and linear to non-linear conversions.
5 | ; Copyright (C) 2018 JPSDR
6 | ;
7 | ; HDRTools is free software; you can redistribute it and/or modify
8 | ; it under the terms of the GNU General Public License as published by
9 | ; the Free Software Foundation; either version 2, or (at your option)
10 | ; any later version.
11 | ;
12 | ; HDRTools is distributed in the hope that it will be useful,
13 | ; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | ; GNU General Public License for more details.
16 | ;
17 | ; You should have received a copy of the GNU General Public License
18 | ; along with GNU Make; see the file COPYING. If not, write to
19 | ; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20 | ;
21 | ;
22 |
23 | .xmm
24 | .model flat,c ; flat memory model, C language type for the procs declared below
25 |
26 | .data
27 |
28 | align 16
29 |
30 | data segment align(32) ; 32-byte alignment: the constants below are fetched with aligned 256-bit loads (vmovaps/vmovdqa)
31 | ; Each constant is one full 256-bit vector (8 real4/dword or 16 word), so every label stays 32-byte aligned.
32 | data_f_0 real4 8 dup(0.0) ; lower clamp bound in Convert_LinearRGBPStoRGB64_AVX2
33 | data_f_1 real4 8 dup(1.0) ; upper clamp bound in Convert_LinearRGBPStoRGB64_AVX2
34 |
35 | data_f_1048575 real4 8 dup(1048575.0) ; scale applied before the table lookup in Convert_LinearRGBPStoRGB64_AVX2
36 | data_f_65535 real4 8 dup(65535.0) ; scale applied in Convert_RGBPStoRGB64_AVX2
37 | data_dw_1048575 dword 8 dup(1048575)
38 | data_dw_65535 dword 8 dup(65535)
39 | data_dw_0 dword 8 dup(0)
40 |
41 | data_w_128 word 16 dup(128) ; rounding bias added before the >>8 in Convert_RGB64_16toRGB64_8_AVX2
42 | data_w_32 word 16 dup(32) ; rounding bias added before the >>6 in Convert_RGB64_16toRGB64_10_AVX2
43 | data_w_8 word 16 dup(8) ; rounding bias added before the >>4 in Convert_RGB64_16toRGB64_12_AVX2
44 |
45 | .code
46 |
47 |
48 | ;***************************************************
49 | ;** YUV to RGB functions **
50 | ;***************************************************
51 |
52 |
53 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 proc src1:dword,src2:dword,dst:dword,w:dword
54 | ; Per byte: dst = pavgb( floor((src1+src2)/2) , src1 ). w = number of 32-byte blocks and must be > 0.
55 | public JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2
56 | ; src1, src2 and dst are all accessed with vmovdqa, so each must be 32-byte aligned.
57 | push esi
58 | push edi
59 | push ebx
60 |
61 | vpcmpeqb ymm3,ymm3,ymm3 ; ymm3 = all ones, used as the bitwise-NOT mask
62 |
63 | mov edi,dst
64 | mov esi,src1
65 | mov edx,src2
66 | xor eax,eax ; eax = byte offset into all three buffers
67 | mov ecx,w ; ecx = blocks left; `loop` decrements before testing, so 0 would wrap around
68 | mov ebx,32 ; offset step: one ymm register per iteration
69 |
70 | Convert_Planar420_to_Planar422_8_AVX2_1:
71 | vmovdqa ymm0,YMMWORD ptr[esi+eax]
72 | vmovdqa ymm1,YMMWORD ptr[edx+eax]
73 | vpxor ymm2,ymm0,ymm3 ; ymm2 = NOT src1
74 | vpxor ymm1,ymm1,ymm3 ; ymm1 = NOT src2
75 | vpavgb ymm2,ymm2,ymm1 ; rounded-up average of the complements
76 | vpxor ymm2,ymm2,ymm3 ; complement back: average of src1 and src2 rounded down
77 | vpavgb ymm2,ymm2,ymm0 ; rounded-up average of that value with src1
78 |
79 | vmovdqa YMMWORD ptr[edi+eax],ymm2
80 | add eax,ebx
81 | loop Convert_Planar420_to_Planar422_8_AVX2_1
82 |
83 | vzeroupper
84 |
85 | pop ebx
86 | pop edi
87 | pop esi
88 |
89 | ret
90 |
91 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 endp
92 |
93 |
94 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 proc src1:dword,src2:dword,dst:dword,w:dword
95 | ; Per 16-bit word: dst = pavgw( floor((src1+src2)/2) , src1 ). w = number of 32-byte blocks (16 words each), must be > 0.
96 | public JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2
97 | ; src1, src2 and dst are all accessed with vmovdqa, so each must be 32-byte aligned.
98 | push esi
99 | push edi
100 | push ebx
101 |
102 | vpcmpeqb ymm3,ymm3,ymm3 ; ymm3 = all ones, used as the bitwise-NOT mask
103 |
104 | mov edi,dst
105 | mov esi,src1
106 | mov edx,src2
107 | xor eax,eax ; eax = byte offset into all three buffers
108 | mov ecx,w ; ecx = blocks left; `loop` decrements before testing, so 0 would wrap around
109 | mov ebx,32 ; offset step: one ymm register per iteration
110 |
111 | Convert_Planar420_to_Planar422_16_AVX2_1:
112 | vmovdqa ymm0,YMMWORD ptr[esi+eax]
113 | vmovdqa ymm1,YMMWORD ptr[edx+eax]
114 | vpxor ymm2,ymm0,ymm3 ; ymm2 = NOT src1
115 | vpxor ymm1,ymm1,ymm3 ; ymm1 = NOT src2
116 | vpavgw ymm2,ymm2,ymm1 ; rounded-up average of the complements
117 | vpxor ymm2,ymm2,ymm3 ; complement back: average of src1 and src2 rounded down
118 | vpavgw ymm2,ymm2,ymm0 ; rounded-up average of that value with src1
119 |
120 | vmovdqa YMMWORD ptr[edi+eax],ymm2
121 | add eax,ebx
122 | loop Convert_Planar420_to_Planar422_16_AVX2_1
123 |
124 | vzeroupper
125 |
126 | pop ebx
127 | pop edi
128 | pop esi
129 |
130 | ret
131 |
132 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 endp
133 |
134 |
135 | ;***************************************************
136 | ;** RGB to YUV functions **
137 | ;***************************************************
138 |
139 |
140 | JPSDR_HDRTools_Convert_LinearRGBPStoRGB64_AVX2 proc src_R:dword,src_G:dword,src_B:dword,dst:dword,w:dword,h:dword,lookup:dword,
141 | src_modulo_R:dword,src_modulo_G:dword,src_modulo_B:dword,dst_modulo:dword
142 | ; Per pixel: clamp each float channel to [0,1], scale by 1048575, convert to int32, translate it through the
143 | public JPSDR_HDRTools_Convert_LinearRGBPStoRGB64_AVX2
144 | ; 16-bit table at `lookup`, then store the B, G, R words followed by a zero word. w and h must be > 0.
145 | push esi
146 | push edi
147 | push ebx
148 |
149 | vmovaps ymm3,YMMWORD ptr data_f_1048575 ; ymm3 = scale factor
150 | vmovaps ymm4,YMMWORD ptr data_f_0 ; ymm4 = lower clamp bound
151 | vmovaps ymm5,YMMWORD ptr data_f_1 ; ymm5 = upper clamp bound
152 |
153 | cld ; stosw below must advance edi upward
154 | mov edi,dst
155 | mov ebx,lookup ; ebx = base of the word table, indexed as [ebx+2*index]
156 |
157 | Convert_LinearRGBPStoRGB64_AVX2_1:
158 | mov ecx,w ; ecx = pixels left in the current row
159 | Convert_LinearRGBPStoRGB64_AVX2_2:
160 | mov esi,src_B
161 | xor edx,edx ; edx counts the pixels already finished in this group of 8
162 | vmaxps ymm0,ymm4,YMMWORD ptr[esi] ; always loads 8 floats per channel, even when fewer than 8 pixels remain in the row
163 | mov esi,src_G
164 | vminps ymm0,ymm0,ymm5
165 | vmaxps ymm1,ymm4,YMMWORD ptr[esi]
166 | mov esi,src_R
167 | vminps ymm1,ymm1,ymm5
168 | vmaxps ymm2,ymm4,YMMWORD ptr[esi]
169 | vmulps ymm0,ymm0,ymm3
170 | vminps ymm2,ymm2,ymm5
171 |
172 | vmulps ymm1,ymm1,ymm3
173 | vmulps ymm2,ymm2,ymm3
174 | vcvtps2dq ymm0,ymm0 ; ymm0 = 8 table indices for B
175 | vcvtps2dq ymm1,ymm1 ; ymm1 = 8 table indices for G
176 | vcvtps2dq ymm2,ymm2 ; ymm2 = 8 table indices for R
177 | ; ---- pixel 0 (dword lane 0): look up B, G, R, store them, then a zero word ----
178 | vpextrd eax,xmm0,0
179 | mov ax,word ptr[ebx+2*eax] ; only ax is replaced; stosw stores just ax
180 | stosw
181 | vpextrd eax,xmm1,0
182 | mov ax,word ptr[ebx+2*eax]
183 | stosw
184 | vpextrd eax,xmm2,0
185 | mov ax,word ptr[ebx+2*eax]
186 | stosw
187 | xor eax,eax
188 | stosw ; fourth word of the pixel is written as 0
189 | dec ecx
190 | jz Convert_LinearRGBPStoRGB64_AVX2_3 ; row finished inside this group
191 | inc edx
192 | ; ---- pixel 1 (dword lane 1) ----
193 | vpextrd eax,xmm0,1
194 | mov ax,word ptr[ebx+2*eax]
195 | stosw
196 | vpextrd eax,xmm1,1
197 | mov ax,word ptr[ebx+2*eax]
198 | stosw
199 | vpextrd eax,xmm2,1
200 | mov ax,word ptr[ebx+2*eax]
201 | stosw
202 | xor eax,eax
203 | stosw
204 | dec ecx
205 | jz Convert_LinearRGBPStoRGB64_AVX2_3
206 | inc edx
207 | ; ---- pixel 2 (dword lane 2) ----
208 | vpextrd eax,xmm0,2
209 | mov ax,word ptr[ebx+2*eax]
210 | stosw
211 | vpextrd eax,xmm1,2
212 | mov ax,word ptr[ebx+2*eax]
213 | stosw
214 | vpextrd eax,xmm2,2
215 | mov ax,word ptr[ebx+2*eax]
216 | stosw
217 | xor eax,eax
218 | stosw
219 | dec ecx
220 | jz Convert_LinearRGBPStoRGB64_AVX2_3
221 | inc edx
222 | ; ---- pixel 3 (dword lane 3) ----
223 | vpextrd eax,xmm0,3
224 | mov ax,word ptr[ebx+2*eax]
225 | stosw
226 | vpextrd eax,xmm1,3
227 | mov ax,word ptr[ebx+2*eax]
228 | stosw
229 | vpextrd eax,xmm2,3
230 | mov ax,word ptr[ebx+2*eax]
231 | stosw
232 | xor eax,eax
233 | stosw
234 | dec ecx
235 | jz Convert_LinearRGBPStoRGB64_AVX2_3
236 | inc edx
237 | ; Bring the upper four indices of each channel down into the xmm registers for pixels 4..7.
238 | vextracti128 xmm0,ymm0,1
239 | vextracti128 xmm1,ymm1,1
240 | vextracti128 xmm2,ymm2,1
241 | ; ---- pixel 4 ----
242 | vpextrd eax,xmm0,0
243 | mov ax,word ptr[ebx+2*eax]
244 | stosw
245 | vpextrd eax,xmm1,0
246 | mov ax,word ptr[ebx+2*eax]
247 | stosw
248 | vpextrd eax,xmm2,0
249 | mov ax,word ptr[ebx+2*eax]
250 | stosw
251 | xor eax,eax
252 | stosw
253 | dec ecx
254 | jz Convert_LinearRGBPStoRGB64_AVX2_3
255 | inc edx
256 | ; ---- pixel 5 ----
257 | vpextrd eax,xmm0,1
258 | mov ax,word ptr[ebx+2*eax]
259 | stosw
260 | vpextrd eax,xmm1,1
261 | mov ax,word ptr[ebx+2*eax]
262 | stosw
263 | vpextrd eax,xmm2,1
264 | mov ax,word ptr[ebx+2*eax]
265 | stosw
266 | xor eax,eax
267 | stosw
268 | dec ecx
269 | jz short Convert_LinearRGBPStoRGB64_AVX2_3
270 | inc edx
271 | ; ---- pixel 6 ----
272 | vpextrd eax,xmm0,2
273 | mov ax,word ptr[ebx+2*eax]
274 | stosw
275 | vpextrd eax,xmm1,2
276 | mov ax,word ptr[ebx+2*eax]
277 | stosw
278 | vpextrd eax,xmm2,2
279 | mov ax,word ptr[ebx+2*eax]
280 | stosw
281 | xor eax,eax
282 | stosw
283 | dec ecx
284 | jz short Convert_LinearRGBPStoRGB64_AVX2_3
285 | inc edx
286 | ; ---- pixel 7 ----
287 | vpextrd eax,xmm0,3
288 | mov ax,word ptr[ebx+2*eax]
289 | stosw
290 | vpextrd eax,xmm1,3
291 | mov ax,word ptr[ebx+2*eax]
292 | stosw
293 | vpextrd eax,xmm2,3
294 | mov ax,word ptr[ebx+2*eax]
295 | stosw
296 | xor eax,eax
297 | stosw
298 | dec ecx
299 | ; Common exit of the group: edx+1 pixels were consumed, whether the group was full or the row ended early.
300 | Convert_LinearRGBPStoRGB64_AVX2_3:
301 | inc edx
302 | shl edx,2 ; pixels consumed -> bytes (4 per float)
303 | add src_B,edx ; the source pointers are kept in the stack argument slots
304 | add src_G,edx
305 | add src_R,edx
306 | or ecx,ecx
307 | jnz Convert_LinearRGBPStoRGB64_AVX2_2 ; more pixels left in this row
308 | ; End of row: the modulo values are added to pointers that already sit just past the last processed pixel.
309 | mov eax,dst_modulo
310 | mov edx,src_modulo_B
311 | add edi,eax
312 | add src_B,edx
313 | mov eax,src_modulo_G
314 | mov edx,src_modulo_R
315 | add src_G,eax
316 | add src_R,edx
317 | dec h ; row counter is the stack copy of h
318 | jnz Convert_LinearRGBPStoRGB64_AVX2_1
319 |
320 | vzeroupper
321 |
322 | pop ebx
323 | pop edi
324 | pop esi
325 |
326 | ret
327 |
328 | JPSDR_HDRTools_Convert_LinearRGBPStoRGB64_AVX2 endp
329 |
330 |
331 | JPSDR_HDRTools_Convert_RGBPStoRGB64_AVX2 proc src_R:dword,src_G:dword,src_B:dword,dst:dword,w:dword,h:dword,
332 | src_pitch_R:dword,src_pitch_G:dword,src_pitch_B:dword,dst_pitch:dword
333 | ; Per pixel: multiply each float channel by 65535, convert to int32, saturate to an unsigned word and
334 | public JPSDR_HDRTools_Convert_RGBPStoRGB64_AVX2
335 | ; store B,G,R,0 as four words. Rows advance by the full pitches; h must be > 0; dst rows must be 16-byte aligned.
336 | push esi
337 | push edi
338 | push ebx
339 |
340 | vmovaps ymm3,YMMWORD ptr data_f_65535 ; ymm3 = scale factor
341 | vpxor xmm4,xmm4,xmm4 ; xmm4 = 0, supplies the fourth word of every pixel
342 |
343 | mov esi,src_B
344 | mov ebx,src_G
345 | mov edx,src_R
346 | mov edi,dst
347 |
348 | Convert_RGBPStoRGB64_AVX2_1:
349 | mov ecx,w
350 | xor eax,eax ; eax = pixel index inside the row
351 | shr ecx,3 ; ecx = number of full groups of 8 pixels
352 | jz Convert_RGBPStoRGB64_AVX2_3
353 | ; The packs use the VEX form (vpackusdw): a legacy-SSE packusdw here would run with dirty upper YMM halves.
354 | Convert_RGBPStoRGB64_AVX2_2:
355 | vmulps ymm0,ymm3,YMMWORD ptr[esi+4*eax] ; B
356 | vmulps ymm1,ymm3,YMMWORD ptr[edx+4*eax] ; R
357 | vmulps ymm2,ymm3,YMMWORD ptr[ebx+4*eax] ; G
358 | vcvtps2dq ymm0,ymm0
359 | vcvtps2dq ymm1,ymm1
360 | vcvtps2dq ymm2,ymm2
361 |
362 | vextracti128 xmm5,ymm0,1 ; upper four pixels are saved before the 128-bit VEX ops clear the upper halves
363 | vextracti128 xmm6,ymm1,1
364 | vextracti128 xmm7,ymm2,1
365 |
366 | vpackusdw xmm0,xmm0,xmm0 ;0000B4B3B2B1
367 | vpackusdw xmm1,xmm1,xmm1 ;0000R4R3R2R1
368 | vpackusdw xmm2,xmm2,xmm2 ;0000G4G3G2G1
369 |
370 | vpunpcklwd xmm0,xmm0,xmm1 ;R4B4R3B3R2B2R1B1
371 | vpunpcklwd xmm2,xmm2,xmm4 ;0G40G30G20G1
372 | vpunpckhwd xmm1,xmm0,xmm2 ;0R4G4B40R3G3B3
373 | vpunpcklwd xmm0,xmm0,xmm2 ;0R2G2B20R1G1B1
374 |
375 | vpackusdw xmm5,xmm5,xmm5 ;0000B8B7B6B5
376 | vpackusdw xmm6,xmm6,xmm6 ;0000R8R7R6R5
377 | vpackusdw xmm7,xmm7,xmm7 ;0000G8G7G6G5
378 |
379 | vpunpcklwd xmm5,xmm5,xmm6 ;R8B8R7B7R6B6R5B5
380 | vpunpcklwd xmm7,xmm7,xmm4 ;0G80G70G60G5
381 | vpunpckhwd xmm6,xmm5,xmm7 ;0R8G8B80R7G7B7
382 | vpunpcklwd xmm5,xmm5,xmm7 ;0R6G6B60R5G5B5
383 |
384 | vmovdqa XMMWORD ptr[edi+8*eax],xmm0
385 | vmovdqa XMMWORD ptr[edi+8*eax+16],xmm1
386 | vmovdqa XMMWORD ptr[edi+8*eax+32],xmm5
387 | vmovdqa XMMWORD ptr[edi+8*eax+48],xmm6
388 | add eax,8
389 | dec ecx
390 | jnz Convert_RGBPStoRGB64_AVX2_2
391 | ; Tail: 1..7 pixels left. A full group of 8 is still loaded and converted; only the needed part is stored.
392 | Convert_RGBPStoRGB64_AVX2_3:
393 | mov ecx,w
394 | and ecx,7 ; ecx = leftover pixel count
395 | jz Convert_RGBPStoRGB64_AVX2_7
396 |
397 | vmulps ymm0,ymm3,YMMWORD ptr[esi+4*eax] ; reads 32 bytes per channel even though fewer pixels remain
398 | vmulps ymm1,ymm3,YMMWORD ptr[edx+4*eax]
399 | vmulps ymm2,ymm3,YMMWORD ptr[ebx+4*eax]
400 | vcvtps2dq ymm0,ymm0
401 | vcvtps2dq ymm1,ymm1
402 | vcvtps2dq ymm2,ymm2
403 |
404 | vextracti128 xmm5,ymm0,1
405 | vextracti128 xmm6,ymm1,1
406 | vextracti128 xmm7,ymm2,1
407 |
408 | vpackusdw xmm0,xmm0,xmm0 ;0000B4B3B2B1
409 | vpackusdw xmm1,xmm1,xmm1 ;0000R4R3R2R1
410 | vpackusdw xmm2,xmm2,xmm2 ;0000G4G3G2G1
411 |
412 | vpunpcklwd xmm0,xmm0,xmm1 ;R4B4R3B3R2B2R1B1
413 | vpunpcklwd xmm2,xmm2,xmm4 ;0G40G30G20G1
414 | vpunpckhwd xmm1,xmm0,xmm2 ;0R4G4B40R3G3B3
415 | vpunpcklwd xmm0,xmm0,xmm2 ;0R2G2B20R1G1B1
416 |
417 | vpackusdw xmm5,xmm5,xmm5 ;0000B8B7B6B5
418 | vpackusdw xmm6,xmm6,xmm6 ;0000R8R7R6R5
419 | vpackusdw xmm7,xmm7,xmm7 ;0000G8G7G6G5
420 |
421 | vpunpcklwd xmm5,xmm5,xmm6 ;R8B8R7B7R6B6R5B5
422 | vpunpcklwd xmm7,xmm7,xmm4 ;0G80G70G60G5
423 | vpunpckhwd xmm6,xmm5,xmm7 ;0R8G8B80R7G7B7
424 | vpunpcklwd xmm5,xmm5,xmm7 ;0R6G6B60R5G5B5
425 | ; Store exactly ecx pixels (8 bytes each) by decoding the bits of ecx: 4 -> xmm0+xmm1, 2 -> one xmm, 1 -> one qword.
426 | test ecx,4
427 | jnz short Convert_RGBPStoRGB64_AVX2_5
428 | test ecx,2
429 | jnz short Convert_RGBPStoRGB64_AVX2_4
430 | vmovq qword ptr[edi+8*eax],xmm0 ; ecx = 1
431 | jmp short Convert_RGBPStoRGB64_AVX2_7
432 |
433 | Convert_RGBPStoRGB64_AVX2_4:
434 | vmovdqa XMMWORD ptr[edi+8*eax],xmm0 ; ecx = 2 or 3
435 | test ecx,1
436 | jz short Convert_RGBPStoRGB64_AVX2_7
437 | vmovq qword ptr[edi+8*eax+16],xmm1
438 | jmp short Convert_RGBPStoRGB64_AVX2_7
439 |
440 | Convert_RGBPStoRGB64_AVX2_5:
441 | vmovdqa XMMWORD ptr[edi+8*eax],xmm0 ; ecx = 4..7
442 | vmovdqa XMMWORD ptr[edi+8*eax+16],xmm1
443 | test ecx,2
444 | jnz short Convert_RGBPStoRGB64_AVX2_6
445 | test ecx,1
446 | jz short Convert_RGBPStoRGB64_AVX2_7
447 | vmovq qword ptr[edi+8*eax+32],xmm5
448 | jmp short Convert_RGBPStoRGB64_AVX2_7
449 |
450 | Convert_RGBPStoRGB64_AVX2_6:
451 | vmovdqa XMMWORD ptr[edi+8*eax+32],xmm5 ; ecx = 6 or 7
452 | test ecx,1
453 | jz short Convert_RGBPStoRGB64_AVX2_7
454 | vmovq qword ptr[edi+8*eax+48],xmm6
455 | ; Next row: the base pointers were never advanced inside the row, so the full pitches are added.
456 | Convert_RGBPStoRGB64_AVX2_7:
457 | add esi,src_pitch_B
458 | add ebx,src_pitch_G
459 | add edx,src_pitch_R
460 | add edi,dst_pitch
461 | dec h ; row counter is the stack copy of h
462 | jnz Convert_RGBPStoRGB64_AVX2_1
463 |
464 | vzeroupper
465 |
466 | pop ebx
467 | pop edi
468 | pop esi
469 |
470 | ret
471 |
472 | JPSDR_HDRTools_Convert_RGBPStoRGB64_AVX2 endp
473 |
474 |
475 | JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 proc src1:dword,src2:dword,dst:dword,w32:dword,h:dword,src_pitch2:dword,dst_pitch:dword
476 | ; Per byte: dst = pavgb(src1, src2) (rounded-up average). w32 = 32-byte blocks per row, h = rows; both must be > 0.
477 | public JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2
478 | ; src1 is read and dst is written with vmovdqa, so both must stay 32-byte aligned on every row.
479 | push esi
480 | push edi
481 | push ebx
482 |
483 | mov edi,dst
484 | mov esi,src1
485 | mov edx,src2
486 | mov ebx,32 ; offset step: one ymm register per iteration
487 |
488 | Convert_Planar422_to_Planar420_8_AVX2_1:
489 | xor eax,eax ; eax = byte offset inside the row
490 | mov ecx,w32 ; `loop` decrements before testing, so 0 would wrap around
491 |
492 | Convert_Planar422_to_Planar420_8_AVX2_2:
493 | vmovdqa ymm0,YMMWORD ptr[esi+eax]
494 | vpavgb ymm0,ymm0,YMMWORD ptr[edx+eax]
495 |
496 | vmovdqa YMMWORD ptr[edi+eax],ymm0
497 | add eax,ebx
498 | loop Convert_Planar422_to_Planar420_8_AVX2_2
499 |
500 | add esi,src_pitch2 ; both source rows advance by the same src_pitch2
501 | add edx,src_pitch2
502 | add edi,dst_pitch
503 | dec h ; row counter is the stack copy of h
504 | jnz short Convert_Planar422_to_Planar420_8_AVX2_1
505 |
506 | vzeroupper
507 |
508 | pop ebx
509 | pop edi
510 | pop esi
511 |
512 | ret
513 |
514 | JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 endp
515 |
516 |
517 | JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 proc src1:dword,src2:dword,dst:dword,w16:dword,h:dword,src_pitch2:dword,dst_pitch:dword
518 | ; Per 16-bit word: dst = pavgw(src1, src2) (rounded-up average). w16 = 32-byte blocks (16 words) per row, h = rows; both > 0.
519 | public JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2
520 | ; src1 is read and dst is written with vmovdqa, so both must stay 32-byte aligned on every row.
521 | push esi
522 | push edi
523 | push ebx
524 |
525 | mov edi,dst
526 | mov esi,src1
527 | mov edx,src2
528 | mov ebx,32 ; offset step: one ymm register per iteration
529 |
530 | Convert_Planar422_to_Planar420_16_AVX2_1:
531 | xor eax,eax ; eax = byte offset inside the row
532 | mov ecx,w16 ; `loop` decrements before testing, so 0 would wrap around
533 |
534 | Convert_Planar422_to_Planar420_16_AVX2_2:
535 | vmovdqa ymm0,YMMWORD ptr[esi+eax]
536 | vpavgw ymm0,ymm0,YMMWORD ptr[edx+eax]
537 |
538 | vmovdqa YMMWORD ptr[edi+eax],ymm0
539 | add eax,ebx
540 | loop Convert_Planar422_to_Planar420_16_AVX2_2
541 |
542 | add esi,src_pitch2 ; both source rows advance by the same src_pitch2
543 | add edx,src_pitch2
544 | add edi,dst_pitch
545 | dec h ; row counter is the stack copy of h
546 | jnz short Convert_Planar422_to_Planar420_16_AVX2_1
547 |
548 | vzeroupper
549 |
550 | pop ebx
551 | pop edi
552 | pop esi
553 |
554 | ret
555 |
556 | JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 endp
557 |
558 |
559 | ;***************************************************
560 | ;** XYZ/RGB functions **
561 | ;***************************************************
562 |
563 |
564 | ;***************************************************
565 | ;** HLG functions **
566 | ;***************************************************
567 |
568 |
569 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 proc src:dword,dst:dword,w:dword,h:dword,
570 | src_pitch:dword,dst_pitch:dword
571 | ; Per 16-bit word: dst = saturating_add(src, 128) >> 8. w is counted in 8-byte units (4 words); h = rows, must be > 0.
572 | public JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2
573 | ; dst is written with vmovdqa (32-byte and 16-byte forms), so every dst row must be 32-byte aligned.
574 | push esi
575 | push edi
576 | push ebx
577 |
578 | vmovdqa ymm4,YMMWORD ptr data_w_128 ; ymm4 = rounding bias, half of the 1<<8 divisor
579 |
580 | mov esi,src
581 | mov edi,dst
582 | mov ebx,w
583 | shr ebx,2 ; ebx = number of 32-byte blocks per row
584 | mov edx,128 ; byte step of the 4x unrolled loop
585 |
586 | Convert_RGB64_16toRGB64_8_AVX2_loop_1:
587 | mov ecx,ebx
588 | xor eax,eax ; eax = byte offset inside the row
589 |
590 | shr ecx,2 ; ecx = number of 128-byte chunks
591 | jz short Convert_RGB64_16toRGB64_8_AVX2_3
592 |
593 | Convert_RGB64_16toRGB64_8_AVX2_loop_2:
594 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax] ; unsigned saturating add, so values near 65535 cannot wrap
595 | vpaddusw ymm1,ymm4,YMMWORD ptr[esi+eax+32]
596 | vpaddusw ymm2,ymm4,YMMWORD ptr[esi+eax+64]
597 | vpaddusw ymm3,ymm4,YMMWORD ptr[esi+eax+96]
598 | vpsrlw ymm0,ymm0,8
599 | vpsrlw ymm1,ymm1,8
600 | vpsrlw ymm2,ymm2,8
601 | vpsrlw ymm3,ymm3,8
602 | vmovdqa YMMWORD ptr[edi+eax],ymm0
603 | vmovdqa YMMWORD ptr[edi+eax+32],ymm1
604 | vmovdqa YMMWORD ptr[edi+eax+64],ymm2
605 | vmovdqa YMMWORD ptr[edi+eax+96],ymm3
606 | add eax,edx
607 | loop Convert_RGB64_16toRGB64_8_AVX2_loop_2
608 | ; Remainder, largest piece first: bit 1 of ebx -> 64 bytes, bit 0 of ebx -> 32, bit 1 of w -> 16, bit 0 of w -> 8.
609 | Convert_RGB64_16toRGB64_8_AVX2_3:
610 | test ebx,2
611 | jz short Convert_RGB64_16toRGB64_8_AVX2_4
612 |
613 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax]
614 | vpaddusw ymm1,ymm4,YMMWORD ptr[esi+eax+32]
615 | vpsrlw ymm0,ymm0,8
616 | vpsrlw ymm1,ymm1,8
617 | vmovdqa YMMWORD ptr[edi+eax],ymm0
618 | vmovdqa YMMWORD ptr[edi+eax+32],ymm1
619 | add eax,64
620 |
621 | Convert_RGB64_16toRGB64_8_AVX2_4:
622 | test ebx,1
623 | jz short Convert_RGB64_16toRGB64_8_AVX2_5
624 |
625 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax]
626 | vpsrlw ymm0,ymm0,8
627 | vmovdqa YMMWORD ptr[edi+eax],ymm0
628 | add eax,32
629 |
630 | Convert_RGB64_16toRGB64_8_AVX2_5:
631 | test w,2
632 | jz short Convert_RGB64_16toRGB64_8_AVX2_6
633 |
634 | vpaddusw xmm0,xmm4,XMMWORD ptr[esi+eax]
635 | vpsrlw xmm0,xmm0,8
636 | vmovdqa XMMWORD ptr[edi+eax],xmm0
637 |
638 | add eax,16
639 |
640 | Convert_RGB64_16toRGB64_8_AVX2_6:
641 | test w,1
642 | jz short Convert_RGB64_16toRGB64_8_AVX2_7
643 |
644 | vmovq xmm0,qword ptr[esi+eax] ; last 8 bytes: load exactly one qword so nothing past the row is read
645 | vpaddusw xmm0,xmm0,xmm4
646 | vpsrlw xmm0,xmm0,8
647 | vmovq qword ptr[edi+eax],xmm0
648 |
649 | Convert_RGB64_16toRGB64_8_AVX2_7:
650 | add esi,src_pitch
651 | add edi,dst_pitch
652 | dec h ; row counter is the stack copy of h
653 | jnz Convert_RGB64_16toRGB64_8_AVX2_loop_1
654 |
655 | vzeroupper
656 |
657 | pop ebx
658 | pop edi
659 | pop esi
660 |
661 | ret
662 |
663 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 endp
664 |
665 |
666 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 proc src:dword,dst:dword,w:dword,h:dword,
667 | src_pitch:dword,dst_pitch:dword
668 | ; Per 16-bit word: dst = saturating_add(src, 32) >> 6. w is counted in 8-byte units (4 words); h = rows, must be > 0.
669 | public JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2
670 | ; dst is written with vmovdqa (32-byte and 16-byte forms), so every dst row must be 32-byte aligned.
671 | push esi
672 | push edi
673 | push ebx
674 |
675 | vmovdqa ymm4,YMMWORD ptr data_w_32 ; ymm4 = rounding bias, half of the 1<<6 divisor
676 |
677 | mov esi,src
678 | mov edi,dst
679 | mov ebx,w
680 | shr ebx,2 ; ebx = number of 32-byte blocks per row
681 | mov edx,128 ; byte step of the 4x unrolled loop
682 |
683 | Convert_RGB64_16toRGB64_10_AVX2_loop_1:
684 | mov ecx,ebx
685 | xor eax,eax ; eax = byte offset inside the row
686 |
687 | shr ecx,2 ; ecx = number of 128-byte chunks
688 | jz short Convert_RGB64_16toRGB64_10_AVX2_3
689 |
690 | Convert_RGB64_16toRGB64_10_AVX2_loop_2:
691 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax] ; unsigned saturating add, so values near 65535 cannot wrap
692 | vpaddusw ymm1,ymm4,YMMWORD ptr[esi+eax+32]
693 | vpaddusw ymm2,ymm4,YMMWORD ptr[esi+eax+64]
694 | vpaddusw ymm3,ymm4,YMMWORD ptr[esi+eax+96]
695 | vpsrlw ymm0,ymm0,6
696 | vpsrlw ymm1,ymm1,6
697 | vpsrlw ymm2,ymm2,6
698 | vpsrlw ymm3,ymm3,6
699 | vmovdqa YMMWORD ptr[edi+eax],ymm0
700 | vmovdqa YMMWORD ptr[edi+eax+32],ymm1
701 | vmovdqa YMMWORD ptr[edi+eax+64],ymm2
702 | vmovdqa YMMWORD ptr[edi+eax+96],ymm3
703 | add eax,edx
704 | loop Convert_RGB64_16toRGB64_10_AVX2_loop_2
705 | ; Remainder, largest piece first: bit 1 of ebx -> 64 bytes, bit 0 of ebx -> 32, bit 1 of w -> 16, bit 0 of w -> 8.
706 | Convert_RGB64_16toRGB64_10_AVX2_3:
707 | test ebx,2
708 | jz short Convert_RGB64_16toRGB64_10_AVX2_4
709 |
710 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax]
711 | vpaddusw ymm1,ymm4,YMMWORD ptr[esi+eax+32]
712 | vpsrlw ymm0,ymm0,6
713 | vpsrlw ymm1,ymm1,6
714 | vmovdqa YMMWORD ptr[edi+eax],ymm0
715 | vmovdqa YMMWORD ptr[edi+eax+32],ymm1
716 | add eax,64
717 |
718 | Convert_RGB64_16toRGB64_10_AVX2_4:
719 | test ebx,1
720 | jz short Convert_RGB64_16toRGB64_10_AVX2_5
721 |
722 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax]
723 | vpsrlw ymm0,ymm0,6
724 | vmovdqa YMMWORD ptr[edi+eax],ymm0
725 | add eax,32
726 |
727 | Convert_RGB64_16toRGB64_10_AVX2_5:
728 | test w,2
729 | jz short Convert_RGB64_16toRGB64_10_AVX2_6
730 |
731 | vpaddusw xmm0,xmm4,XMMWORD ptr[esi+eax]
732 | vpsrlw xmm0,xmm0,6
733 | vmovdqa XMMWORD ptr[edi+eax],xmm0
734 |
735 | add eax,16
736 |
737 | Convert_RGB64_16toRGB64_10_AVX2_6:
738 | test w,1
739 | jz short Convert_RGB64_16toRGB64_10_AVX2_7
740 |
741 | vmovq xmm0,qword ptr[esi+eax] ; last 8 bytes: load exactly one qword so nothing past the row is read
742 | vpaddusw xmm0,xmm0,xmm4
743 | vpsrlw xmm0,xmm0,6
744 | vmovq qword ptr[edi+eax],xmm0
745 |
746 | Convert_RGB64_16toRGB64_10_AVX2_7:
747 | add esi,src_pitch
748 | add edi,dst_pitch
749 | dec h ; row counter is the stack copy of h
750 | jnz Convert_RGB64_16toRGB64_10_AVX2_loop_1
751 |
752 | vzeroupper
753 |
754 | pop ebx
755 | pop edi
756 | pop esi
757 |
758 | ret
759 |
760 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 endp
761 |
762 |
763 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 proc src:dword,dst:dword,w:dword,h:dword,
764 | src_pitch:dword,dst_pitch:dword
765 | ; Per 16-bit word: dst = saturating_add(src, 8) >> 4. w is counted in 8-byte units (4 words); h = rows, must be > 0.
766 | public JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2
767 | ; dst is written with vmovdqa (32-byte and 16-byte forms), so every dst row must be 32-byte aligned.
768 | push esi
769 | push edi
770 | push ebx
771 |
772 | vmovdqa ymm4,YMMWORD ptr data_w_8 ; ymm4 = rounding bias, half of the 1<<4 divisor
773 |
774 | mov esi,src
775 | mov edi,dst
776 | mov ebx,w
777 | shr ebx,2 ; ebx = number of 32-byte blocks per row
778 | mov edx,128 ; byte step of the 4x unrolled loop
779 |
780 | Convert_RGB64_16toRGB64_12_AVX2_loop_1:
781 | mov ecx,ebx
782 | xor eax,eax ; eax = byte offset inside the row
783 |
784 | shr ecx,2 ; ecx = number of 128-byte chunks
785 | jz short Convert_RGB64_16toRGB64_12_AVX2_3
786 |
787 | Convert_RGB64_16toRGB64_12_AVX2_loop_2:
788 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax] ; unsigned saturating add, so values near 65535 cannot wrap
789 | vpaddusw ymm1,ymm4,YMMWORD ptr[esi+eax+32]
790 | vpaddusw ymm2,ymm4,YMMWORD ptr[esi+eax+64]
791 | vpaddusw ymm3,ymm4,YMMWORD ptr[esi+eax+96]
792 | vpsrlw ymm0,ymm0,4
793 | vpsrlw ymm1,ymm1,4
794 | vpsrlw ymm2,ymm2,4
795 | vpsrlw ymm3,ymm3,4
796 | vmovdqa YMMWORD ptr[edi+eax],ymm0
797 | vmovdqa YMMWORD ptr[edi+eax+32],ymm1
798 | vmovdqa YMMWORD ptr[edi+eax+64],ymm2
799 | vmovdqa YMMWORD ptr[edi+eax+96],ymm3
800 | add eax,edx
801 | loop Convert_RGB64_16toRGB64_12_AVX2_loop_2
802 | ; Remainder, largest piece first: bit 1 of ebx -> 64 bytes, bit 0 of ebx -> 32, bit 1 of w -> 16, bit 0 of w -> 8.
803 | Convert_RGB64_16toRGB64_12_AVX2_3:
804 | test ebx,2
805 | jz short Convert_RGB64_16toRGB64_12_AVX2_4
806 |
807 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax]
808 | vpaddusw ymm1,ymm4,YMMWORD ptr[esi+eax+32]
809 | vpsrlw ymm0,ymm0,4
810 | vpsrlw ymm1,ymm1,4
811 | vmovdqa YMMWORD ptr[edi+eax],ymm0
812 | vmovdqa YMMWORD ptr[edi+eax+32],ymm1
813 | add eax,64
814 |
815 | Convert_RGB64_16toRGB64_12_AVX2_4:
816 | test ebx,1
817 | jz short Convert_RGB64_16toRGB64_12_AVX2_5
818 |
819 | vpaddusw ymm0,ymm4,YMMWORD ptr[esi+eax]
820 | vpsrlw ymm0,ymm0,4
821 | vmovdqa YMMWORD ptr[edi+eax],ymm0
822 | add eax,32
823 |
824 | Convert_RGB64_16toRGB64_12_AVX2_5:
825 | test w,2
826 | jz short Convert_RGB64_16toRGB64_12_AVX2_6
827 |
828 | vpaddusw xmm0,xmm4,XMMWORD ptr[esi+eax]
829 | vpsrlw xmm0,xmm0,4
830 | vmovdqa XMMWORD ptr[edi+eax],xmm0
831 |
832 | add eax,16
833 |
834 | Convert_RGB64_16toRGB64_12_AVX2_6:
835 | test w,1
836 | jz short Convert_RGB64_16toRGB64_12_AVX2_7
837 |
838 | vmovq xmm0,qword ptr[esi+eax] ; last 8 bytes: load exactly one qword so nothing past the row is read
839 | vpaddusw xmm0,xmm0,xmm4
840 | vpsrlw xmm0,xmm0,4
841 | vmovq qword ptr[edi+eax],xmm0
842 |
843 | Convert_RGB64_16toRGB64_12_AVX2_7:
844 | add esi,src_pitch
845 | add edi,dst_pitch
846 | dec h ; row counter is the stack copy of h
847 | jnz Convert_RGB64_16toRGB64_12_AVX2_loop_1
848 |
849 | vzeroupper
850 |
851 | pop ebx
852 | pop edi
853 | pop esi
854 |
855 | ret
856 |
857 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 endp
858 |
859 |
860 | JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 proc dst:dword,srcY:dword,w:dword,h:dword,dst_pitch:dword,src_pitchY:dword
861 | ; In place on dst: each group of four 16-bit words is multiplied by the matching 32-bit float read from srcY, converted back to int and packed to words with unsigned saturation.
862 | public JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2
863 |
864 | push esi ; esi, edi and ebx are used below and restored before ret
865 | push edi
866 | push ebx
867 |
868 | mov ebx,w
869 | shr ebx,1 ; ebx = w/2 = number of 2-group (16-byte) iterations per row
870 | mov esi,srcY ; esi = current row of float factors
871 | mov edi,dst ; edi = current row of 16-bit data (read and written)
872 | mov edx,8 ; factor-row step per iteration (2 dwords); dst is addressed as edi+2*eax, i.e. 16 bytes per iteration
873 | vpxor xmm4,xmm4,xmm4 ; xmm4 = 0, interleaved below to zero-extend words to dwords
874 |
875 | Convert_16_RGB64_HLG_OOTF_AVX2_1: ; ---- start of one row ----
876 | mov ecx,ebx
877 | xor eax,eax ; eax = byte offset in the factor row
878 | or ecx,ecx
879 | jz short Convert_16_RGB64_HLG_OOTF_AVX2_3 ; w < 2: only the single-group tail can run
880 |
881 | Convert_16_RGB64_HLG_OOTF_AVX2_2: ; ---- two 4-word groups per iteration ----
882 | vbroadcastss xmm0,dword ptr[esi+eax] ; factor of the first group in all 4 lanes
883 | vbroadcastss xmm1,dword ptr[esi+eax+4] ; factor of the second group in all 4 lanes
884 | vmovdqa xmm2,XMMWORD ptr[edi+2*eax] ; aligned load of 8 words from dst
885 | vinsertf128 ymm0,ymm0,xmm1,1 ; ymm0 = [factor0 x4 | factor1 x4]
886 | vpunpckhwd xmm3,xmm2,xmm4 ; upper 4 words zero-extended to dwords (second group)
887 | vpunpcklwd xmm2,xmm2,xmm4 ; lower 4 words zero-extended to dwords (first group)
888 | vinserti128 ymm2,ymm2,xmm3,1 ; ymm2 = 8 dwords, first group low, second group high
889 | vcvtdq2ps ymm2,ymm2 ; int32 to float
890 | vmulps ymm2,ymm2,ymm0 ; scale each group by its factor
891 | vcvtps2dq ymm2,ymm2 ; float to int32 using the current MXCSR rounding mode
892 | vextracti128 xmm3,ymm2,1
893 | vpackusdw xmm2,xmm2,xmm3 ; dwords to words with unsigned saturation
894 | vmovdqa XMMWORD ptr[edi+2*eax],xmm2 ; aligned store back over the input
895 |
896 | add eax,edx
897 | loop Convert_16_RGB64_HLG_OOTF_AVX2_2 ; dec ecx, repeat while ecx != 0
898 |
899 | Convert_16_RGB64_HLG_OOTF_AVX2_3: ; ---- tail: odd w leaves one 4-word group ----
900 | test w,1
901 | jz short Convert_16_RGB64_HLG_OOTF_AVX2_4
902 |
903 | vbroadcastss xmm0,dword ptr[esi+eax]
904 | vmovq xmm2,qword ptr[edi+2*eax] ; load exactly 8 bytes
905 | vpunpcklwd xmm2,xmm2,xmm4
906 | vcvtdq2ps xmm2,xmm2
907 | vmulps xmm2,xmm2,xmm0
908 | vcvtps2dq xmm2,xmm2
909 | vpackusdw xmm2,xmm2,xmm2 ; only the low 4 words are stored below
910 | vmovq qword ptr[edi+2*eax],xmm2 ; store exactly 8 bytes
911 |
912 | Convert_16_RGB64_HLG_OOTF_AVX2_4: ; ---- advance to the next row ----
913 | add edi,dst_pitch
914 | add esi,src_pitchY
915 | dec h ; counts rows down in the stack argument itself; NOTE(review): h = 0 on entry wraps around instead of exiting
916 | jnz Convert_16_RGB64_HLG_OOTF_AVX2_1
917 |
918 | vzeroupper ; clear the upper halves of the ymm registers before returning
919 |
920 | pop ebx
921 | pop edi
922 | pop esi
923 |
924 | ret
925 |
926 | JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 endp
927 |
928 |
929 | ;***************************************************
930 | ;** XYZ/HDR/SDR functions **
931 | ;***************************************************
932 |
933 |
934 | JPSDR_HDRTools_Scale_20_XYZ_AVX2 proc src:dword,dst:dword,w8:dword,h:dword,src_pitch:dword,dst_pitch:dword,
935 | ValMin:dword,Coeff:dword
936 | ; dst(int32) = clamp(round((src(float) + *ValMin) * *Coeff * data_f_1048575), data_dw_0, data_dw_1048575); w8 = 32-byte groups (8 floats) per row.
937 | public JPSDR_HDRTools_Scale_20_XYZ_AVX2
938 |
939 | push esi ; esi, edi and ebx are used below and restored before ret
940 | push edi
941 | push ebx
942 |
943 | mov esi,ValMin ; ValMin and Coeff are pointers to a single float each
944 | vbroadcastss ymm1,dword ptr[esi] ; ymm1 = *ValMin in all 8 lanes
945 | mov esi,Coeff
946 | vbroadcastss ymm2,dword ptr[esi] ; ymm2 = *Coeff in all 8 lanes
947 |
948 | vmovdqa ymm3,YMMWORD ptr data_dw_1048575 ; upper clamp bound (constant defined elsewhere; name suggests 1048575 = 2^20-1)
949 | vmovdqa ymm4,YMMWORD ptr data_dw_0 ; lower clamp bound (constant defined elsewhere)
950 | vmulps ymm2,ymm2,YMMWORD ptr data_f_1048575 ; fold the output range factor into the coefficient once, outside the loops
951 |
952 | mov esi,src ; esi = current source row (floats)
953 | mov edi,dst ; edi = current destination row (int32)
954 | mov ebx,w8
955 | mov edx,32 ; byte step per iteration
956 |
957 | Scale_20_XYZ_AVX2_1: ; ---- start of one row ----
958 | xor eax,eax ; eax = byte offset inside the row
959 | mov ecx,ebx ; NOTE(review): w8 = 0 makes the loop instruction below iterate 2^32 times - callers must pass w8 >= 1
960 | Scale_20_XYZ_AVX2_2: ; ---- 8 floats per iteration ----
961 | vaddps ymm0,ymm1,YMMWORD ptr[esi+eax] ; src + ValMin
962 | vmulps ymm0,ymm0,ymm2 ; times the pre-multiplied coefficient
963 | vcvtps2dq ymm0,ymm0 ; float to int32 using the current MXCSR rounding mode
964 | vpminsd ymm0,ymm0,ymm3 ; clamp to the upper bound
965 | vpmaxsd ymm0,ymm0,ymm4 ; clamp to the lower bound
966 | vmovdqa YMMWORD ptr[edi+eax],ymm0 ; aligned store: dst+offset must be 32-byte aligned
967 |
968 | add eax,edx
969 | loop Scale_20_XYZ_AVX2_2 ; dec ecx, repeat while ecx != 0
970 |
971 | add esi,src_pitch ; advance both pointers to the next row
972 | add edi,dst_pitch
973 | dec h ; counts rows down in the stack argument itself; NOTE(review): h = 0 on entry wraps around instead of exiting
974 | jnz short Scale_20_XYZ_AVX2_1
975 |
976 | vzeroupper ; clear the upper halves of the ymm registers before returning
977 |
978 | pop ebx
979 | pop edi
980 | pop esi
981 |
982 | ret
983 |
984 | JPSDR_HDRTools_Scale_20_XYZ_AVX2 endp
985 |
986 |
987 | JPSDR_HDRTools_Scale_20_RGB_AVX2 proc src:dword,dst:dword,w8:dword,h:dword,src_pitch:dword,dst_pitch:dword
988 | ; dst(int32) = clamp(round(src(float) * data_f_1048575), data_dw_0, data_dw_1048575); w8 = 32-byte groups (8 floats) per row.
989 | public JPSDR_HDRTools_Scale_20_RGB_AVX2
990 |
991 | push esi ; esi, edi and ebx are used below and restored before ret
992 | push edi
993 | push ebx
994 |
995 | vmovaps ymm1,YMMWORD ptr data_f_1048575 ; scale factor (constant defined elsewhere; name suggests 1048575.0 = 2^20-1)
996 | vmovdqa ymm2,YMMWORD ptr data_dw_1048575 ; upper clamp bound (constant defined elsewhere)
997 | vmovdqa ymm3,YMMWORD ptr data_dw_0 ; lower clamp bound (constant defined elsewhere)
998 |
999 | mov esi,src ; esi = current source row (floats)
1000 | mov edi,dst ; edi = current destination row (int32)
1001 | mov ebx,w8
1002 | mov edx,32 ; byte step per iteration
1003 |
1004 | Scale_20_RGB_AVX2_1: ; ---- start of one row ----
1005 | xor eax,eax ; eax = byte offset inside the row
1006 | mov ecx,ebx ; NOTE(review): w8 = 0 makes the loop instruction below iterate 2^32 times - callers must pass w8 >= 1
1007 | Scale_20_RGB_AVX2_2: ; ---- 8 floats per iteration ----
1008 | vmulps ymm0,ymm1,YMMWORD ptr[esi+eax] ; src * scale factor
1009 | vcvtps2dq ymm0,ymm0 ; float to int32 using the current MXCSR rounding mode
1010 | vpminsd ymm0,ymm0,ymm2 ; clamp to the upper bound
1011 | vpmaxsd ymm0,ymm0,ymm3 ; clamp to the lower bound
1012 | vmovdqa YMMWORD ptr[edi+eax],ymm0 ; aligned store: dst+offset must be 32-byte aligned
1013 |
1014 | add eax,edx
1015 | loop Scale_20_RGB_AVX2_2 ; dec ecx, repeat while ecx != 0
1016 |
1017 | add esi,src_pitch ; advance both pointers to the next row
1018 | add edi,dst_pitch
1019 | dec h ; counts rows down in the stack argument itself; NOTE(review): h = 0 on entry wraps around instead of exiting
1020 | jnz short Scale_20_RGB_AVX2_1
1021 |
1022 | vzeroupper ; clear the upper halves of the ymm registers before returning
1023 |
1024 | pop ebx
1025 | pop edi
1026 | pop esi
1027 |
1028 | ret
1029 |
1030 | JPSDR_HDRTools_Scale_20_RGB_AVX2 endp
1031 |
1032 |
1033 | JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 proc src:dword,dst1:dword,dst2:dword,w8:dword,h:dword,src_pitch:dword,
1034 | dst_pitch1:dword,dst_pitch2:dword,ValMinX:dword,CoeffX:dword,ValMinZ:dword,CoeffZ:dword
1035 | ; In place on dst1 and dst2 (floats in, int32 out): v = clamp(round((v * src + *ValMin) * *Coeff * data_f_65535), data_dw_0, data_dw_65535), with the X pair for dst1 and the Z pair for dst2.
1036 | public JPSDR_HDRTools_BT2446C_16_XYZ_AVX2
1037 |
1038 | push esi ; esi, edi and ebx are used below and restored before ret
1039 | push edi
1040 | push ebx
1041 |
1042 | mov esi,ValMinX ; the four ValMin/Coeff arguments are pointers to a single float each
1043 | vbroadcastss ymm2,dword ptr[esi] ; ymm2 = *ValMinX in all 8 lanes
1044 | mov esi,CoeffX
1045 | vbroadcastss ymm3,dword ptr[esi] ; ymm3 = *CoeffX
1046 | mov esi,ValMinZ
1047 | vbroadcastss ymm4,dword ptr[esi] ; ymm4 = *ValMinZ
1048 | mov esi,CoeffZ
1049 | vbroadcastss ymm5,dword ptr[esi] ; ymm5 = *CoeffZ
1050 |
1051 | vmovdqa ymm6,YMMWORD ptr data_dw_65535 ; upper clamp bound (constant defined elsewhere)
1052 | vmovdqa ymm7,YMMWORD ptr data_dw_0 ; lower clamp bound (constant defined elsewhere)
1053 | vmulps ymm3,ymm3,YMMWORD ptr data_f_65535 ; fold the output range factor into both coefficients once
1054 | vmulps ymm5,ymm5,YMMWORD ptr data_f_65535
1055 |
1056 | mov esi,src ; esi = current row of the common multiplier (floats)
1057 | mov edi,dst1 ; edi = current row of the first in/out buffer
1058 | mov edx,dst2 ; edx = current row of the second in/out buffer
1059 | mov ebx,32 ; byte step per iteration
1060 |
1061 | BT2446C_16_XYZ_AVX2_1: ; ---- start of one row ----
1062 | xor eax,eax ; eax = byte offset inside the row
1063 | mov ecx,w8 ; NOTE(review): w8 = 0 makes the loop instruction below iterate 2^32 times - callers must pass w8 >= 1
1064 | BT2446C_16_XYZ_AVX2_2: ; ---- 8 floats of each buffer per iteration ----
1065 | vmovaps ymm0,YMMWORD ptr[edi+eax] ; aligned load of 8 floats from dst1
1066 | vmovaps ymm1,YMMWORD ptr[edx+eax] ; aligned load of 8 floats from dst2
1067 | vmulps ymm0,ymm0,YMMWORD ptr[esi+eax] ; both buffers are multiplied by the same src values
1068 | vmulps ymm1,ymm1,YMMWORD ptr[esi+eax]
1069 | vaddps ymm0,ymm0,ymm2 ; add *ValMinX / *ValMinZ
1070 | vaddps ymm1,ymm1,ymm4
1071 | vmulps ymm0,ymm0,ymm3 ; times the pre-multiplied coefficients
1072 | vmulps ymm1,ymm1,ymm5
1073 | vcvtps2dq ymm0,ymm0 ; float to int32 using the current MXCSR rounding mode
1074 | vcvtps2dq ymm1,ymm1
1075 | vpminsd ymm0,ymm0,ymm6 ; clamp to the upper bound
1076 | vpminsd ymm1,ymm1,ymm6
1077 | vpmaxsd ymm0,ymm0,ymm7 ; clamp to the lower bound
1078 | vpmaxsd ymm1,ymm1,ymm7
1079 | vmovdqa YMMWORD ptr[edi+eax],ymm0 ; int32 results overwrite the float inputs
1080 | vmovdqa YMMWORD ptr[edx+eax],ymm1
1081 |
1082 | add eax,ebx
1083 | loop BT2446C_16_XYZ_AVX2_2 ; dec ecx, repeat while ecx != 0
1084 |
1085 | add esi,src_pitch ; advance all three pointers to the next row
1086 | add edi,dst_pitch1
1087 | add edx,dst_pitch2
1088 | dec h ; counts rows down in the stack argument itself; NOTE(review): h = 0 on entry wraps around instead of exiting
1089 | jnz short BT2446C_16_XYZ_AVX2_1
1090 |
1091 | vzeroupper ; clear the upper halves of the ymm registers before returning
1092 |
1093 | pop ebx
1094 | pop edi
1095 | pop esi
1096 |
1097 | ret
1098 |
1099 | JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 endp
1100 |
1101 |
1102 | end
1103 |
1104 |
1105 |
1106 |
1107 |
1108 |
--------------------------------------------------------------------------------
/HDRTools/HDRTools.h:
--------------------------------------------------------------------------------
1 | /*
2 | * HDRTools()
3 | *
4 | * Several functions for working on HDR data, and linear to non-linear conversions.
5 | * Copyright (C) 2018 JPSDR
6 | *
7 | * HDRTools is free software; you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License as published by
9 | * the Free Software Foundation; either version 2, or (at your option)
10 | * any later version.
11 | *
12 | * HDRTools is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with GNU Make; see the file COPYING. If not, write to
19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20 | *
21 | */
22 |
23 | #include "./avisynth.h"
24 | #include "./ThreadPoolInterface.h"
25 |
26 | #define HDRTOOLS_VERSION "HDRTools 1.2.3 JPSDR" // Plugin name/version string; where it is reported is not visible in this header.
27 |
28 |
29 | typedef struct _dataLookUp // Plain aggregate of 16-bit min/max values, 32-bit offsets and double coefficients; it is filled and read outside this header.
30 | {
31 | uint16_t Min_Y,Max_Y,Min_U,Max_U,Min_V,Max_V; // NOTE(review): presumably per-plane value limits - confirm against the code that fills them
32 | int32_t Offset_Y,Offset_U,Offset_V,Offset_R,Offset_G,Offset_B; // NOTE(review): presumably additive offsets per YUV and RGB channel - confirm
33 | double Coeff_Y,Coeff_U,Coeff_V; // NOTE(review): presumably per-plane scale factors - confirm
34 | } dataLookUp;
35 |
36 | typedef struct _MT_Data_Info_HDRTools // Plain aggregate of untyped buffer pointers, pitches, modulos, row/width ranges and flags; the filter classes in this header hold arrays of it sized MAX_MT_THREADS.
37 | {
38 | void *src1,*src2,*src3,*src4; // up to four untyped source pointers
39 | void *dst1,*dst2,*dst3,*dst4; // up to four untyped destination pointers
40 | ptrdiff_t src_pitch1,src_pitch2,src_pitch3,src_pitch4; // NOTE(review): presumably row strides in bytes, one per source pointer - confirm
41 | ptrdiff_t dst_pitch1,dst_pitch2,dst_pitch3,dst_pitch4; // NOTE(review): presumably row strides in bytes, one per destination pointer - confirm
42 | ptrdiff_t src_modulo1,src_modulo2,src_modulo3,src_modulo4; // NOTE(review): presumably pitch minus used row bytes - confirm
43 | ptrdiff_t dst_modulo1,dst_modulo2,dst_modulo3,dst_modulo4;
44 | int32_t src_Y_h_min,src_Y_h_max,src_Y_w; // NOTE(review): presumably the row range and width of the source luma slice - confirm
45 | int32_t src_UV_h_min,src_UV_h_max,src_UV_w; // same triple for the source chroma planes (assumption from the names)
46 | int32_t dst_Y_h_min,dst_Y_h_max,dst_Y_w;
47 | int32_t dst_UV_h_min,dst_UV_h_max,dst_UV_w;
48 | bool top,bottom; // NOTE(review): presumably mark the first/last slice of a frame - confirm
49 | bool moveY8to16; // meaning not visible in this header
50 | } MT_Data_Info_HDRTools;
51 |
52 |
53 | class ConvertYUVtoLinearRGB : public GenericVideoFilter // Filter class declaration; the name indicates YUV to linear RGB conversion, implemented outside this header.
54 | {
55 | public: // constructor, destructor and the two __stdcall entry points
56 | ConvertYUVtoLinearRGB(PClip _child,uint8_t _Color,uint8_t _OutputMode,uint8_t _HDRMode,double _HLG_Lb,double _HLG_Lw,
57 | uint8_t _HLGColor,bool _OOTF,bool _EOTF,bool _fullrange,bool _mpeg2c,uint8_t _threads, bool _sleep,
58 | bool negativePrefetch, IScriptEnvironment* env);
59 | virtual ~ConvertYUVtoLinearRGB();
60 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // returns the PVideoFrame for frame index n
61 |
62 | int __stdcall SetCacheHints(int cachehints, int frame_range); // handling of the hint values is defined in the implementation, not here
63 |
64 | private:
65 | uint8_t Color,OutputMode,HDRMode,HLGColor; // same names as the constructor parameters; presumably stored copies - confirm
66 | bool OOTF,mpeg2c,fullrange,EOTF;
67 | bool sleep,HLG_Mode;
68 | double HLG_Lb,HLG_Lw;
69 | uint16_t *lookup_Upscale8; // raw table pointers from here down to lookupHLG_inv_OOTF; allocation and ownership are not visible in this header
70 | uint32_t *lookup_Upscale16,*lookup_8to16;
71 | int16_t *lookupRGB_8;
72 | int32_t *lookupRGB_16,*lookupHLG_RGB_16;
73 | uint8_t *lookupL_8;
74 | uint16_t *lookupL_16;
75 | float *lookupL_32;
76 | void *lookupHLG_OOTF,*lookupHLG_inv_OOTF; // untyped: element type is decided by the implementation
77 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // NOTE(review): presumably CPU-feature switches selecting the SIMD code paths - confirm
78 |
79 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; // NOTE(review): presumably cached properties of the clip and host - confirm
80 | uint8_t pixelsize; // AVS16
81 | uint8_t bits_per_pixel;
82 |
83 | VideoInfo *vi_original,*vi_422,*vi_444,*vi_RGB64,*vi_PlaneY_HLG; // VideoInfo pointers; ownership not visible here
84 |
85 | dataLookUp dl;
86 |
87 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // one entry per possible worker thread
88 | MT_Data_Info_HDRTools MT_Data[3][MAX_MT_THREADS]; // three sets of per-thread descriptors, matching threads_number[3]; what each set is for is not visible here
89 | uint8_t threads,threads_number[3],max_threads;
90 | uint32_t UserId;
91 |
92 | ThreadPoolFunction StaticThreadpoolF;
93 |
94 | static void StaticThreadpool(void *ptr); // static, takes an untyped pointer; NOTE(review): presumably the worker entry stored in StaticThreadpoolF - confirm
95 |
96 | void FreeData(void); // NOTE(review): presumably releases the tables and VideoInfo objects above - confirm
97 | };
98 |
99 |
100 | class ConvertYUVtoXYZ : public GenericVideoFilter // Filter class declaration; the name indicates YUV to XYZ conversion, implemented outside this header.
101 | {
102 | public: // constructor, destructor and the two __stdcall entry points
103 | ConvertYUVtoXYZ(PClip _child,uint8_t _Color,uint8_t _OutputMode,uint8_t _HDRMode,double _HLG_Lb,double _HLG_Lw,
104 | double _Crosstalk,uint8_t _HLGColor,bool _OOTF,bool _EOTF,bool _fullrange,bool _mpeg2c,float _Rx,float _Ry,
105 | float _Gx,float _Gy,float _Bx,float _By,float _Wx,float _Wy,
106 | uint8_t _threads, bool _sleep, bool negativePrefetch, IScriptEnvironment* env);
107 | virtual ~ConvertYUVtoXYZ();
108 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // returns the PVideoFrame for frame index n
109 |
110 | int __stdcall SetCacheHints(int cachehints, int frame_range); // handling of the hint values is defined in the implementation, not here
111 |
112 | private:
113 | uint8_t Color,OutputMode,HDRMode,HLGColor; // same names as the constructor parameters; presumably stored copies - confirm
114 | bool OOTF,mpeg2c,fullrange,EOTF;
115 | float Rx,Ry,Gx,Gy,Bx,By,Wx,Wy; // NOTE(review): presumably chromaticity coordinates of the R, G, B primaries and white point - confirm
116 | bool sleep,HLG_Mode;
117 | double HLG_Lb,HLG_Lw,Crosstalk;
118 | uint16_t *lookup_Upscale8; // raw table pointers from here down to lookupHLG_inv_OOTF; allocation and ownership are not visible in this header
119 | uint32_t *lookup_Upscale16,*lookup_8to16;
120 | int16_t *lookupRGB_8,*lookupXYZ_8,*lookupCrosstalk_8;
121 | int32_t *lookupRGB_16,*lookupXYZ_16,*lookupCrosstalk_16,*lookupHLG_RGB_16;
122 | uint8_t *lookupL_8;
123 | uint16_t *lookupL_16;
124 | float *lookupL_32;
125 | void *lookupHLG_OOTF,*lookupHLG_inv_OOTF; // untyped: element type is decided by the implementation
126 | float Coeff_XYZ[9],*Coeff_XYZ_asm,Coeff_Crosstalk[9],*Coeff_Crosstalk_asm; // nine-element arrays (NOTE(review): presumably 3x3 matrices) plus separate pointers whose layout is not visible here
127 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // NOTE(review): presumably CPU-feature switches selecting the SIMD code paths - confirm
128 |
129 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; // NOTE(review): presumably cached properties of the clip and host - confirm
130 | uint8_t pixelsize; // AVS16
131 | uint8_t bits_per_pixel;
132 |
133 | VideoInfo *vi_original,*vi_422,*vi_444,*vi_RGB64,*vi_PlaneY_HLG; // VideoInfo pointers; ownership not visible here
134 |
135 | dataLookUp dl;
136 |
137 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // one entry per possible worker thread
138 | MT_Data_Info_HDRTools MT_Data[3][MAX_MT_THREADS]; // three sets of per-thread descriptors, matching threads_number[3]; what each set is for is not visible here
139 | uint8_t threads,threads_number[3],max_threads;
140 | uint32_t UserId;
141 |
142 | ThreadPoolFunction StaticThreadpoolF;
143 |
144 | static void StaticThreadpool(void *ptr); // static, takes an untyped pointer; NOTE(review): presumably the worker entry stored in StaticThreadpoolF - confirm
145 |
146 | void FreeData(void); // NOTE(review): presumably releases the tables and VideoInfo objects above - confirm
147 | };
148 |
149 |
150 | class ConvertRGBtoXYZ : public GenericVideoFilter // Filter class declaration; the name indicates RGB to XYZ conversion, implemented outside this header.
151 | {
152 | public: // constructor, destructor and the two __stdcall entry points
153 | ConvertRGBtoXYZ(PClip _child,uint8_t _Color,uint8_t _OutputMode,uint8_t _HDRMode,double _HLG_Lb,double _HLG_Lw,
154 | double _Crosstalk,uint8_t _HLGColor,bool _OOTF,bool _EOTF,bool _fastmode,float _Rx,float _Ry,
155 | float _Gx,float _Gy,float _Bx,float _By,float _Wx,float _Wy,
156 | uint8_t _threads, bool _sleep, bool negativePrefetch, IScriptEnvironment* env);
157 | virtual ~ConvertRGBtoXYZ();
158 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // returns the PVideoFrame for frame index n
159 |
160 | int __stdcall SetCacheHints(int cachehints, int frame_range); // handling of the hint values is defined in the implementation, not here
161 |
162 | private:
163 | uint8_t Color,OutputMode,HDRMode,HLGColor; // same names as the constructor parameters; presumably stored copies - confirm
164 | bool OOTF,EOTF,fastmode;
165 | float Rx,Ry,Gx,Gy,Bx,By,Wx,Wy; // NOTE(review): presumably chromaticity coordinates of the R, G, B primaries and white point - confirm
166 | bool sleep,HLG_Mode;
167 | double HLG_Lb,HLG_Lw,Crosstalk;
168 | int16_t *lookupXYZ_8,*lookupCrosstalk_8; // raw table pointers from here down to lookupL_20; allocation and ownership are not visible in this header
169 | int32_t *lookupXYZ_16,*lookupCrosstalk_16,*lookupHLG_RGB_16;
170 | uint8_t *lookupL_8;
171 | uint16_t *lookupL_16,*lookupL_8to16;
172 | float *lookupL_32,*lookupL_8to32,*lookupL_20;
173 | float Coeff_XYZ[9],*Coeff_XYZ_asm,Coeff_Crosstalk[9],*Coeff_Crosstalk_asm; // nine-element arrays (NOTE(review): presumably 3x3 matrices) plus separate pointers whose layout is not visible here
174 | void *lookupHLG_OOTF,*lookupHLG_inv_OOTF; // untyped: element type is decided by the implementation
175 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // NOTE(review): presumably CPU-feature switches selecting the SIMD code paths - confirm
176 |
177 | VideoInfo *vi_PlaneY_HLG; // VideoInfo pointer; ownership not visible here
178 |
179 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; // NOTE(review): presumably cached properties of the clip and host - confirm
180 | uint8_t pixelsize; // AVS16
181 | uint8_t bits_per_pixel;
182 |
183 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // one entry per possible worker thread
184 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; // one descriptor per possible worker thread
185 | uint8_t threads,threads_number;
186 | uint32_t UserId;
187 |
188 | ThreadPoolFunction StaticThreadpoolF;
189 |
190 | static void StaticThreadpool(void *ptr); // static, takes an untyped pointer; NOTE(review): presumably the worker entry stored in StaticThreadpoolF - confirm
191 |
192 | void FreeData(void); // NOTE(review): presumably releases the tables above - confirm
193 | };
194 |
195 |
196 | class ConvertLinearRGBtoYUV : public GenericVideoFilter // Filter class declaration; the name indicates linear RGB to YUV conversion, implemented outside this header.
197 | {
198 | public: // constructor, destructor and the two __stdcall entry points
199 | ConvertLinearRGBtoYUV(PClip _child,uint8_t _Color,uint8_t _OutputMode,uint8_t _HDRMode,double _HLG_Lb,double _HLG_Lw,
200 | uint8_t _HLGColor,bool _OOTF,bool _EOTF,bool _fullrange,bool _mpeg2c,bool _fastmode,uint8_t _threads, bool _sleep,
201 | bool negativePrefetch,IScriptEnvironment* env);
202 | virtual ~ConvertLinearRGBtoYUV();
203 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // returns the PVideoFrame for frame index n
204 |
205 | int __stdcall SetCacheHints(int cachehints, int frame_range); // handling of the hint values is defined in the implementation, not here
206 |
207 | private:
208 | uint8_t Color,OutputMode,HDRMode,HLGColor; // same names as the constructor parameters; presumably stored copies - confirm
209 | bool OOTF,mpeg2c,fullrange,fastmode,EOTF;
210 | bool sleep,HLG_Mode;
211 | double HLG_Lb,HLG_Lw;
212 | int16_t *lookupRGB_8; // raw table pointers from here down to lookupHLG_inv_OOTF; allocation and ownership are not visible in this header
213 | int32_t *lookupRGB_16,*lookupHLG_RGB_16;
214 | uint8_t *lookupL_8;
215 | uint16_t *lookupL_16,*lookupL_20;
216 | void *lookupHLG_OOTF,*lookupHLG_inv_OOTF; // untyped: element type is decided by the implementation
217 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // NOTE(review): presumably CPU-feature switches selecting the SIMD code paths - confirm
218 |
219 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; // NOTE(review): presumably cached properties of the clip and host - confirm
220 | uint8_t pixelsize; // AVS16
221 | uint8_t bits_per_pixel;
222 |
223 | VideoInfo *vi_original,*vi_420,*vi_422,*vi_444,*vi_RGB32,*vi_RGB64,*vi_PlaneY_HLG; // VideoInfo pointers; ownership not visible here
224 |
225 | dataLookUp dl;
226 |
227 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // one entry per possible worker thread
228 | MT_Data_Info_HDRTools MT_Data[3][MAX_MT_THREADS]; // three sets of per-thread descriptors, matching threads_number[3]; what each set is for is not visible here
229 | uint8_t threads,threads_number[3],max_threads;
230 | uint32_t UserId;
231 |
232 | ThreadPoolFunction StaticThreadpoolF;
233 |
234 | static void StaticThreadpool(void *ptr); // static, takes an untyped pointer; NOTE(review): presumably the worker entry stored in StaticThreadpoolF - confirm
235 |
236 | void FreeData(void); // NOTE(review): presumably releases the tables and VideoInfo objects above - confirm
237 | };
238 |
239 |
240 | class ConvertXYZtoYUV : public GenericVideoFilter // Filter class declaration; the name indicates XYZ to YUV conversion, implemented outside this header.
241 | {
242 | public: // constructor, destructor and the two __stdcall entry points
243 | ConvertXYZtoYUV(PClip _child,uint8_t _Color,uint8_t _OutputMode,uint8_t _HDRMode,double _HLG_Lb,double _HLG_Lw,
244 | double _Crosstalk,uint8_t _HLGColor,bool _OOTF,bool _EOTF,bool _fullrange,bool _mpeg2c,bool _fastmode,
245 | float _Rx,float _Ry,float _Gx,float _Gy,float _Bx,float _By,float _Wx,float _Wy,float _pRx,float _pRy,
246 | float _pGx,float _pGy,float _pBx,float _pBy,float _pWx,float _pWy,
247 | uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
248 | virtual ~ConvertXYZtoYUV();
249 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // returns the PVideoFrame for frame index n
250 |
251 | int __stdcall SetCacheHints(int cachehints, int frame_range); // handling of the hint values is defined in the implementation, not here
252 |
253 | private:
254 | uint8_t Color,OutputMode,HDRMode,HLGColor; // same names as the constructor parameters; presumably stored copies - confirm
255 | bool OOTF,mpeg2c,fullrange,fastmode,EOTF;
256 | float Rx,Ry,Gx,Gy,Bx,By,Wx,Wy; // NOTE(review): presumably chromaticity coordinates of the R, G, B primaries and white point - confirm
257 | float pRx,pRy,pGx,pGy,pBx,pBy,pWx,pWy; // second set of eight coordinates; what the p prefix stands for is not visible here
258 | bool sleep,HLG_Mode;
259 | double HLG_Lb,HLG_Lw,Crosstalk;
260 | int16_t *lookupRGB_8,*lookupXYZ_8,*lookupCrosstalk_8; // raw table pointers from here down to lookupL_20; allocation and ownership are not visible in this header
261 | int32_t *lookupRGB_16,*lookupXYZ_16,*lookupCrosstalk_16,*lookupHLG_RGB_16;
262 | uint8_t *lookupL_8;
263 | uint16_t *lookupL_16,*lookupL_20;
264 | float Coeff_XYZ[9],*Coeff_XYZ_asm,Coeff_Crosstalk[9],*Coeff_Crosstalk_asm; // nine-element arrays (NOTE(review): presumably 3x3 matrices) plus separate pointers whose layout is not visible here
265 | void *lookupHLG_OOTF,*lookupHLG_inv_OOTF; // untyped: element type is decided by the implementation
266 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // NOTE(review): presumably CPU-feature switches selecting the SIMD code paths - confirm
267 |
268 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; // NOTE(review): presumably cached properties of the clip and host - confirm
269 | uint8_t pixelsize; // AVS16
270 | uint8_t bits_per_pixel;
271 |
272 | VideoInfo *vi_original,*vi_420,*vi_422,*vi_444,*vi_RGB32,*vi_RGB64,*vi_PlaneY_HLG; // VideoInfo pointers; ownership not visible here
273 |
274 | dataLookUp dl;
275 |
276 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // one entry per possible worker thread
277 | MT_Data_Info_HDRTools MT_Data[3][MAX_MT_THREADS]; // three sets of per-thread descriptors, matching threads_number[3]; what each set is for is not visible here
278 | uint8_t threads,threads_number[3],max_threads;
279 | uint32_t UserId;
280 |
281 | ThreadPoolFunction StaticThreadpoolF;
282 |
283 | static void StaticThreadpool(void *ptr); // static, takes an untyped pointer; NOTE(review): presumably the worker entry stored in StaticThreadpoolF - confirm
284 |
285 | void FreeData(void); // NOTE(review): presumably releases the tables and VideoInfo objects above - confirm
286 | };
287 |
288 |
289 | class ConvertXYZtoRGB : public GenericVideoFilter // Filter class declaration; the name indicates XYZ to RGB conversion, implemented outside this header.
290 | {
291 | public: // constructor, destructor and the two __stdcall entry points
292 | ConvertXYZtoRGB(PClip _child,uint8_t _Color,uint8_t _OutputMode,uint8_t _HDRMode,double _HLG_Lb,double _HLG_Lw,
293 | double _Crosstalk,uint8_t _HLGColor,bool _OOTF,bool _EOTF,bool _fastmode,float _Rx,float _Ry,
294 | float _Gx,float _Gy,float _Bx,float _By,float _Wx,float _Wy,float _pRx,float _pRy,float _pGx,
295 | float _pGy,float _pBx,float _pBy,float _pWx,float _pWy,
296 | uint8_t _threads, bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
297 | virtual ~ConvertXYZtoRGB();
298 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // returns the PVideoFrame for frame index n
299 |
300 | int __stdcall SetCacheHints(int cachehints, int frame_range); // handling of the hint values is defined in the implementation, not here
301 |
302 | private:
303 | uint8_t Color,OutputMode,HDRMode,HLGColor; // same names as the constructor parameters; presumably stored copies - confirm
304 | bool OOTF,fastmode,EOTF;
305 | float Rx,Ry,Gx,Gy,Bx,By,Wx,Wy; // NOTE(review): presumably chromaticity coordinates of the R, G, B primaries and white point - confirm
306 | float pRx,pRy,pGx,pGy,pBx,pBy,pWx,pWy; // second set of eight coordinates; what the p prefix stands for is not visible here
307 | bool sleep,HLG_Mode;
308 | double HLG_Lb,HLG_Lw,Crosstalk;
309 | int16_t *lookupXYZ_8,*lookupCrosstalk_8; // raw table pointers from here down to lookupHLG_inv_OOTF; allocation and ownership are not visible in this header
310 | int32_t *lookupXYZ_16,*lookupCrosstalk_16,*lookupHLG_RGB_16;
311 | uint8_t *lookupL_8;
312 | uint16_t *lookupL_16,*lookupL_20;
313 | float Coeff_XYZ[9],*Coeff_XYZ_asm,Coeff_Crosstalk[9],*Coeff_Crosstalk_asm,*lookupL_32; // nine-element arrays (NOTE(review): presumably 3x3 matrices), two separate coefficient pointers and one float table pointer
314 | void *lookupHLG_OOTF,*lookupHLG_inv_OOTF; // untyped: element type is decided by the implementation
315 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // NOTE(review): presumably CPU-feature switches selecting the SIMD code paths - confirm
316 |
317 | VideoInfo *vi_PlaneY_HLG; // VideoInfo pointer; ownership not visible here
318 |
319 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; // NOTE(review): presumably cached properties of the clip and host - confirm
320 | uint8_t pixelsize; // AVS16
321 | uint8_t bits_per_pixel;
322 |
323 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // one entry per possible worker thread
324 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; // one descriptor per possible worker thread
325 | uint8_t threads,threads_number;
326 | uint32_t UserId;
327 |
328 | ThreadPoolFunction StaticThreadpoolF;
329 |
330 | static void StaticThreadpool(void *ptr); // static, takes an untyped pointer; NOTE(review): presumably the worker entry stored in StaticThreadpoolF - confirm
331 |
332 | void FreeData(void); // NOTE(review): presumably releases the tables above - confirm
333 | };
334 |
335 |
336 | class ConvertXYZ_Scale_HDRtoSDR : public GenericVideoFilter // Filter class declaration; the name indicates an HDR to SDR scaling of XYZ data, implemented outside this header.
337 | {
338 | public: // constructor, destructor and the two __stdcall entry points
339 | ConvertXYZ_Scale_HDRtoSDR(PClip _child,float _Coeff_X,float _Coeff_Y,float _Coeff_Z,uint8_t _threads,
340 | bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
341 | virtual ~ConvertXYZ_Scale_HDRtoSDR();
342 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // returns the PVideoFrame for frame index n
343 |
344 | int __stdcall SetCacheHints(int cachehints, int frame_range); // handling of the hint values is defined in the implementation, not here
345 |
346 | private:
347 | bool sleep;
348 | float Coeff_X,Coeff_Y,Coeff_Z; // same names as the constructor parameters; presumably stored copies - confirm
349 | uint16_t *lookupX_16,*lookupY_16,*lookupZ_16; // one raw table pointer per channel; allocation and ownership are not visible in this header
350 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // NOTE(review): presumably CPU-feature switches selecting the SIMD code paths - confirm
351 |
352 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; // NOTE(review): presumably cached properties of the clip and host - confirm
353 | uint8_t pixelsize; // AVS16
354 | uint8_t bits_per_pixel;
355 |
356 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // one entry per possible worker thread
357 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; // one descriptor per possible worker thread
358 | uint8_t threads,threads_number;
359 | uint32_t UserId;
360 |
361 | ThreadPoolFunction StaticThreadpoolF;
362 |
363 | static void StaticThreadpool(void *ptr); // static, takes an untyped pointer; NOTE(review): presumably the worker entry stored in StaticThreadpoolF - confirm
364 |
365 | void FreeData(void); // NOTE(review): presumably releases the tables above - confirm
366 | };
367 |
368 |
369 | class ConvertXYZ_Scale_SDRtoHDR : public GenericVideoFilter // Filter class declaration; the name indicates an SDR to HDR scaling of XYZ data, implemented outside this header.
370 | {
371 | public: // constructor, destructor and the two __stdcall entry points
372 | ConvertXYZ_Scale_SDRtoHDR(PClip _child,float _Coeff_X,float _Coeff_Y,float _Coeff_Z,
373 | uint8_t _threads, bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
374 | virtual ~ConvertXYZ_Scale_SDRtoHDR();
375 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // returns the PVideoFrame for frame index n
376 |
377 | int __stdcall SetCacheHints(int cachehints, int frame_range); // handling of the hint values is defined in the implementation, not here
378 |
379 | private:
380 | float MinMastering,MaxMastering; // NOTE(review): no constructor parameter has these names - check how (and whether) they are set
381 | bool sleep;
382 | float Coeff_X,Coeff_Y,Coeff_Z; // same names as the constructor parameters; presumably stored copies - confirm
383 | uint16_t *lookupX_16,*lookupY_16,*lookupZ_16; // one raw table pointer per channel; allocation and ownership are not visible in this header
384 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // NOTE(review): presumably CPU-feature switches selecting the SIMD code paths - confirm
385 |
386 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; // NOTE(review): presumably cached properties of the clip and host - confirm
387 | uint8_t pixelsize; // AVS16
388 | uint8_t bits_per_pixel;
389 |
390 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // one entry per possible worker thread
391 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; // one descriptor per possible worker thread
392 | uint8_t threads,threads_number;
393 | uint32_t UserId;
394 |
395 | ThreadPoolFunction StaticThreadpoolF;
396 |
397 | static void StaticThreadpool(void *ptr); // static, takes an untyped pointer; NOTE(review): presumably the worker entry stored in StaticThreadpoolF - confirm
398 |
399 | void FreeData(void); // NOTE(review): presumably releases the tables above - confirm
400 | };
401 |
402 |
403 | class ConvertXYZ_Hable_HDRtoSDR : public GenericVideoFilter // Filter class declaration; the name indicates Hable HDR to SDR tone mapping on XYZ data, implemented outside this header.
404 | {
405 | public: // constructor, destructor and the two __stdcall entry points
406 | ConvertXYZ_Hable_HDRtoSDR(PClip _child,double _exp_X,double _w_X,double _a_X,double _b_X,double _c_X,
407 | double _d_X,double _e_X,double _f_X,double _exp_Y,double _w_Y,double _a_Y,double _b_Y,double _c_Y,
408 | double _d_Y,double _e_Y,double _f_Y,double _exp_Z,double _w_Z,double _a_Z,double _b_Z,double _c_Z,
409 | double _d_Z,double _e_Z,double _f_Z,
410 | float _pRx,float _pRy,float _pGx,float _pGy,float _pBx,float _pBy,float _pWx,float _pWy,
411 | bool _fastmode, uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
412 | virtual ~ConvertXYZ_Hable_HDRtoSDR();
413 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // returns the PVideoFrame for frame index n
414 |
415 | int __stdcall SetCacheHints(int cachehints, int frame_range); // handling of the hint values is defined in the implementation, not here
416 |
417 | private:
418 | bool sleep,fastmode;
419 | double exp_X,w_X,a_X,b_X,c_X,d_X,e_X,f_X; // eight curve parameters per channel, named like the constructor parameters; presumably stored copies - confirm
420 | double exp_Y,w_Y,a_Y,b_Y,c_Y,d_Y,e_Y,f_Y;
421 | double exp_Z,w_Z,a_Z,b_Z,c_Z,d_Z,e_Z,f_Z;
422 | float pRx,pRy,pGx,pGy,pBx,pBy,pWx,pWy; // NOTE(review): presumably chromaticity coordinates of primaries and white point - confirm
423 | uint16_t *lookupX_16,*lookupY_16,*lookupZ_16; // raw per-channel table pointers (16-bit and float variants); allocation and ownership are not visible in this header
424 | float *lookupX_32,*lookupY_32,*lookupZ_32;
425 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // NOTE(review): presumably CPU-feature switches selecting the SIMD code paths - confirm
426 |
427 | double Xmin,Ymin,Zmin,CoeffX,CoeffY,CoeffZ; // per-channel minimum and coefficient; how they are derived is not visible here
428 |
429 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; // NOTE(review): presumably cached properties of the clip and host - confirm
430 | uint8_t pixelsize; // AVS16
431 | uint8_t bits_per_pixel;
432 |
433 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // one entry per possible worker thread
434 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; // one descriptor per possible worker thread
435 | uint8_t threads,threads_number;
436 | uint32_t UserId;
437 |
438 | ThreadPoolFunction StaticThreadpoolF;
439 |
440 | static void StaticThreadpool(void *ptr); // static, takes an untyped pointer; NOTE(review): presumably the worker entry stored in StaticThreadpoolF - confirm
441 |
442 | void FreeData(void); // NOTE(review): presumably releases the tables above - confirm
443 | };
444 |
445 |
446 | class ConvertRGB_Hable_HDRtoSDR : public GenericVideoFilter // Filter class declaration; the name indicates Hable HDR to SDR tone mapping on RGB data, implemented outside this header.
447 | {
448 | public: // constructor, destructor and the two __stdcall entry points
449 | ConvertRGB_Hable_HDRtoSDR(PClip _child,double _exp_R,double _w_R,double _a_R,double _b_R,double _c_R,
450 | double _d_R,double _e_R,double _f_R,double _exp_G,double _w_G,double _a_G,double _b_G,double _c_G,
451 | double _d_G,double _e_G,double _f_G,double _exp_B,double _w_B,double _a_B,double _b_B,double _c_B,
452 | double _d_B,double _e_B,double _f_B,
453 | bool _fastmode, uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
454 | virtual ~ConvertRGB_Hable_HDRtoSDR();
455 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // returns the PVideoFrame for frame index n
456 |
457 | int __stdcall SetCacheHints(int cachehints, int frame_range); // handling of the hint values is defined in the implementation, not here
458 |
459 | private:
460 | bool sleep,fastmode;
461 | double exp_R,w_R,a_R,b_R,c_R,d_R,e_R,f_R; // eight curve parameters per channel, named like the constructor parameters; presumably stored copies - confirm
462 | double exp_G,w_G,a_G,b_G,c_G,d_G,e_G,f_G;
463 | double exp_B,w_B,a_B,b_B,c_B,d_B,e_B,f_B;
464 | uint16_t *lookupR_16,*lookupG_16,*lookupB_16; // raw per-channel table pointers (16-bit and float variants); allocation and ownership are not visible in this header
465 | float *lookupR_32,*lookupG_32,*lookupB_32;
466 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // NOTE(review): presumably CPU-feature switches selecting the SIMD code paths - confirm
467 |
468 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; // NOTE(review): presumably cached properties of the clip and host - confirm
469 | uint8_t pixelsize; // AVS16
470 | uint8_t bits_per_pixel;
471 |
472 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // one entry per possible worker thread
473 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; // one descriptor per possible worker thread
474 | uint8_t threads,threads_number;
475 | uint32_t UserId;
476 |
477 | ThreadPoolFunction StaticThreadpoolF;
478 |
479 | static void StaticThreadpool(void *ptr); // static, takes an untyped pointer; NOTE(review): presumably the worker entry stored in StaticThreadpoolF - confirm
480 |
481 | void FreeData(void); // NOTE(review): presumably releases the tables above - confirm
482 | };
483 |
484 |
485 | class ConvertXYZ_Mobius_HDRtoSDR : public GenericVideoFilter // Filter class declaration; the name indicates Mobius HDR to SDR tone mapping on XYZ data, implemented outside this header.
486 | {
487 | public: // constructor, destructor and the two __stdcall entry points
488 | ConvertXYZ_Mobius_HDRtoSDR(PClip _child,double _exp_X,double _trans_X,double _peak_X,
489 | double _exp_Y,double _trans_Y,double _peak_Y,double _exp_Z,double _trans_Z,double _peak_Z,
490 | float _pRx,float _pRy,float _pGx,float _pGy,float _pBx,float _pBy,float _pWx,float _pWy,
491 | bool _fastmode, uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
492 | virtual ~ConvertXYZ_Mobius_HDRtoSDR();
493 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // returns the PVideoFrame for frame index n
494 |
495 | int __stdcall SetCacheHints(int cachehints, int frame_range); // handling of the hint values is defined in the implementation, not here
496 |
497 | private:
498 | bool sleep,fastmode;
499 | double exp_X,trans_X,peak_X; // three curve parameters per channel, named like the constructor parameters; presumably stored copies - confirm
500 | double exp_Y,trans_Y,peak_Y;
501 | double exp_Z,trans_Z,peak_Z;
502 | float pRx,pRy,pGx,pGy,pBx,pBy,pWx,pWy; // NOTE(review): presumably chromaticity coordinates of primaries and white point - confirm
503 | uint16_t *lookupX_16,*lookupY_16,*lookupZ_16; // raw per-channel table pointers (16-bit and float variants); allocation and ownership are not visible in this header
504 | float *lookupX_32,*lookupY_32,*lookupZ_32;
505 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // NOTE(review): presumably CPU-feature switches selecting the SIMD code paths - confirm
506 |
507 | double Xmin,Ymin,Zmin,CoeffX,CoeffY,CoeffZ; // per-channel minimum and coefficient; how they are derived is not visible here
508 |
509 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; // NOTE(review): presumably cached properties of the clip and host - confirm
510 | uint8_t pixelsize; // AVS16
511 | uint8_t bits_per_pixel;
512 |
513 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // one entry per possible worker thread
514 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; // one descriptor per possible worker thread
515 | uint8_t threads,threads_number;
516 | uint32_t UserId;
517 |
518 | ThreadPoolFunction StaticThreadpoolF;
519 |
520 | static void StaticThreadpool(void *ptr); // static, takes an untyped pointer; NOTE(review): presumably the worker entry stored in StaticThreadpoolF - confirm
521 |
522 | void FreeData(void); // NOTE(review): presumably releases the tables above - confirm
523 | };
524 |
525 |
526 | class ConvertRGB_Mobius_HDRtoSDR : public GenericVideoFilter // Filter class declaration; the name indicates Mobius HDR to SDR tone mapping on RGB data, implemented outside this header.
527 | {
528 | public: // constructor, destructor and the two __stdcall entry points
529 | ConvertRGB_Mobius_HDRtoSDR(PClip _child,double _exp_R,double _trans_R,double _peak_R,
530 | double _exp_G,double _trans_G,double _peak_G,double _exp_B,double _trans_B,double _peak_B,
531 | bool _fastmode, uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
532 | virtual ~ConvertRGB_Mobius_HDRtoSDR();
533 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // returns the PVideoFrame for frame index n
534 |
535 | int __stdcall SetCacheHints(int cachehints, int frame_range); // handling of the hint values is defined in the implementation, not here
536 |
537 | private:
538 | bool sleep,fastmode;
539 | double exp_R,trans_R,peak_R; // three curve parameters per channel, named like the constructor parameters; presumably stored copies - confirm
540 | double exp_G,trans_G,peak_G;
541 | double exp_B,trans_B,peak_B;
542 | uint16_t *lookupR_16,*lookupG_16,*lookupB_16; // raw per-channel table pointers (16-bit and float variants); allocation and ownership are not visible in this header
543 | float *lookupR_32,*lookupG_32,*lookupB_32;
544 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // NOTE(review): presumably CPU-feature switches selecting the SIMD code paths - confirm
545 |
546 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; // NOTE(review): presumably cached properties of the clip and host - confirm
547 | uint8_t pixelsize; // AVS16
548 | uint8_t bits_per_pixel;
549 |
550 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // one entry per possible worker thread
551 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; // one descriptor per possible worker thread
552 | uint8_t threads,threads_number;
553 | uint32_t UserId;
554 |
555 | ThreadPoolFunction StaticThreadpoolF;
556 |
557 | static void StaticThreadpool(void *ptr); // static, takes an untyped pointer; NOTE(review): presumably the worker entry stored in StaticThreadpoolF - confirm
558 |
559 | void FreeData(void); // NOTE(review): presumably releases the tables above - confirm
560 | };
561 |
562 |
563 | class ConvertXYZ_Reinhard_HDRtoSDR : public GenericVideoFilter // Filter class declaration; the name indicates Reinhard HDR to SDR tone mapping on XYZ data, implemented outside this header.
564 | {
565 | public: // constructor, destructor and the two __stdcall entry points
566 | ConvertXYZ_Reinhard_HDRtoSDR(PClip _child,double _exp_X,double _contr_X,double _peak_X,
567 | double _exp_Y,double _contr_Y,double _peak_Y,double _exp_Z,double _contr_Z,double _peak_Z,
568 | float _pRx,float _pRy,float _pGx,float _pGy,float _pBx,float _pBy,float _pWx,float _pWy,
569 | bool _fastmode, uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
570 | virtual ~ConvertXYZ_Reinhard_HDRtoSDR();
571 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // returns the PVideoFrame for frame index n
572 |
573 | int __stdcall SetCacheHints(int cachehints, int frame_range); // handling of the hint values is defined in the implementation, not here
574 |
575 | private:
576 | bool sleep,fastmode;
577 | double exp_X,contr_X,peak_X; // three curve parameters per channel, named like the constructor parameters; presumably stored copies - confirm
578 | double exp_Y,contr_Y,peak_Y;
579 | double exp_Z,contr_Z,peak_Z;
580 | float pRx,pRy,pGx,pGy,pBx,pBy,pWx,pWy; // NOTE(review): presumably chromaticity coordinates of primaries and white point - confirm
581 | uint16_t *lookupX_16,*lookupY_16,*lookupZ_16; // raw per-channel table pointers (16-bit and float variants); allocation and ownership are not visible in this header
582 | float *lookupX_32,*lookupY_32,*lookupZ_32;
583 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // NOTE(review): presumably CPU-feature switches selecting the SIMD code paths - confirm
584 |
585 | double Xmin,Ymin,Zmin,CoeffX,CoeffY,CoeffZ; // per-channel minimum and coefficient; how they are derived is not visible here
586 |
587 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; // NOTE(review): presumably cached properties of the clip and host - confirm
588 | uint8_t pixelsize; // AVS16
589 | uint8_t bits_per_pixel;
590 |
591 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // one entry per possible worker thread
592 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; // one descriptor per possible worker thread
593 | uint8_t threads,threads_number;
594 | uint32_t UserId;
595 |
596 | ThreadPoolFunction StaticThreadpoolF;
597 |
598 | static void StaticThreadpool(void *ptr); // static, takes an untyped pointer; NOTE(review): presumably the worker entry stored in StaticThreadpoolF - confirm
599 |
600 | void FreeData(void); // NOTE(review): presumably releases the tables above - confirm
601 | };
602 |
603 |
604 | class ConvertRGB_Reinhard_HDRtoSDR : public GenericVideoFilter // Filter class declaration; the name indicates Reinhard HDR to SDR tone mapping on RGB data, implemented outside this header.
605 | {
606 | public: // constructor, destructor and the two __stdcall entry points
607 | ConvertRGB_Reinhard_HDRtoSDR(PClip _child,double _exp_R,double _contr_R,double _peak_R,
608 | double _exp_G,double _contr_G,double _peak_G,double _exp_B,double _contr_B,double _peak_B,
609 | bool _fastmode, uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
610 | virtual ~ConvertRGB_Reinhard_HDRtoSDR();
611 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // returns the PVideoFrame for frame index n
612 |
613 | int __stdcall SetCacheHints(int cachehints, int frame_range); // handling of the hint values is defined in the implementation, not here
614 |
615 | private:
616 | bool sleep,fastmode;
617 | double exp_R,contr_R,peak_R; // three curve parameters per channel, named like the constructor parameters; presumably stored copies - confirm
618 | double exp_G,contr_G,peak_G;
619 | double exp_B,contr_B,peak_B;
620 | uint16_t *lookupR_16,*lookupG_16,*lookupB_16; // raw per-channel table pointers (16-bit and float variants); allocation and ownership are not visible in this header
621 | float *lookupR_32,*lookupG_32,*lookupB_32;
622 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // NOTE(review): presumably CPU-feature switches selecting the SIMD code paths - confirm
623 |
624 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; // NOTE(review): presumably cached properties of the clip and host - confirm
625 | uint8_t pixelsize; // AVS16
626 | uint8_t bits_per_pixel;
627 |
628 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // one entry per possible worker thread
629 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; // one descriptor per possible worker thread
630 | uint8_t threads,threads_number;
631 | uint32_t UserId;
632 |
633 | ThreadPoolFunction StaticThreadpoolF;
634 |
635 | static void StaticThreadpool(void *ptr); // static, takes an untyped pointer; NOTE(review): presumably the worker entry stored in StaticThreadpoolF - confirm
636 |
637 | void FreeData(void); // NOTE(review): presumably releases the tables above - confirm
638 | };
639 |
640 |
641 | class ConvertLinearRGBtoYUV_BT2446_A_HDRtoSDR : public GenericVideoFilter // Filter class declaration; the name indicates a BT.2446 method A HDR to SDR conversion from linear RGB to YUV, implemented outside this header.
642 | {
643 | public: // constructor, destructor and the two __stdcall entry points
644 | ConvertLinearRGBtoYUV_BT2446_A_HDRtoSDR(PClip _child,double _Lhdr,double _Lsdr,double _CoeffAdj,
645 | bool _fastmode,uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env);
646 | virtual ~ConvertLinearRGBtoYUV_BT2446_A_HDRtoSDR();
647 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // returns the PVideoFrame for frame index n
648 |
649 | int __stdcall SetCacheHints(int cachehints, int frame_range); // handling of the hint values is defined in the implementation, not here
650 |
651 | private:
652 | bool sleep,fastmode;
653 | double Lhdr,Lsdr,CoeffAdj; // same names as the constructor parameters; presumably stored copies - confirm
654 | uint16_t *lookupEOTF_16,*lookupR_16,*lookupG_16,*lookupB_16; // two families of raw table pointers, suffixed _16 and _32; allocation and ownership are not visible in this header
655 | float *lookupY1_16,*lookupY2_16,*lookupBY_16,*lookupRY_16; // note: float elements despite the _16 suffix, so the suffix is not the element width
656 | uint32_t *lookupEOTF_32,*lookupR_32,*lookupG_32,*lookupB_32;
657 | float *lookupY1_32,*lookupY2_32,*lookupBY_32,*lookupRY_32;
658 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // NOTE(review): presumably CPU-feature switches selecting the SIMD code paths - confirm
659 |
660 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; // NOTE(review): presumably cached properties of the clip and host - confirm
661 | uint8_t pixelsize; // AVS16
662 | uint8_t bits_per_pixel;
663 |
664 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // one entry per possible worker thread
665 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS]; // one descriptor per possible worker thread
666 | uint8_t threads,threads_number;
667 | uint32_t UserId;
668 |
669 | ThreadPoolFunction StaticThreadpoolF;
670 |
671 | static void StaticThreadpool(void *ptr); // static, takes an untyped pointer; NOTE(review): presumably the worker entry stored in StaticThreadpoolF - confirm
672 |
673 | void FreeData(void); // NOTE(review): presumably releases the tables above - confirm
674 | };
675 |
676 |
677 | class ConverXYZ_BT2446_C_HDRtoSDR : public GenericVideoFilter // Video filter class (declaration only; the implementation is not in this header). Presumably the BT.2446 method C HDR->SDR conversion on XYZ data, judging by the name - confirm in the .cpp.
678 | {
679 | public:
680 | ConverXYZ_BT2446_C_HDRtoSDR(PClip _child,bool _ChromaC,bool _PQMode,float _Lhdr,float _Lsdr,
681 | float _pct_ref,float _pct_ip,float _pct_wp,float _pct_sdr_skin,float _pct_hdr_skin,float _WhiteShift,
682 | float _pRx,float _pRy,float _pGx,float _pGy,float _pBx,float _pBy,float _pWx,float _pWy,
683 | bool _fastmode,uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env); // Note: several float arguments (e.g. _Lhdr, _pct_ref) have double-typed members of the same name below.
684 | virtual ~ConverXYZ_BT2446_C_HDRtoSDR();
685 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // Returns a frame for index n.
686 |
687 | int __stdcall SetCacheHints(int cachehints, int frame_range);
688 |
689 | private:
690 | bool sleep,fastmode;
691 | uint16_t *lookupY_16; // Raw pointers to lookup tables; allocation and release are not visible here (presumably handled by the constructor and FreeData - confirm).
692 | float *lookupX_16,*lookupiY_16,*lookupZ_16;
693 | float *lookup2X_16,*lookup2Y_16,*lookup2Z_16;
694 | float *lookupY_32,*lookupiY_32;
695 | float *lookup2X_32,*lookup2Y_32,*lookup2Z_32;
696 | double pct_ref,pct_ip,pct_wp;
697 | double pct_sdr_skin,pct_hdr_skin;
698 | double Yhdr_ip,Ysdr_ip,Ysdr_wp,Yhdr_ref;
699 | double coeff_k[4],Lhdr,Lsdr;
700 | bool ChromaC,PQMode;
701 | float WhiteShift;
702 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // Instruction-set flags; presumably filled from CPU detection to select the code path.
703 |
704 | float pRx,pRy,pGx,pGy,pBx,pBy,pWx,pWy; // Presumably the x/y chromaticity coordinates of the R, G, B primaries and the white point - confirm.
705 | double Xmin,Ymin,Zmin,CoeffX,CoeffY,CoeffZ;
706 | double Xn,Yn,Zn;
707 |
708 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
709 | uint8_t pixelsize; // AVS16
710 | uint8_t bits_per_pixel;
711 |
712 | VideoInfo *vi_RGBPS; // Pointer member; ownership is not visible in this header.
713 |
714 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // One entry per possible worker thread (fixed capacity MAX_MT_THREADS).
715 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS];
716 | uint8_t threads,threads_number;
717 | uint32_t UserId;
718 |
719 | double fdico(double a,double b,double k1,double x);
720 | bool dicotomie(double k1,double &k3); // Writes its result through k3; presumably a bisection ("dichotomy") solver using fdico, with the bool signalling success - confirm.
721 |
722 | ThreadPoolFunction StaticThreadpoolF;
723 |
724 | static void StaticThreadpool(void *ptr); // Static callback taking an opaque pointer; presumably the worker entry point given to the thread pool.
725 |
726 | void FreeData(void);
727 | };
728 |
729 |
730 | class ConvertXYZ_ACES_HDRtoSDR : public GenericVideoFilter // Video filter class (declaration only; the implementation is not in this header). Presumably an ACES-style HDR->SDR tone mapping applied to XYZ data, judging by the name - confirm in the .cpp.
731 | {
732 | public:
733 | ConvertXYZ_ACES_HDRtoSDR(PClip _child,double _a_X,double _b_X,double _c_X,double _d_X,double _e_X,
734 | double _a_Y,double _b_Y,double _c_Y,double _d_Y,double _e_Y,double _a_Z,double _b_Z,double _c_Z,
735 | double _d_Z,double _e_Z,double _exp_X,double _exp_Y,double _exp_Z,
736 | float _pRx,float _pRy,float _pGx,float _pGy,float _pBx,float _pBy,float _pWx,float _pWy,
737 | bool _fastmode, uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env); // Arguments presumably initialise the same-named private members below (verify in the constructor body).
738 | virtual ~ConvertXYZ_ACES_HDRtoSDR();
739 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // Returns a frame for index n.
740 |
741 | int __stdcall SetCacheHints(int cachehints, int frame_range);
742 |
743 | private:
744 | bool sleep,fastmode;
745 | double a_X,b_X,c_X,d_X,e_X; // Five coefficients (a..e) plus one exponent (exp_*) per channel; their exact role in the curve is defined in the .cpp.
746 | double a_Y,b_Y,c_Y,d_Y,e_Y;
747 | double a_Z,b_Z,c_Z,d_Z,e_Z;
748 | double exp_X,exp_Y,exp_Z;
749 | float pRx,pRy,pGx,pGy,pBx,pBy,pWx,pWy; // Presumably the x/y chromaticity coordinates of the R, G, B primaries and the white point - confirm.
750 | uint16_t *lookupX_16,*lookupY_16,*lookupZ_16; // Raw pointers to lookup tables; allocation and release are not visible here (presumably handled by the constructor and FreeData - confirm).
751 | float *lookupX_32,*lookupY_32,*lookupZ_32;
752 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // Instruction-set flags; presumably filled from CPU detection to select the code path.
753 |
754 | double Xmin,Ymin,Zmin,CoeffX,CoeffY,CoeffZ;
755 |
756 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
757 | uint8_t pixelsize; // AVS16
758 | uint8_t bits_per_pixel;
759 |
760 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // One entry per possible worker thread (fixed capacity MAX_MT_THREADS).
761 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS];
762 | uint8_t threads,threads_number;
763 | uint32_t UserId;
764 |
765 | ThreadPoolFunction StaticThreadpoolF;
766 |
767 | static void StaticThreadpool(void *ptr); // Static callback taking an opaque pointer; presumably the worker entry point given to the thread pool.
768 |
769 | void FreeData(void);
770 | };
771 |
772 |
773 | class ConvertRGB_ACES_HDRtoSDR : public GenericVideoFilter // Video filter class (declaration only; the implementation is not in this header). Presumably an ACES-style HDR->SDR tone mapping applied to RGB data, judging by the name - confirm in the .cpp.
774 | {
775 | public:
776 | ConvertRGB_ACES_HDRtoSDR(PClip _child,double _a_R,double _b_R,double _c_R,
777 | double _d_R,double _e_R,double _a_G,double _b_G,double _c_G,double _d_G,
778 | double _e_G,double _a_B,double _b_B,double _c_B,double _d_B,double _e_B,
779 | double _exp_R,double _exp_G,double _exp_B,
780 | bool _fastmode, uint8_t _threads,bool _sleep,bool negativePrefetch,IScriptEnvironment* env); // Arguments presumably initialise the same-named private members below (verify in the constructor body).
781 | virtual ~ConvertRGB_ACES_HDRtoSDR();
782 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); // Returns a frame for index n.
783 |
784 | int __stdcall SetCacheHints(int cachehints, int frame_range);
785 |
786 | private:
787 | bool sleep,fastmode;
788 | double a_R,b_R,c_R,d_R,e_R; // Five coefficients (a..e) plus one exponent (exp_*) per channel; their exact role in the curve is defined in the .cpp.
789 | double a_G,b_G,c_G,d_G,e_G;
790 | double a_B,b_B,c_B,d_B,e_B;
791 | double exp_R,exp_G,exp_B;
792 | uint16_t *lookupR_16,*lookupG_16,*lookupB_16; // Raw pointers to lookup tables; allocation and release are not visible here (presumably handled by the constructor and FreeData - confirm).
793 | float *lookupR_32,*lookupG_32,*lookupB_32;
794 | bool SSE2_Enable,SSE41_Enable,AVX_Enable,AVX2_Enable,AVX512_Enable; // Instruction-set flags; presumably filled from CPU detection to select the code path.
795 |
796 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
797 | uint8_t pixelsize; // AVS16
798 | uint8_t bits_per_pixel;
799 |
800 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; // One entry per possible worker thread (fixed capacity MAX_MT_THREADS).
801 | MT_Data_Info_HDRTools MT_Data[MAX_MT_THREADS];
802 | uint8_t threads,threads_number;
803 | uint32_t UserId;
804 |
805 | ThreadPoolFunction StaticThreadpoolF;
806 |
807 | static void StaticThreadpool(void *ptr); // Static callback taking an opaque pointer; presumably the worker entry point given to the thread pool.
808 |
809 | void FreeData(void);
810 | };
811 |
--------------------------------------------------------------------------------
/HDRTools/HDRTools_AVX2_asm_x64.asm:
--------------------------------------------------------------------------------
1 | ;
2 | ; HDRTools()
3 | ;
4 | ; Several functions for working on HDR data, and linear to non-linear conversions.
5 | ; Copyright (C) 2018 JPSDR
6 | ;
7 | ; HDRTools is free software; you can redistribute it and/or modify
8 | ; it under the terms of the GNU General Public License as published by
9 | ; the Free Software Foundation; either version 2, or (at your option)
10 | ; any later version.
11 | ;
12 | ; HDRTools is distributed in the hope that it will be useful,
13 | ; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | ; GNU General Public License for more details.
16 | ;
17 | ; You should have received a copy of the GNU General Public License
18 | ; along with GNU Make; see the file COPYING. If not, write to
19 | ; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20 | ;
21 | ;
22 |
23 | .data
24 |
25 | align 16
26 |
27 | data segment align(32) ; 32-byte alignment: the constants below are read with aligned 256-bit loads
28 |
29 | data_f_0 real4 8 dup(0.0) ; 8 x float 0.0 = one 256-bit vector
30 | data_f_1 real4 8 dup(1.0) ; 8 x float 1.0
31 |
32 | data_f_1048575 real4 8 dup(1048575.0) ; 8 x float 2^20-1 (scale factor for a 20-bit lookup index)
33 | data_f_65535 real4 8 dup(65535.0) ; 8 x float 2^16-1 (scale factor for 16-bit output)
34 | data_dw_1048575 dword 8 dup(1048575) ; 8 x dword 2^20-1
35 | data_dw_65535 dword 8 dup(65535) ; 8 x dword 2^16-1
36 | data_dw_0 dword 8 dup(0) ; 8 x dword 0
37 |
38 | data_w_128 word 16 dup(128) ; 16 x word 128 = half of 2^8, rounding bias added before a right shift by 8
39 | data_w_32 word 16 dup(32) ; 16 x word 32 = half of 2^6, rounding bias added before a right shift by 6
40 | data_w_8 word 16 dup(8) ; 16 x word 8 = half of 2^4, rounding bias added before a right shift by 4
41 |
42 | .code
43 |
44 | ;***************************************************
45 | ;** YUV to RGB functions **
46 | ;***************************************************
47 |
48 | ;JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 proc src1:dword,src2:dword,dst:dword,w:dword
49 | ; src1 = rcx
50 | ; src2 = rdx
51 | ; dst = r8
52 | ; w = r9d (iteration count; each iteration processes 32 bytes, must be non-zero because 'loop' decrements before testing)
53 | ; Per byte: dst = avg_round_up( floor((src1+src2)/2), src1 ), i.e. a blend of the two rows weighted towards src1.
54 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 proc public frame
55 |
56 | .endprolog
57 |
58 | vpcmpeqb ymm3,ymm3,ymm3 ; ymm3 = all ones, used as XOR mask to complement bytes
59 |
60 | mov r10,rcx ; r10=src1
61 | xor rax,rax ; rax = byte offset into the rows
62 | mov ecx,r9d ; rcx = w (32-bit move zero-extends), counter for 'loop'
63 | mov r11,32 ; bytes advanced per iteration
64 |
65 | Convert_Planar420_to_Planar422_8_AVX2_1:
66 | vmovdqa ymm0,YMMWORD ptr[r10+rax] ; aligned load: src1 must be 32-byte aligned
67 | vmovdqa ymm1,YMMWORD ptr[rdx+rax] ; aligned load: src2 must be 32-byte aligned
68 | vpxor ymm2,ymm0,ymm3 ; ymm2 = ~src1
69 | vpxor ymm1,ymm1,ymm3 ; ymm1 = ~src2
70 | vpavgb ymm2,ymm2,ymm1 ; rounded-up average of the complements
71 | vpxor ymm2,ymm2,ymm3 ; complement back => floor((src1+src2)/2)
72 | vpavgb ymm2,ymm2,ymm0 ; average that with src1 (rounds up)
73 |
74 | vmovdqa YMMWORD ptr[r8+rax],ymm2 ; aligned store: dst must be 32-byte aligned
75 | add rax,r11
76 | loop Convert_Planar420_to_Planar422_8_AVX2_1
77 |
78 | vzeroupper
79 |
80 | ret
81 |
82 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 endp
83 |
84 |
85 | ;JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 proc src1:dword,src2:dword,dst:dword,w:dword
86 | ; src1 = rcx
87 | ; src2 = rdx
88 | ; dst = r8
89 | ; w = r9d (iteration count; each iteration processes 32 bytes = 16 words, must be non-zero because 'loop' decrements before testing)
90 | ; Per 16-bit word: dst = avg_round_up( floor((src1+src2)/2), src1 ), i.e. a blend of the two rows weighted towards src1.
91 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 proc public frame
92 |
93 | .endprolog
94 |
95 | vpcmpeqb ymm3,ymm3,ymm3 ; ymm3 = all ones, used as XOR mask to complement the data
96 |
97 | mov r10,rcx ; r10=src1
98 | xor rax,rax ; rax = byte offset into the rows
99 | mov ecx,r9d ; rcx = w (32-bit move zero-extends), counter for 'loop'
100 | mov r11,32 ; bytes advanced per iteration
101 |
102 | Convert_Planar420_to_Planar422_16_AVX2_1:
103 | vmovdqa ymm0,YMMWORD ptr[r10+rax] ; aligned load: src1 must be 32-byte aligned
104 | vmovdqa ymm1,YMMWORD ptr[rdx+rax] ; aligned load: src2 must be 32-byte aligned
105 | vpxor ymm2,ymm0,ymm3 ; ymm2 = ~src1
106 | vpxor ymm1,ymm1,ymm3 ; ymm1 = ~src2
107 | vpavgw ymm2,ymm2,ymm1 ; rounded-up average of the complements
108 | vpxor ymm2,ymm2,ymm3 ; complement back => floor((src1+src2)/2)
109 | vpavgw ymm2,ymm2,ymm0 ; average that with src1 (rounds up)
110 |
111 | vmovdqa YMMWORD ptr[r8+rax],ymm2 ; aligned store: dst must be 32-byte aligned
112 | add rax,r11
113 | loop Convert_Planar420_to_Planar422_16_AVX2_1
114 |
115 | vzeroupper
116 |
117 | ret
118 |
119 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 endp
120 |
121 |
122 | ;***************************************************
123 | ;** RGB to YUV functions **
124 | ;***************************************************
125 |
126 |
127 | ;JPSDR_HDRTools_Convert_LinearRGBPStoRGB64_AVX2 proc src_R:dword,src_G:dword,src_B:dword,dst:dword,w:dword,h:dword,lookup:dword,
128 | ; src_modulo_R:dword,src_modulo_G:dword,src_modulo_B:dword,dst_modulo:dword
129 | ; src_R = rcx
130 | ; src_G = rdx
131 | ; src_B = r8
132 | ; dst = r9
133 | ; Converts three planes of floats to packed 16-bit pixels: each sample is clamped to [0,1], scaled to a 20-bit index and replaced by lookup[index]; pixels are written as B,G,R,0 words.
134 | JPSDR_HDRTools_Convert_LinearRGBPStoRGB64_AVX2 proc public frame
135 | ; Remaining arguments are read from the caller's stack relative to rbp (5th argument at rbp+48). w and h must be non-zero: both counters are decremented before being tested.
136 | w equ dword ptr[rbp+48]
137 | h equ dword ptr[rbp+56]
138 | lookup equ qword ptr[rbp+64]
139 | src_modulo_R equ qword ptr[rbp+72]
140 | src_modulo_G equ qword ptr[rbp+80]
141 | src_modulo_B equ qword ptr[rbp+88]
142 | dst_modulo equ qword ptr[rbp+96]
143 |
144 | push rbp
145 | .pushreg rbp
146 | mov rbp,rsp
147 | push rdi
148 | .pushreg rdi
149 | push rsi
150 | .pushreg rsi
151 | push rbx
152 | .pushreg rbx
153 | push r12
154 | .pushreg r12
155 | push r13
156 | .pushreg r13
157 | push r14
158 | .pushreg r14
159 | push r15
160 | .pushreg r15
161 | .endprolog
162 |
163 | vmovaps ymm3,YMMWORD ptr data_f_1048575 ; ymm3 = scale factor 2^20-1
164 | vmovaps ymm4,YMMWORD ptr data_f_0 ; ymm4 = lower clamp 0.0
165 | vmovaps ymm5,YMMWORD ptr data_f_1 ; ymm5 = upper clamp 1.0
166 |
167 | cld ; stosw must advance rdi upwards
168 | mov rdi,r9 ; rdi = dst (implicit destination of stosw)
169 | mov r9,rcx
170 | mov r10,rdx ; src_B=r8,src_G=r10,src_R=r9
171 | mov rbx,lookup ; rbx = base of the table of 16-bit entries
172 | mov r11d,w
173 | mov r12,src_modulo_R
174 | mov r13,src_modulo_G
175 | mov r14,src_modulo_B
176 | mov r15,dst_modulo
177 | xor rax,rax ; clear rax once; later 32-bit writes to eax keep the upper half zero
178 |
179 | Convert_LinearRGBPStoRGB64_AVX2_1:
180 | mov ecx,r11d ; ecx = pixels left in this row
181 | Convert_LinearRGBPStoRGB64_AVX2_2:
182 | xor rdx,rdx ; rdx = (pixels consumed from this group of 8) - 1
183 | vmaxps ymm0,ymm4,YMMWORD ptr[r8] ; always loads 8 floats per plane, even when fewer pixels remain in the row
184 | vmaxps ymm1,ymm4,YMMWORD ptr[r10]
185 | vmaxps ymm2,ymm4,YMMWORD ptr[r9]
186 | vminps ymm0,ymm0,ymm5 ; clamp to [0,1]
187 | vminps ymm1,ymm1,ymm5
188 | vminps ymm2,ymm2,ymm5
189 | vmulps ymm0,ymm0,ymm3 ; scale to 0..2^20-1
190 | vmulps ymm1,ymm1,ymm3
191 | vmulps ymm2,ymm2,ymm3
192 | vcvtps2dq ymm0,ymm0 ; to 32-bit integers (rounding per MXCSR)
193 | vcvtps2dq ymm1,ymm1
194 | vcvtps2dq ymm2,ymm2
195 | ; pixel 1 of the group
196 | vpextrd eax,xmm0,0 ; eax = B index
197 | mov ax,word ptr[rbx+2*rax] ; ax = lookup[index]
198 | stosw ; write B
199 | vpextrd eax,xmm1,0
200 | mov ax,word ptr[rbx+2*rax]
201 | stosw ; write G
202 | vpextrd eax,xmm2,0
203 | mov ax,word ptr[rbx+2*rax]
204 | stosw ; write R
205 | xor eax,eax
206 | stosw ; 4th word of the pixel is written as 0
207 | dec ecx
208 | jz Convert_LinearRGBPStoRGB64_AVX2_3 ; row finished inside this group
209 | inc rdx
210 | ; pixel 2 of the group
211 | vpextrd eax,xmm0,1
212 | mov ax,word ptr[rbx+2*rax]
213 | stosw
214 | vpextrd eax,xmm1,1
215 | mov ax,word ptr[rbx+2*rax]
216 | stosw
217 | vpextrd eax,xmm2,1
218 | mov ax,word ptr[rbx+2*rax]
219 | stosw
220 | xor eax,eax
221 | stosw
222 | dec ecx
223 | jz Convert_LinearRGBPStoRGB64_AVX2_3
224 | inc rdx
225 | ; pixel 3 of the group
226 | vpextrd eax,xmm0,2
227 | mov ax,word ptr[rbx+2*rax]
228 | stosw
229 | vpextrd eax,xmm1,2
230 | mov ax,word ptr[rbx+2*rax]
231 | stosw
232 | vpextrd eax,xmm2,2
233 | mov ax,word ptr[rbx+2*rax]
234 | stosw
235 | xor eax,eax
236 | stosw
237 | dec ecx
238 | jz Convert_LinearRGBPStoRGB64_AVX2_3
239 | inc rdx
240 | ; pixel 4 of the group
241 | vpextrd eax,xmm0,3
242 | mov ax,word ptr[rbx+2*rax]
243 | stosw
244 | vpextrd eax,xmm1,3
245 | mov ax,word ptr[rbx+2*rax]
246 | stosw
247 | vpextrd eax,xmm2,3
248 | mov ax,word ptr[rbx+2*rax]
249 | stosw
250 | xor eax,eax
251 | stosw
252 | dec ecx
253 | jz Convert_LinearRGBPStoRGB64_AVX2_3
254 | inc rdx
255 | ; bring the upper four indices of each plane into the low lanes
256 | vextracti128 xmm0,ymm0,1
257 | vextracti128 xmm1,ymm1,1
258 | vextracti128 xmm2,ymm2,1
259 | ; pixel 5 of the group
260 | vpextrd eax,xmm0,0
261 | mov ax,word ptr[rbx+2*rax]
262 | stosw
263 | vpextrd eax,xmm1,0
264 | mov ax,word ptr[rbx+2*rax]
265 | stosw
266 | vpextrd eax,xmm2,0
267 | mov ax,word ptr[rbx+2*rax]
268 | stosw
269 | xor eax,eax
270 | stosw
271 | dec ecx
272 | jz Convert_LinearRGBPStoRGB64_AVX2_3
273 | inc rdx
274 | ; pixel 6 of the group
275 | vpextrd eax,xmm0,1
276 | mov ax,word ptr[rbx+2*rax]
277 | stosw
278 | vpextrd eax,xmm1,1
279 | mov ax,word ptr[rbx+2*rax]
280 | stosw
281 | vpextrd eax,xmm2,1
282 | mov ax,word ptr[rbx+2*rax]
283 | stosw
284 | xor eax,eax
285 | stosw
286 | dec ecx
287 | jz short Convert_LinearRGBPStoRGB64_AVX2_3
288 | inc rdx
289 | ; pixel 7 of the group
290 | vpextrd eax,xmm0,2
291 | mov ax,word ptr[rbx+2*rax]
292 | stosw
293 | vpextrd eax,xmm1,2
294 | mov ax,word ptr[rbx+2*rax]
295 | stosw
296 | vpextrd eax,xmm2,2
297 | mov ax,word ptr[rbx+2*rax]
298 | stosw
299 | xor eax,eax
300 | stosw
301 | dec ecx
302 | jz short Convert_LinearRGBPStoRGB64_AVX2_3
303 | inc rdx
304 | ; pixel 8 of the group
305 | vpextrd eax,xmm0,3
306 | mov ax,word ptr[rbx+2*rax]
307 | stosw
308 | vpextrd eax,xmm1,3
309 | mov ax,word ptr[rbx+2*rax]
310 | stosw
311 | vpextrd eax,xmm2,3
312 | mov ax,word ptr[rbx+2*rax]
313 | stosw
314 | xor eax,eax
315 | stosw
316 | dec ecx
317 |
318 | Convert_LinearRGBPStoRGB64_AVX2_3:
319 | inc rdx ; rdx = number of pixels consumed in this group
320 | shl rdx,2 ; ... times 4 bytes per float
321 | add r8,rdx ; advance the three source pointers by the consumed floats
322 | add r10,rdx
323 | add r9,rdx
324 | or ecx,ecx ; more pixels left in this row?
325 | jnz Convert_LinearRGBPStoRGB64_AVX2_2
326 |
327 | add rdi,r15 ; end of row: add the per-plane modulo values to reach the next row
328 | add r8,r14
329 | add r10,r13
330 | add r9,r12
331 | dec h ; row counter is kept in the stack argument slot
332 | jnz Convert_LinearRGBPStoRGB64_AVX2_1
333 |
334 | vzeroupper
335 |
336 | pop r15
337 | pop r14
338 | pop r13
339 | pop r12
340 | pop rbx
341 | pop rsi
342 | pop rdi
343 | pop rbp
344 |
345 | ret
346 |
347 | JPSDR_HDRTools_Convert_LinearRGBPStoRGB64_AVX2 endp
348 |
349 |
350 | ;JPSDR_HDRTools_Convert_RGBPStoRGB64_AVX2 proc src_R:dword,src_G:dword,src_B:dword,dst:dword,w:dword,h:dword,
351 | ; src_pitch_R:dword,src_pitch_G:dword,src_pitch_B:dword,dst_pitch:dword
352 | ; src_R = rcx
353 | ; src_G = rdx
354 | ; src_B = r8
355 | ; dst = r9
356 | ; Converts three planes of floats to packed 16-bit pixels: each sample is multiplied by 65535, converted to integer and saturated to 0..65535; pixels are written as B,G,R,0 words.
357 | JPSDR_HDRTools_Convert_RGBPStoRGB64_AVX2 proc public frame
358 | ; Remaining arguments are read from the caller's stack relative to rbp (5th argument at rbp+48). h must be non-zero: the row counter is decremented before being tested.
359 | w equ dword ptr[rbp+48]
360 | h equ dword ptr[rbp+56]
361 | src_pitch_R equ qword ptr[rbp+64]
362 | src_pitch_G equ qword ptr[rbp+72]
363 | src_pitch_B equ qword ptr[rbp+80]
364 | dst_pitch equ qword ptr[rbp+88]
365 |
366 | push rbp
367 | .pushreg rbp
368 | mov rbp,rsp
369 | push rsi
370 | .pushreg rsi
371 | push rbx
372 | .pushreg rbx
373 | push r12
374 | .pushreg r12
375 | push r13
376 | .pushreg r13
377 | push r14
378 | .pushreg r14
379 | push r15
380 | .pushreg r15
381 | sub rsp,32
382 | .allocstack 32
383 | vmovdqa XMMWORD ptr[rsp],xmm6 ; xmm6/xmm7 are callee-saved in the Windows x64 convention
384 | .savexmm128 xmm6,0
385 | vmovdqa XMMWORD ptr[rsp+16],xmm7
386 | .savexmm128 xmm7,16
387 | .endprolog
388 |
389 | vmovaps ymm3,YMMWORD ptr data_f_65535 ; ymm3 = scale factor 2^16-1
390 | vpxor xmm4,xmm4,xmm4 ; xmm4 = 0, interleaved as the 4th word of each pixel
391 |
392 | mov rsi,rcx ; src_B=r8,src_G=rdx,src_R=rsi
393 |
394 | mov r11d,w
395 | mov r12,src_pitch_R
396 | mov r13,src_pitch_G
397 | mov r14,src_pitch_B
398 | mov r15,dst_pitch
399 | mov rbx,8
400 | mov r10d,h
401 |
402 | Convert_RGBPStoRGB64_AVX2_1:
403 | mov ecx,r11d
404 | xor rax,rax ; rax = pixel index within the row
405 | shr ecx,3 ; ecx = number of full groups of 8 pixels
406 | jz Convert_RGBPStoRGB64_AVX2_3
407 | Convert_RGBPStoRGB64_AVX2_2:
408 | vmulps ymm0,ymm3,YMMWORD ptr[r8+4*rax]
409 | vmulps ymm1,ymm3,YMMWORD ptr[rsi+4*rax]
410 | vmulps ymm2,ymm3,YMMWORD ptr[rdx+4*rax]
411 | vcvtps2dq ymm0,ymm0
412 | vcvtps2dq ymm1,ymm1
413 | vcvtps2dq ymm2,ymm2
414 |
415 | vextracti128 xmm5,ymm0,1
416 | vextracti128 xmm6,ymm1,1
417 | vextracti128 xmm7,ymm2,1
418 | ; VEX-encoded vpackusdw (not legacy packusdw) avoids an SSE/AVX transition while the upper YMM halves are dirty; the low 128-bit result is identical
419 | vpackusdw xmm0,xmm0,xmm0 ;0000B4B3B2B1
420 | vpackusdw xmm1,xmm1,xmm1 ;0000R4R3R2R1
421 | vpackusdw xmm2,xmm2,xmm2 ;0000G4G3G2G1
422 |
423 | vpunpcklwd xmm0,xmm0,xmm1 ;R4B4R3B3R2B2R1B1
424 | vpunpcklwd xmm2,xmm2,xmm4 ;0G40G30G20G1
425 | vpunpckhwd xmm1,xmm0,xmm2 ;0R4G4B40R3G3B3
426 | vpunpcklwd xmm0,xmm0,xmm2 ;0R2G2B20R1G1B1
427 |
428 | vpackusdw xmm5,xmm5,xmm5 ;0000B8B7B6B5
429 | vpackusdw xmm6,xmm6,xmm6 ;0000R8R7R6R5
430 | vpackusdw xmm7,xmm7,xmm7 ;0000G8G7G6G5
431 |
432 | vpunpcklwd xmm5,xmm5,xmm6 ;R8B8R7B7R6B6R5B5
433 | vpunpcklwd xmm7,xmm7,xmm4 ;0G80G70G60G5
434 | vpunpckhwd xmm6,xmm5,xmm7 ;0R8G8B80R7G7B7
435 | vpunpcklwd xmm5,xmm5,xmm7 ;0R6G6B60R5G5B5
436 |
437 | vmovdqa XMMWORD ptr[r9+8*rax],xmm0 ; aligned stores: dst rows must be 16-byte aligned
438 | vmovdqa XMMWORD ptr[r9+8*rax+16],xmm1
439 | vmovdqa XMMWORD ptr[r9+8*rax+32],xmm5
440 | vmovdqa XMMWORD ptr[r9+8*rax+48],xmm6
441 | add rax,8
442 | dec ecx
443 | jnz Convert_RGBPStoRGB64_AVX2_2
444 |
445 | Convert_RGBPStoRGB64_AVX2_3:
446 | mov ecx,r11d
447 | and ecx,7 ; ecx = leftover pixels (0..7)
448 | jz Convert_RGBPStoRGB64_AVX2_7
449 | ; tail: a full group of 8 is still loaded and converted (reads past w), only the leftover pixels are stored
450 | vmulps ymm0,ymm3,YMMWORD ptr[r8+4*rax]
451 | vmulps ymm1,ymm3,YMMWORD ptr[rsi+4*rax]
452 | vmulps ymm2,ymm3,YMMWORD ptr[rdx+4*rax]
453 | vcvtps2dq ymm0,ymm0
454 | vcvtps2dq ymm1,ymm1
455 | vcvtps2dq ymm2,ymm2
456 |
457 | vextracti128 xmm5,ymm0,1
458 | vextracti128 xmm6,ymm1,1
459 | vextracti128 xmm7,ymm2,1
460 |
461 | vpackusdw xmm0,xmm0,xmm0 ;0000B4B3B2B1
462 | vpackusdw xmm1,xmm1,xmm1 ;0000R4R3R2R1
463 | vpackusdw xmm2,xmm2,xmm2 ;0000G4G3G2G1
464 |
465 | vpunpcklwd xmm0,xmm0,xmm1 ;R4B4R3B3R2B2R1B1
466 | vpunpcklwd xmm2,xmm2,xmm4 ;0G40G30G20G1
467 | vpunpckhwd xmm1,xmm0,xmm2 ;0R4G4B40R3G3B3
468 | vpunpcklwd xmm0,xmm0,xmm2 ;0R2G2B20R1G1B1
469 |
470 | vpackusdw xmm5,xmm5,xmm5 ;0000B8B7B6B5
471 | vpackusdw xmm6,xmm6,xmm6 ;0000R8R7R6R5
472 | vpackusdw xmm7,xmm7,xmm7 ;0000G8G7G6G5
473 |
474 | vpunpcklwd xmm5,xmm5,xmm6 ;R8B8R7B7R6B6R5B5
475 | vpunpcklwd xmm7,xmm7,xmm4 ;0G80G70G60G5
476 | vpunpckhwd xmm6,xmm5,xmm7 ;0R8G8B80R7G7B7
477 | vpunpcklwd xmm5,xmm5,xmm7 ;0R6G6B60R5G5B5
478 |
479 | test ecx,4 ; dispatch on the bits of the leftover count
480 | jnz short Convert_RGBPStoRGB64_AVX2_5
481 | test ecx,2
482 | jnz short Convert_RGBPStoRGB64_AVX2_4
483 | vmovq qword ptr[r9+8*rax],xmm0 ; exactly 1 pixel left
484 | jmp short Convert_RGBPStoRGB64_AVX2_7
485 |
486 | Convert_RGBPStoRGB64_AVX2_4:
487 | vmovdqa XMMWORD ptr[r9+8*rax],xmm0 ; 2 or 3 pixels left
488 | test ecx,1
489 | jz short Convert_RGBPStoRGB64_AVX2_7
490 | vmovq qword ptr[r9+8*rax+16],xmm1
491 | jmp short Convert_RGBPStoRGB64_AVX2_7
492 |
493 | Convert_RGBPStoRGB64_AVX2_5:
494 | vmovdqa XMMWORD ptr[r9+8*rax],xmm0 ; 4 to 7 pixels left
495 | vmovdqa XMMWORD ptr[r9+8*rax+16],xmm1
496 | test ecx,2
497 | jnz short Convert_RGBPStoRGB64_AVX2_6
498 | test ecx,1
499 | jz short Convert_RGBPStoRGB64_AVX2_7
500 | vmovq qword ptr[r9+8*rax+32],xmm5
501 | jmp short Convert_RGBPStoRGB64_AVX2_7
502 |
503 | Convert_RGBPStoRGB64_AVX2_6:
504 | vmovdqa XMMWORD ptr[r9+8*rax+32],xmm5 ; 6 or 7 pixels left
505 | test ecx,1
506 | jz short Convert_RGBPStoRGB64_AVX2_7
507 | vmovq qword ptr[r9+8*rax+48],xmm6
508 |
509 | Convert_RGBPStoRGB64_AVX2_7:
510 | add rsi,r12 ; next row: advance each pointer by its pitch
511 | add rdx,r13
512 | add r8,r14
513 | add r9,r15
514 | dec r10d
515 | jnz Convert_RGBPStoRGB64_AVX2_1
516 | ; vzeroupper is issued before the epilogue so that only pops and ret follow 'add rsp', as the Windows x64 unwinder expects
517 | vzeroupper
518 |
519 | vmovdqa xmm7,XMMWORD ptr[rsp+16]
520 | vmovdqa xmm6,XMMWORD ptr[rsp]
521 | add rsp,32
522 |
523 | pop r15
524 | pop r14
525 | pop r13
526 | pop r12
527 | pop rbx
528 | pop rsi
529 | pop rbp
530 |
531 | ret
532 |
533 | JPSDR_HDRTools_Convert_RGBPStoRGB64_AVX2 endp
534 |
535 |
536 | ;JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 proc src1:dword,src2:dword,dst:dword,w32:dword,h:dword,src_pitch2:dword,dst_pitch:dword
537 | ; src1 = rcx
538 | ; src2 = rdx
539 | ; dst = r8
540 | ; w32 = r9d (number of 32-byte blocks per row; must be non-zero because 'loop' decrements before testing)
541 | ; Per byte: dst = rounded-up average of src1 and src2, for h rows.
542 | JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 proc public frame
543 | ; Remaining arguments are read from the caller's stack relative to rbp (5th argument at rbp+48).
544 | h equ dword ptr[rbp+48]
545 | src_pitch2 equ qword ptr[rbp+56]
546 | dst_pitch equ qword ptr[rbp+64]
547 |
548 | push rbp
549 | .pushreg rbp
550 | mov rbp,rsp
551 | push rsi
552 | .pushreg rsi
553 | push rbx
554 | .pushreg rbx
555 | push r12
556 | .pushreg r12
557 | .endprolog
558 |
559 | mov rsi,rcx ; rsi = src1 (rcx is needed as the 'loop' counter)
560 | mov r10d,h ; r10d = rows left
561 | mov rbx,32 ; bytes advanced per iteration
562 | mov r11,src_pitch2
563 | mov r12,dst_pitch
564 |
565 | Convert_Planar422_to_Planar420_8_AVX2_1:
566 | xor rax,rax ; rax = byte offset within the row
567 | mov ecx,r9d
568 |
569 | Convert_Planar422_to_Planar420_8_AVX2_2:
570 | vmovdqa ymm0,YMMWORD ptr[rsi+rax] ; aligned load: src1 must be 32-byte aligned
571 | vpavgb ymm0,ymm0,YMMWORD ptr[rdx+rax] ; byte-wise (a+b+1)>>1 with src2
572 |
573 | vmovdqa YMMWORD ptr[r8+rax],ymm0 ; aligned store: dst must be 32-byte aligned
574 | add rax,rbx
575 | loop Convert_Planar422_to_Planar420_8_AVX2_2
576 |
577 | add rsi,r11 ; both sources advance by the same src_pitch2
578 | add rdx,r11
579 | add r8,r12
580 | dec r10d
581 | jnz short Convert_Planar422_to_Planar420_8_AVX2_1
582 |
583 | vzeroupper
584 |
585 | pop r12
586 | pop rbx
587 | pop rsi
588 | pop rbp
589 |
590 | ret
591 |
592 | JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 endp
593 |
594 |
595 | ;JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 proc src1:dword,src2:dword,dst:dword,w16:dword,h:dword,src_pitch2:dword,dst_pitch:dword
596 | ; src1 = rcx
597 | ; src2 = rdx
598 | ; dst = r8
599 | ; w16 = r9d (number of 32-byte blocks = 16 words per row; must be non-zero because 'loop' decrements before testing)
600 | ; Per 16-bit word: dst = rounded-up average of src1 and src2, for h rows.
601 | JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 proc public frame
602 | ; Remaining arguments are read from the caller's stack relative to rbp (5th argument at rbp+48).
603 | h equ dword ptr[rbp+48]
604 | src_pitch2 equ qword ptr[rbp+56]
605 | dst_pitch equ qword ptr[rbp+64]
606 |
607 | push rbp
608 | .pushreg rbp
609 | mov rbp,rsp
610 | push rsi
611 | .pushreg rsi
612 | push rbx
613 | .pushreg rbx
614 | push r12
615 | .pushreg r12
616 | .endprolog
617 |
618 | mov rsi,rcx ; rsi = src1 (rcx is needed as the 'loop' counter)
619 | mov r10d,h ; r10d = rows left
620 | mov rbx,32 ; bytes advanced per iteration
621 | mov r11,src_pitch2
622 | mov r12,dst_pitch
623 |
624 | Convert_Planar422_to_Planar420_16_AVX2_1:
625 | xor rax,rax ; rax = byte offset within the row
626 | mov ecx,r9d
627 |
628 | Convert_Planar422_to_Planar420_16_AVX2_2:
629 | vmovdqa ymm0,YMMWORD ptr[rsi+rax] ; aligned load: src1 must be 32-byte aligned
630 | vpavgw ymm0,ymm0,YMMWORD ptr[rdx+rax] ; word-wise (a+b+1)>>1 with src2
631 |
632 | vmovdqa YMMWORD ptr[r8+rax],ymm0 ; aligned store: dst must be 32-byte aligned
633 | add rax,rbx
634 | loop Convert_Planar422_to_Planar420_16_AVX2_2
635 |
636 | add rsi,r11 ; both sources advance by the same src_pitch2
637 | add rdx,r11
638 | add r8,r12
639 | dec r10d
640 | jnz short Convert_Planar422_to_Planar420_16_AVX2_1
641 |
642 | vzeroupper
643 |
644 | pop r12
645 | pop rbx
646 | pop rsi
647 | pop rbp
648 |
649 | ret
650 |
651 | JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 endp
652 |
653 |
654 | ;***************************************************
655 | ;** XYZ/RGB functions **
656 | ;***************************************************
657 |
658 |
659 | ;***************************************************
660 | ;** HLG functions **
661 | ;***************************************************
662 |
663 |
664 | ;JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 proc src:dword,dst:dword,w:dword,h:dword,
665 | ; src_pitch:dword,dst_pitch:dword
666 | ; src = rcx
667 | ; dst = rdx
668 | ; w = r8d (width in 8-byte pixels of four 16-bit words)
669 | ; h = r9d (rows; must be non-zero, the counter is decremented before being tested). Each word becomes (word + 128, saturated) >> 8.
670 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 proc public frame
671 | ; Remaining arguments are read from the caller's stack relative to rbp (5th argument at rbp+48).
672 | src_pitch equ qword ptr[rbp+48]
673 | dst_pitch equ qword ptr[rbp+56]
674 |
675 | push rbp
676 | .pushreg rbp
677 | mov rbp,rsp
678 | push rdi
679 | .pushreg rdi
680 | push rsi
681 | .pushreg rsi
682 | push rbx
683 | .pushreg rbx
684 | .endprolog
685 |
686 | vmovdqa ymm4,YMMWORD ptr data_w_128 ; rounding bias for the shift by 8
687 |
688 | mov rsi,rcx
689 | mov rdi,rdx
690 | mov ebx,r8d
691 | mov r10,src_pitch
692 | mov r11,dst_pitch
693 | shr ebx,2 ; ebx = number of 4-pixel (32-byte) blocks per row
694 | mov rdx,128 ; bytes advanced per main-loop iteration
695 |
696 | Convert_RGB64_16toRGB64_8_AVX2_loop_1:
697 | mov ecx,ebx
698 | xor rax,rax ; rax = byte offset within the row
699 |
700 | shr ecx,2 ; ecx = number of 16-pixel (128-byte) blocks
701 | jz Convert_RGB64_16toRGB64_8_AVX2_3
702 |
703 | Convert_RGB64_16toRGB64_8_AVX2_loop_2:
704 | vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax] ; saturating add, so 65535 does not wrap
705 | vpaddusw ymm1,ymm4,YMMWORD ptr[rsi+rax+32]
706 | vpaddusw ymm2,ymm4,YMMWORD ptr[rsi+rax+64]
707 | vpaddusw ymm3,ymm4,YMMWORD ptr[rsi+rax+96]
708 | vpsrlw ymm0,ymm0,8
709 | vpsrlw ymm1,ymm1,8
710 | vpsrlw ymm2,ymm2,8
711 | vpsrlw ymm3,ymm3,8
712 | vmovdqa YMMWORD ptr[rdi+rax],ymm0 ; aligned stores: dst rows must be 32-byte aligned
713 | vmovdqa YMMWORD ptr[rdi+rax+32],ymm1
714 | vmovdqa YMMWORD ptr[rdi+rax+64],ymm2
715 | vmovdqa YMMWORD ptr[rdi+rax+96],ymm3
716 | add rax,rdx
717 | loop Convert_RGB64_16toRGB64_8_AVX2_loop_2
718 |
719 | Convert_RGB64_16toRGB64_8_AVX2_3:
720 | test ebx,2 ; two more 32-byte blocks (8 pixels)?
721 | jz short Convert_RGB64_16toRGB64_8_AVX2_4
722 |
723 | vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax]
724 | vpaddusw ymm1,ymm4,YMMWORD ptr[rsi+rax+32]
725 | vpsrlw ymm0,ymm0,8
726 | vpsrlw ymm1,ymm1,8
727 | vmovdqa YMMWORD ptr[rdi+rax],ymm0
728 | vmovdqa YMMWORD ptr[rdi+rax+32],ymm1
729 | add rax,64
730 |
731 | Convert_RGB64_16toRGB64_8_AVX2_4:
732 | test ebx,1 ; one more 32-byte block (4 pixels)?
733 | jz short Convert_RGB64_16toRGB64_8_AVX2_5
734 |
735 | vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax]
736 | vpsrlw ymm0,ymm0,8
737 | vmovdqa YMMWORD ptr[rdi+rax],ymm0
738 | add rax,32
739 |
740 | Convert_RGB64_16toRGB64_8_AVX2_5:
741 | test r8d,2 ; two leftover pixels (16 bytes)?
742 | jz short Convert_RGB64_16toRGB64_8_AVX2_6
743 |
744 | vpaddusw xmm0,xmm4,XMMWORD ptr[rsi+rax]
745 | vpsrlw xmm0,xmm0,8
746 | vmovdqa XMMWORD ptr[rdi+rax],xmm0
747 | add rax,16
748 |
749 | Convert_RGB64_16toRGB64_8_AVX2_6:
750 | test r8d,1 ; one leftover pixel (8 bytes)?
751 | jz short Convert_RGB64_16toRGB64_8_AVX2_7
752 |
753 | vmovq xmm0,qword ptr[rsi+rax]
754 | vpaddusw xmm0,xmm0,xmm4
755 | vpsrlw xmm0,xmm0,8
756 | vmovq qword ptr[rdi+rax],xmm0
757 |
758 | Convert_RGB64_16toRGB64_8_AVX2_7:
759 | add rsi,r10 ; next row
760 | add rdi,r11
761 | dec r9d
762 | jnz Convert_RGB64_16toRGB64_8_AVX2_loop_1
763 |
764 | vzeroupper
765 |
766 | pop rbx
767 | pop rsi
768 | pop rdi
769 | pop rbp
770 |
771 | ret
772 |
773 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 endp
774 |
775 |
776 | ;JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 proc src:dword,dst:dword,w:dword,h:dword,
777 | ; src_pitch:dword,dst_pitch:dword
778 | ; src = rcx
779 | ; dst = rdx
780 | ; w = r8d (width in 8-byte pixels of four 16-bit words)
781 | ; h = r9d (rows; must be non-zero, the counter is decremented before being tested). Each word becomes (word + 32, saturated) >> 6.
782 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 proc public frame
783 | ; Remaining arguments are read from the caller's stack relative to rbp (5th argument at rbp+48).
784 | src_pitch equ qword ptr[rbp+48]
785 | dst_pitch equ qword ptr[rbp+56]
786 |
787 | push rbp
788 | .pushreg rbp
789 | mov rbp,rsp
790 | push rdi
791 | .pushreg rdi
792 | push rsi
793 | .pushreg rsi
794 | push rbx
795 | .pushreg rbx
796 | .endprolog
797 |
798 | vmovdqa ymm4,YMMWORD ptr data_w_32 ; rounding bias for the shift by 6
799 |
800 | mov rsi,rcx
801 | mov rdi,rdx
802 | mov ebx,r8d
803 | mov r10,src_pitch
804 | mov r11,dst_pitch
805 | shr ebx,2 ; ebx = number of 4-pixel (32-byte) blocks per row
806 | mov rdx,128 ; bytes advanced per main-loop iteration
807 |
808 | Convert_RGB64_16toRGB64_10_AVX2_loop_1:
809 | mov ecx,ebx
810 | xor rax,rax ; rax = byte offset within the row
811 |
812 | shr ecx,2 ; ecx = number of 16-pixel (128-byte) blocks
813 | jz Convert_RGB64_16toRGB64_10_AVX2_3
814 |
815 | Convert_RGB64_16toRGB64_10_AVX2_loop_2:
816 | vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax] ; saturating add, so 65535 does not wrap
817 | vpaddusw ymm1,ymm4,YMMWORD ptr[rsi+rax+32]
818 | vpaddusw ymm2,ymm4,YMMWORD ptr[rsi+rax+64]
819 | vpaddusw ymm3,ymm4,YMMWORD ptr[rsi+rax+96]
820 | vpsrlw ymm0,ymm0,6
821 | vpsrlw ymm1,ymm1,6
822 | vpsrlw ymm2,ymm2,6
823 | vpsrlw ymm3,ymm3,6
824 | vmovdqa YMMWORD ptr[rdi+rax],ymm0 ; aligned stores: dst rows must be 32-byte aligned
825 | vmovdqa YMMWORD ptr[rdi+rax+32],ymm1
826 | vmovdqa YMMWORD ptr[rdi+rax+64],ymm2
827 | vmovdqa YMMWORD ptr[rdi+rax+96],ymm3
828 | add rax,rdx
829 | loop Convert_RGB64_16toRGB64_10_AVX2_loop_2
830 |
831 | Convert_RGB64_16toRGB64_10_AVX2_3:
832 | test ebx,2 ; two more 32-byte blocks (8 pixels)?
833 | jz short Convert_RGB64_16toRGB64_10_AVX2_4
834 |
835 | vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax]
836 | vpaddusw ymm1,ymm4,YMMWORD ptr[rsi+rax+32]
837 | vpsrlw ymm0,ymm0,6
838 | vpsrlw ymm1,ymm1,6
839 | vmovdqa YMMWORD ptr[rdi+rax],ymm0
840 | vmovdqa YMMWORD ptr[rdi+rax+32],ymm1
841 | add rax,64
842 |
843 | Convert_RGB64_16toRGB64_10_AVX2_4:
844 | test ebx,1 ; one more 32-byte block (4 pixels)?
845 | jz short Convert_RGB64_16toRGB64_10_AVX2_5
846 |
847 | vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax]
848 | vpsrlw ymm0,ymm0,6
849 | vmovdqa YMMWORD ptr[rdi+rax],ymm0
850 | add rax,32
851 |
852 | Convert_RGB64_16toRGB64_10_AVX2_5:
853 | test r8d,2 ; two leftover pixels (16 bytes)?
854 | jz short Convert_RGB64_16toRGB64_10_AVX2_6
855 |
856 | vpaddusw xmm0,xmm4,XMMWORD ptr[rsi+rax]
857 | vpsrlw xmm0,xmm0,6
858 | vmovdqa XMMWORD ptr[rdi+rax],xmm0
859 | add rax,16
860 |
861 | Convert_RGB64_16toRGB64_10_AVX2_6:
862 | test r8d,1 ; one leftover pixel (8 bytes)?
863 | jz short Convert_RGB64_16toRGB64_10_AVX2_7
864 |
865 | vmovq xmm0,qword ptr[rsi+rax]
866 | vpaddusw xmm0,xmm0,xmm4
867 | vpsrlw xmm0,xmm0,6
868 | vmovq qword ptr[rdi+rax],xmm0
869 |
870 | Convert_RGB64_16toRGB64_10_AVX2_7:
871 | add rsi,r10 ; next row
872 | add rdi,r11
873 | dec r9d
874 | jnz Convert_RGB64_16toRGB64_10_AVX2_loop_1
875 |
876 | vzeroupper
877 |
878 | pop rbx
879 | pop rsi
880 | pop rdi
881 | pop rbp
882 |
883 | ret
884 |
885 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 endp
886 |
887 |
888 | ;JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 proc src:dword,dst:dword,w:dword,h:dword,
889 | ; src_pitch:dword,dst_pitch:dword
890 | ; src = rcx
891 | ; dst = rdx
892 | ; w = r8d (width in 8-byte pixels of four 16-bit words)
893 | ; h = r9d (rows; must be non-zero, the counter is decremented before being tested). Each word becomes (word + 8, saturated) >> 4.
894 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 proc public frame
895 | ; Remaining arguments are read from the caller's stack relative to rbp (5th argument at rbp+48).
896 | src_pitch equ qword ptr[rbp+48]
897 | dst_pitch equ qword ptr[rbp+56]
898 |
899 | push rbp
900 | .pushreg rbp
901 | mov rbp,rsp
902 | push rdi
903 | .pushreg rdi
904 | push rsi
905 | .pushreg rsi
906 | push rbx
907 | .pushreg rbx
908 | .endprolog
909 |
910 | vmovdqa ymm4,YMMWORD ptr data_w_8 ; rounding bias for the shift by 4
911 |
912 | mov rsi,rcx
913 | mov rdi,rdx
914 | mov ebx,r8d
915 | mov r10,src_pitch
916 | mov r11,dst_pitch
917 | shr ebx,2 ; ebx = number of 4-pixel (32-byte) blocks per row
918 | mov rdx,128 ; bytes advanced per main-loop iteration
919 |
920 | Convert_RGB64_16toRGB64_12_AVX2_loop_1:
921 | mov ecx,ebx
922 | xor rax,rax ; rax = byte offset within the row
923 |
924 | shr ecx,2 ; ecx = number of 16-pixel (128-byte) blocks
925 | jz Convert_RGB64_16toRGB64_12_AVX2_3
926 |
927 | Convert_RGB64_16toRGB64_12_AVX2_loop_2:
928 | vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax] ; saturating add, so 65535 does not wrap
929 | vpaddusw ymm1,ymm4,YMMWORD ptr[rsi+rax+32]
930 | vpaddusw ymm2,ymm4,YMMWORD ptr[rsi+rax+64]
931 | vpaddusw ymm3,ymm4,YMMWORD ptr[rsi+rax+96]
932 | vpsrlw ymm0,ymm0,4
933 | vpsrlw ymm1,ymm1,4
934 | vpsrlw ymm2,ymm2,4
935 | vpsrlw ymm3,ymm3,4
936 | vmovdqa YMMWORD ptr[rdi+rax],ymm0 ; aligned stores: dst rows must be 32-byte aligned
937 | vmovdqa YMMWORD ptr[rdi+rax+32],ymm1
938 | vmovdqa YMMWORD ptr[rdi+rax+64],ymm2
939 | vmovdqa YMMWORD ptr[rdi+rax+96],ymm3
940 | add rax,rdx
941 | loop Convert_RGB64_16toRGB64_12_AVX2_loop_2
942 |
943 | Convert_RGB64_16toRGB64_12_AVX2_3:
944 | test ebx,2 ; two more 32-byte blocks (8 pixels)?
945 | jz short Convert_RGB64_16toRGB64_12_AVX2_4
946 |
947 | vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax]
948 | vpaddusw ymm1,ymm4,YMMWORD ptr[rsi+rax+32]
949 | vpsrlw ymm0,ymm0,4
950 | vpsrlw ymm1,ymm1,4
951 | vmovdqa YMMWORD ptr[rdi+rax],ymm0
952 | vmovdqa YMMWORD ptr[rdi+rax+32],ymm1
953 | add rax,64
954 |
955 | Convert_RGB64_16toRGB64_12_AVX2_4:
956 | test ebx,1 ; one more 32-byte block (4 pixels)?
957 | jz short Convert_RGB64_16toRGB64_12_AVX2_5
958 |
959 | vpaddusw ymm0,ymm4,YMMWORD ptr[rsi+rax]
960 | vpsrlw ymm0,ymm0,4
961 | vmovdqa YMMWORD ptr[rdi+rax],ymm0
962 | add rax,32
963 |
964 | Convert_RGB64_16toRGB64_12_AVX2_5:
965 | test r8d,2 ; two leftover pixels (16 bytes)?
966 | jz short Convert_RGB64_16toRGB64_12_AVX2_6
967 |
968 | vpaddusw xmm0,xmm4,XMMWORD ptr[rsi+rax]
969 | vpsrlw xmm0,xmm0,4
970 | vmovdqa XMMWORD ptr[rdi+rax],xmm0
971 | add rax,16
972 |
973 | Convert_RGB64_16toRGB64_12_AVX2_6:
974 | test r8d,1 ; one leftover pixel (8 bytes)?
975 | jz short Convert_RGB64_16toRGB64_12_AVX2_7
976 |
977 | vmovq xmm0,qword ptr[rsi+rax]
978 | vpaddusw xmm0,xmm0,xmm4
979 | vpsrlw xmm0,xmm0,4
980 | vmovq qword ptr[rdi+rax],xmm0
981 |
982 | Convert_RGB64_16toRGB64_12_AVX2_7:
983 | add rsi,r10 ; next row
984 | add rdi,r11
985 | dec r9d
986 | jnz Convert_RGB64_16toRGB64_12_AVX2_loop_1
987 |
988 | vzeroupper
989 |
990 | pop rbx
991 | pop rsi
992 | pop rdi
993 | pop rbp
994 |
995 | ret
996 |
997 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 endp
998 |
999 |
1000 | ;JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 proc dst:dword,srcY:dword,w:dword,h:dword,dst_pitch:dword,src_pitchY:dword
1001 | ; dst = rcx (processed in place: 8-byte pixels of four 16-bit words)
1002 | ; srcY = rdx (one float factor per pixel)
1003 | ; w = r8d (width in pixels)
1004 | ; h = r9d (rows; must be non-zero, the counter is decremented before being tested)
1005 | ; Multiplies all four words of every dst pixel by that pixel's float from srcY and stores the result saturated to 16 bits.
1006 | JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 proc public frame
1007 | ; Remaining arguments are read from the caller's stack relative to rbp (5th argument at rbp+48).
1008 | dst_pitch equ qword ptr[rbp+48]
1009 | src_pitchY equ qword ptr[rbp+56]
1010 |
1011 | push rbp
1012 | .pushreg rbp
1013 | mov rbp,rsp
1014 | push rsi
1015 | .pushreg rsi
1016 | push rdi
1017 | .pushreg rdi
1018 | push rbx
1019 | .pushreg rbx
1020 | push r12
1021 | .pushreg r12
1022 | .endprolog
1023 |
1024 | mov rdi,rcx ; rdi = dst
1025 | mov rsi,rdx ; rsi = srcY
1026 | mov r10d,r8d
1027 | mov r11,dst_pitch
1028 | mov r12,src_pitchY
1029 | mov rdx,8 ; srcY bytes advanced per iteration (2 floats)
1030 | shr r10d,1 ; r10d = number of pixel pairs per row
1031 | mov rbx,1 ; mask used to test for an odd width
1032 | vpxor xmm4,xmm4,xmm4 ; xmm4 = 0, used to zero-extend words to dwords
1033 |
1034 | Convert_16_RGB64_HLG_OOTF_AVX2_1:
1035 | mov ecx,r10d
1036 | xor rax,rax ; rax = byte offset into srcY; dst offset is 2*rax
1037 | or ecx,ecx
1038 | jz short Convert_16_RGB64_HLG_OOTF_AVX2_3
1039 |
1040 | Convert_16_RGB64_HLG_OOTF_AVX2_2:
1041 | vbroadcastss xmm0,dword ptr[rsi+rax] ; factor of the first pixel in all 4 lanes
1042 | vbroadcastss xmm1,dword ptr[rsi+rax+4] ; factor of the second pixel in all 4 lanes
1043 | vmovdqa xmm2,XMMWORD ptr[rdi+2*rax] ; aligned load of 2 pixels: dst must be 16-byte aligned
1044 | vinsertf128 ymm0,ymm0,xmm1,1 ; ymm0 = [factor1 x4 | factor2 x4]
1045 | vpunpckhwd xmm3,xmm2,xmm4 ; second pixel's words as dwords
1046 | vpunpcklwd xmm2,xmm2,xmm4 ; first pixel's words as dwords
1047 | vinserti128 ymm2,ymm2,xmm3,1
1048 | vcvtdq2ps ymm2,ymm2
1049 | vmulps ymm2,ymm2,ymm0
1050 | vcvtps2dq ymm2,ymm2 ; back to integers (rounding per MXCSR)
1051 | vextracti128 xmm3,ymm2,1
1052 | vpackusdw xmm2,xmm2,xmm3 ; saturate to unsigned 16-bit words
1053 | vmovdqa XMMWORD ptr[rdi+2*rax],xmm2
1054 |
1055 | add rax,rdx
1056 | loop Convert_16_RGB64_HLG_OOTF_AVX2_2
1057 |
1058 | Convert_16_RGB64_HLG_OOTF_AVX2_3:
1059 | test r8d,ebx ; odd width: one pixel left?
1060 | jz short Convert_16_RGB64_HLG_OOTF_AVX2_4
1061 |
1062 | vbroadcastss xmm0,dword ptr[rsi+rax]
1063 | vmovq xmm2,qword ptr[rdi+2*rax]
1064 | vpunpcklwd xmm2,xmm2,xmm4
1065 | vcvtdq2ps xmm2,xmm2
1066 | vmulps xmm2,xmm2,xmm0
1067 | vcvtps2dq xmm2,xmm2
1068 | vpackusdw xmm2,xmm2,xmm2
1069 | vmovq qword ptr[rdi+2*rax],xmm2
1070 |
1071 | Convert_16_RGB64_HLG_OOTF_AVX2_4:
1072 | add rdi,r11 ; next row
1073 | add rsi,r12
1074 | dec r9d
1075 | jnz Convert_16_RGB64_HLG_OOTF_AVX2_1
1076 |
1077 | vzeroupper
1078 |
1079 | pop r12
1080 | pop rbx
1081 | pop rdi
1082 | pop rsi
1083 | pop rbp
1084 |
1085 | ret
1086 |
1087 | JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 endp
1088 |
1089 |
1090 | ;***************************************************
1091 | ;** XYZ/HDR/SDR functions **
1092 | ;***************************************************
1093 |
1094 |
1095 | ;JPSDR_HDRTools_Scale_20_XYZ_AVX2 proc src:dword,dst:dword,w8:dword,h:dword,src_pitch:dword,dst_pitch:dword,
1096 | ; ValMin:dword,Coeff:dword
1097 | ; src = rcx   (rows of 32-bit floats)
1098 | ; dst = rdx   (rows of 32-bit integers, written with aligned 32-byte stores: rows must be 32-byte aligned)
1099 | ; w8 = r8d    (number of 8-float groups per row; must be > 0 because 'loop' decrements before testing)
1100 | ; h = r9d     (row count; must be > 0, the row loop is post-tested as well)
1101 | ; dst = clamp(int32((src + *ValMin) * (*Coeff * data_f_1048575)), data_dw_0, data_dw_1048575)
1102 | JPSDR_HDRTools_Scale_20_XYZ_AVX2 proc public frame
1103 | ; Stack arguments: the two pitches are byte strides; ValMin and Coeff are pointers to a single float.
1104 | src_pitch equ qword ptr[rbp+48]
1105 | dst_pitch equ qword ptr[rbp+56]
1106 | ValMin equ qword ptr[rbp+64]
1107 | Coeff equ qword ptr[rbp+72]
1108 |
1109 | push rbp
1110 | .pushreg rbp
1111 | mov rbp,rsp
1112 | push rsi
1113 | .pushreg rsi
1114 | push rbx
1115 | .pushreg rbx
1116 | .endprolog
1117 |
1118 | mov rsi,ValMin
1119 | vbroadcastss ymm1,dword ptr[rsi] ; ymm1 = *ValMin in all 8 lanes
1120 | mov rsi,Coeff
1121 | vbroadcastss ymm2,dword ptr[rsi] ; ymm2 = *Coeff in all 8 lanes
1122 |
1123 | vmovdqa ymm3,YMMWORD ptr data_dw_1048575 ; upper clamp bound (constant defined outside this section)
1124 | vmovdqa ymm4,YMMWORD ptr data_dw_0 ; lower clamp bound
1125 | vmulps ymm2,ymm2,YMMWORD ptr data_f_1048575 ; fold the output scale into the coefficient once, outside the loops
1126 |
1127 | mov rsi,rcx ; rsi = current src row; rcx is needed as the 'loop' counter
1128 | mov r10,src_pitch
1129 | mov r11,dst_pitch
1130 | mov rbx,32 ; byte step per iteration (one YMM register)
1131 |
1132 | Scale_20_XYZ_AVX2_1:
1133 | xor rax,rax ; rax = byte offset inside the current row
1134 | mov ecx,r8d ; rcx = w8 (32-bit write clears the upper half)
1135 | Scale_20_XYZ_AVX2_2:
1136 | vaddps ymm0,ymm1,YMMWORD ptr[rsi+rax]
1137 | vmulps ymm0,ymm0,ymm2
1138 | vcvtps2dq ymm0,ymm0 ; rounds per MXCSR; NaN/out-of-range inputs give 80000000h, which the signed clamp maps to the lower bound
1139 | vpminsd ymm0,ymm0,ymm3
1140 | vpmaxsd ymm0,ymm0,ymm4
1141 | vmovdqa YMMWORD ptr[rdx+rax],ymm0
1142 |
1143 | add rax,rbx
1144 | loop Scale_20_XYZ_AVX2_2
1145 |
1146 | add rsi,r10 ; advance both row pointers by their byte pitch
1147 | add rdx,r11
1148 | dec r9d
1149 | jnz short Scale_20_XYZ_AVX2_1
1150 |
1151 | vzeroupper ; clear the upper YMM halves before returning
1152 |
1153 | pop rbx
1154 | pop rsi
1155 | pop rbp
1156 |
1157 | ret
1158 |
1159 | JPSDR_HDRTools_Scale_20_XYZ_AVX2 endp
1160 |
1161 |
1162 | ;JPSDR_HDRTools_Scale_20_RGB_AVX2 proc src:dword,dst:dword,w8:dword,h:dword,src_pitch:dword,dst_pitch:dword
1163 | ; src = rcx   (rows of 32-bit floats)
1164 | ; dst = rdx   (rows of 32-bit integers, written with aligned 32-byte stores: rows must be 32-byte aligned)
1165 | ; w8 = r8d    (number of 8-float groups per row; must be > 0 because 'loop' decrements before testing)
1166 | ; h = r9d     (row count; must be > 0, the row loop is post-tested as well)
1167 | ; dst = clamp(int32(src * data_f_1048575), data_dw_0, data_dw_1048575)
1168 | JPSDR_HDRTools_Scale_20_RGB_AVX2 proc public frame
1169 | ; Stack arguments: the two pitches are byte strides. ValMin and Coeff are declared below but never used in this procedure.
1170 | src_pitch equ qword ptr[rbp+48]
1171 | dst_pitch equ qword ptr[rbp+56]
1172 | ValMin equ qword ptr[rbp+64]
1173 | Coeff equ qword ptr[rbp+72]
1174 |
1175 | push rbp
1176 | .pushreg rbp
1177 | mov rbp,rsp
1178 | push rsi
1179 | .pushreg rsi
1180 | push rbx
1181 | .pushreg rbx
1182 | .endprolog
1183 |
1184 | vmovaps ymm1,YMMWORD ptr data_f_1048575 ; float scale factor (constant defined outside this section)
1185 | vmovdqa ymm2,YMMWORD ptr data_dw_1048575 ; upper clamp bound
1186 | vmovdqa ymm3,YMMWORD ptr data_dw_0 ; lower clamp bound
1187 |
1188 | mov rsi,rcx ; rsi = current src row; rcx is needed as the 'loop' counter
1189 | mov r10,src_pitch
1190 | mov r11,dst_pitch
1191 | mov rbx,32 ; byte step per iteration (one YMM register)
1192 |
1193 | Scale_20_RGB_AVX2_1:
1194 | xor rax,rax ; rax = byte offset inside the current row
1195 | mov ecx,r8d ; rcx = w8 (32-bit write clears the upper half)
1196 | Scale_20_RGB_AVX2_2:
1197 | vmulps ymm0,ymm1,YMMWORD ptr[rsi+rax]
1198 | vcvtps2dq ymm0,ymm0 ; rounds per MXCSR; NaN/out-of-range inputs give 80000000h, which the signed clamp maps to the lower bound
1199 | vpminsd ymm0,ymm0,ymm2
1200 | vpmaxsd ymm0,ymm0,ymm3
1201 | vmovdqa YMMWORD ptr[rdx+rax],ymm0
1202 |
1203 | add rax,rbx
1204 | loop Scale_20_RGB_AVX2_2
1205 |
1206 | add rsi,r10 ; advance both row pointers by their byte pitch
1207 | add rdx,r11
1208 | dec r9d
1209 | jnz short Scale_20_RGB_AVX2_1
1210 |
1211 | vzeroupper ; clear the upper YMM halves before returning
1212 |
1213 | pop rbx
1214 | pop rsi
1215 | pop rbp
1216 |
1217 | ret
1218 |
1219 | JPSDR_HDRTools_Scale_20_RGB_AVX2 endp
1220 |
1221 |
1222 | ;JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 proc src:dword,dst1:dword,dst2:dword,w8:dword,h:dword,src_pitch:dword,
1223 | ; dst_pitch1:dword,dst_pitch2:dword,ValMinX:dword,CoeffX:dword,ValMinZ:dword,CoeffZ:dword
1224 | ; src = rcx   (rows of 32-bit floats, loaded with aligned 32-byte loads)
1225 | ; dst1 = rdx  (read as floats, then overwritten in place with 32-bit integers; rows must be 32-byte aligned)
1226 | ; dst2 = r8   (same in-place treatment as dst1, using the Z constants)
1227 | ; w8 = r9d    (number of 8-float groups per row; must be > 0 because 'loop' decrements before testing)
1228 | ; dstN = clamp(int32((src * dstN + *ValMin) * (*Coeff * data_f_65535)), data_dw_0, data_dw_65535), with X for dst1 and Z for dst2
1229 | JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 proc public frame
1230 | ; Stack arguments: h is the row count (must be > 0), pitches are byte strides, ValMin*/Coeff* are pointers to a single float.
1231 | h equ dword ptr[rbp+48]
1232 | src_pitch equ qword ptr[rbp+56]
1233 | dst_pitch1 equ qword ptr[rbp+64]
1234 | dst_pitch2 equ qword ptr[rbp+72]
1235 | ValMinX equ qword ptr[rbp+80]
1236 | CoeffX equ qword ptr[rbp+88]
1237 | ValMinZ equ qword ptr[rbp+96]
1238 | CoeffZ equ qword ptr[rbp+104]
1239 |
1240 | push rbp
1241 | .pushreg rbp
1242 | mov rbp,rsp
1243 | push rsi
1244 | .pushreg rsi
1245 | push rbx
1246 | .pushreg rbx
1247 | push r12
1248 | .pushreg r12
1249 | push r13
1250 | .pushreg r13
1251 | sub rsp,48
1252 | .allocstack 48
1253 | vmovdqa XMMWORD ptr[rsp],xmm6 ; xmm6..xmm8 are callee-saved under Win64; return address + 5 pushes + 48 keeps rsp 16-byte aligned
1254 | .savexmm128 xmm6,0
1255 | vmovdqa XMMWORD ptr[rsp+16],xmm7
1256 | .savexmm128 xmm7,16
1257 | vmovdqa XMMWORD ptr[rsp+32],xmm8
1258 | .savexmm128 xmm8,32
1259 | .endprolog
1260 |
1261 | mov rsi,ValMinX
1262 | vbroadcastss ymm2,dword ptr[rsi] ; ymm2 = *ValMinX in all 8 lanes
1263 | mov rsi,CoeffX
1264 | vbroadcastss ymm3,dword ptr[rsi] ; ymm3 = *CoeffX
1265 | mov rsi,ValMinZ
1266 | vbroadcastss ymm4,dword ptr[rsi] ; ymm4 = *ValMinZ
1267 | mov rsi,CoeffZ
1268 | vbroadcastss ymm5,dword ptr[rsi] ; ymm5 = *CoeffZ
1269 |
1270 | vmovdqa ymm6,YMMWORD ptr data_dw_65535 ; upper clamp bound (constant defined outside this section)
1271 | vmovdqa ymm7,YMMWORD ptr data_dw_0 ; lower clamp bound
1272 | vmulps ymm3,ymm3,YMMWORD ptr data_f_65535 ; fold the output scale into both coefficients once, outside the loops
1273 | vmulps ymm5,ymm5,YMMWORD ptr data_f_65535
1274 |
1275 | mov rsi,rcx ; rsi = current src row; rcx is needed as the 'loop' counter
1276 | mov r10,src_pitch
1277 | mov r11,dst_pitch1
1278 | mov r12,dst_pitch2
1279 | mov r13d,h ; r13d = remaining rows
1280 | mov rbx,32 ; byte step per iteration (one YMM register)
1281 |
1282 | BT2446C_16_XYZ_AVX2_1:
1283 | xor rax,rax ; rax = byte offset inside the current row (shared by src, dst1 and dst2)
1284 | mov ecx,r9d ; rcx = w8 (32-bit write clears the upper half)
1285 | BT2446C_16_XYZ_AVX2_2:
1286 | vmovaps ymm8,YMMWORD ptr[rsi+rax] ; src factor, reused for both outputs
1287 | vmulps ymm0,ymm8,YMMWORD ptr[rdx+rax] ; src * current float content of dst1
1288 | vmulps ymm1,ymm8,YMMWORD ptr[r8+rax] ; src * current float content of dst2
1289 | vaddps ymm0,ymm0,ymm2
1290 | vaddps ymm1,ymm1,ymm4
1291 | vmulps ymm0,ymm0,ymm3
1292 | vmulps ymm1,ymm1,ymm5
1293 | vcvtps2dq ymm0,ymm0 ; rounds per MXCSR; NaN/out-of-range inputs give 80000000h, which the signed clamp maps to the lower bound
1294 | vcvtps2dq ymm1,ymm1
1295 | vpminsd ymm0,ymm0,ymm6
1296 | vpminsd ymm1,ymm1,ymm6
1297 | vpmaxsd ymm0,ymm0,ymm7
1298 | vpmaxsd ymm1,ymm1,ymm7
1299 | vmovdqa YMMWORD ptr[rdx+rax],ymm0 ; overwrite the float inputs with the integer results
1300 | vmovdqa YMMWORD ptr[r8+rax],ymm1
1301 |
1302 | add rax,rbx
1303 | loop BT2446C_16_XYZ_AVX2_2
1304 |
1305 | add rsi,r10 ; advance the three row pointers by their byte pitch
1306 | add rdx,r11
1307 | add r8,r12
1308 | dec r13d
1309 | jnz short BT2446C_16_XYZ_AVX2_1
1310 |
1311 | vmovdqa xmm8,XMMWORD ptr[rsp+32] ; restore the callee-saved XMM registers
1312 | vmovdqa xmm7,XMMWORD ptr[rsp+16]
1313 | vmovdqa xmm6,XMMWORD ptr[rsp]
1314 | add rsp,48
1315 |
1316 | vzeroupper ; clears only the upper YMM halves, so the restored xmm6..xmm8 are kept
1317 |
1318 | pop r13
1319 | pop r12
1320 | pop rbx
1321 | pop rsi
1322 | pop rbp
1323 |
1324 | ret
1325 |
1326 | JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 endp
1327 |
1328 |
1329 | end
1330 |
--------------------------------------------------------------------------------