├── Plugins_JPSDR.sln
├── Plugins_JPSDR
    ├── AutoYUY2.cpp
    ├── AutoYUY2.h
    ├── AutoYUY2_AVX2_asm.asm
    ├── AutoYUY2_AVX2_asm_x64.asm
    ├── AutoYUY2_asm.asm
    ├── AutoYUY2_asm_x64.asm
    ├── HDRTools.cpp
    ├── HDRTools.h
    ├── HDRTools_AVX2_asm.asm
    ├── HDRTools_AVX2_asm_x64.asm
    ├── HDRTools_asm.asm
    ├── HDRTools_asm_x64.asm
    ├── MatrixClass.cpp
    ├── MatrixClass.h
    ├── MatrixClass_x64.asm
    ├── MatrixClass_x86.asm
    ├── PlanarFrame.cpp
    ├── PlanarFrame.h
    ├── PlanarFrame_asm.asm
    ├── PlanarFrame_asm_x64.asm
    ├── Plugins_JPSDR.vcxproj
    ├── Plugins_JPSDR.vcxproj.filters
    ├── Plugins_JPSDR.vcxproj.user
    ├── ThreadPool.cpp
    ├── ThreadPool.h
    ├── ThreadPoolDef.h
    ├── ThreadPoolInterface.cpp
    ├── ThreadPoolInterface.h
    ├── TransferFunctions.cpp
    ├── TransferFunctions.h
    ├── aWarpSharp.cpp
    ├── aWarpSharp.h
    ├── aWarpSharp_asm.asm
    ├── aWarpSharp_asm_x64.asm
    ├── avisynth.h
    ├── avs
    │   ├── alignment.h
    │   ├── capi.h
    │   ├── config.h
    │   ├── cpuid.h
    │   ├── filesystem.h
    │   ├── minmax.h
    │   ├── posix.h
    │   ├── types.h
    │   └── win.h
    ├── binary1.bin
    ├── internal.h
    ├── nnedi3.cpp
    ├── nnedi3.h
    ├── nnedi3_asm.asm
    ├── nnedi3_asm_FMA.asm
    ├── nnedi3_asm_FMA_x64.asm
    ├── nnedi3_asm_x64.asm
    ├── plugins_JPSDR.cpp
    ├── plugins_JPSDR.rc
    ├── resample.cpp
    ├── resample.h
    ├── resample_avx2.cpp
    ├── resample_avx2.h
    ├── resample_functions.cpp
    ├── resample_functions.h
    ├── resample_sse.cpp
    ├── resample_sse.h
    └── resource.h
├── README.md
└── plugins_JPSDR - Readme.txt


/Plugins_JPSDR.sln:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | Microsoft Visual Studio Solution File, Format Version 11.00
 3 | # Visual Studio 2010
 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Plugins_JPSDR", "Plugins_JPSDR\Plugins_JPSDR.vcxproj", "{0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}"
 5 | EndProject
 6 | Global
 7 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 8 | 		Debug|Win32 = Debug|Win32
 9 | 		Debug|x64 = Debug|x64
10 | 		Release|Win32 = Release|Win32
11 | 		Release|x64 = Release|x64
12 | 	EndGlobalSection
13 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | 		{0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}.Debug|Win32.ActiveCfg = Debug|Win32
15 | 		{0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}.Debug|Win32.Build.0 = Debug|Win32
16 | 		{0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}.Debug|x64.ActiveCfg = Debug|x64
17 | 		{0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}.Debug|x64.Build.0 = Debug|x64
18 | 		{0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}.Release|Win32.ActiveCfg = Release|Win32
19 | 		{0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}.Release|Win32.Build.0 = Release|Win32
20 | 		{0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}.Release|x64.ActiveCfg = Release|x64
21 | 		{0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}.Release|x64.Build.0 = Release|x64
22 | 	EndGlobalSection
23 | 	GlobalSection(SolutionProperties) = preSolution
24 | 		HideSolutionNode = FALSE
25 | 	EndGlobalSection
26 | EndGlobal
27 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/AutoYUY2.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  AutoYUY2()
 3 |  *
 4 |  *  Adaptive YV12 upsampling. Progressive picture areas are upsampled
 5 |  *  progressively and interlaced areas are upsampled interlaced.
 6 |  *  Copyright (C) 2005 Donald A. Graft
 7 |  *  Modified by JPSDR
 8 |  *	
 9 |  *  AutoYUY2 is free software; you can redistribute it and/or modify
10 |  *  it under the terms of the GNU General Public License as published by
11 |  *  the Free Software Foundation; either version 2, or (at your option)
12 |  *  any later version.
13 |  *   
14 |  *  AutoYUY2 is distributed in the hope that it will be useful,
15 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 |  *  GNU General Public License for more details.
18 |  *   
19 |  *  You should have received a copy of the GNU General Public License
20 |  *  along with AutoYUY2; see the file COPYING.  If not, write to
21 |  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
22 |  *
23 |  */
24 | 
25 | #include "./avisynth.h"
26 | #include "./ThreadPoolInterface.h"
27 | 
28 | #define AUTOYUY2_VERSION "AutoYUY2 4.1.10 JPSDR"
29 | // Inspired by the Neuron2 filter
30 | // Interlaced_Tab_Size: second dimension of the interlaced_tab_U / interlaced_tab_V arrays declared in class AutoYUY2.
31 | #define Interlaced_Tab_Size 3
32 | // myfree: free(ptr) and reset ptr to NULL when it is non-NULL. NOTE(review): expands to a bare if-statement and evaluates ptr several times, so avoid using it directly before an `else` or with side-effecting arguments.
33 | #define myfree(ptr) if (ptr!=NULL) { free(ptr); ptr=NULL;}
34 | 
35 | 
36 | typedef struct _MT_Data_Info_AutoYUY2	// Work descriptor; class AutoYUY2 keeps an array of MAX_MT_THREADS of these (member MT_Data).
37 | {
38 | 	void *src1,*src2,*src3;	// three source pointers (presumably one per plane - confirm in AutoYUY2.cpp)
39 | 	void *dst1,*dst2,*dst3;	// three destination pointers (presumably one per plane - confirm)
40 | 	ptrdiff_t src_pitch1,src_pitch2,src_pitch3;	// pitch paired with each src pointer (presumably bytes per row - confirm)
41 | 	ptrdiff_t dst_pitch1,dst_pitch2,dst_pitch3;	// pitch paired with each dst pointer
42 | 	int32_t src_Y_h_min,src_Y_h_max,src_Y_w;	// source Y: row range and width (whether h_max is inclusive is not visible here - confirm)
43 | 	int32_t src_UV_h_min,src_UV_h_max,src_UV_w;	// source U/V: row range and width
44 | 	int32_t dst_Y_h_min,dst_Y_h_max,dst_Y_w;	// destination Y: row range and width
45 | 	int32_t dst_UV_h_min,dst_UV_h_max,dst_UV_w;	// destination U/V: row range and width
46 | 	bool top,bottom;	// presumably mark the slices touching the top / bottom picture edge - confirm
47 | } MT_Data_Info_AutoYUY2;
48 | 
49 | 
50 | class AutoYUY2 : public GenericVideoFilter
51 | {
52 | public:
53 | 	AutoYUY2(PClip _child, int _threshold, int _mode, int _output,uint8_t _threads, bool _sleep, bool negativePrefetch, IScriptEnvironment* env);
54 | 	virtual ~AutoYUY2();
55 |     PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env);
56 | 	// SetCacheHints is implemented outside this header - presumably the AviSynth cache / MT-mode query hook; confirm in AutoYUY2.cpp.
57 | 	int __stdcall SetCacheHints(int cachehints, int frame_range);
58 | 	// Internal state. threshold/mode/output/sleep share their names with constructor arguments (presumably copied there - confirm).
59 | private:
60 | 	int threshold;
61 | 	int mode;
62 | 	int output;
63 | 	bool sleep;
64 | 	uint16_t *lookup_Upscale8;
65 | 	uint32_t *lookup_Upscale16;
66 | 	bool *interlaced_tab_U[MAX_MT_THREADS][Interlaced_Tab_Size],*interlaced_tab_V[MAX_MT_THREADS][Interlaced_Tab_Size];
67 | 	bool SSE2_Enable,AVX_Enable,AVX2_Enable,has_at_least_v8;
68 | 	// Clip-format flags and sample size - presumably derived from the child clip in the constructor; confirm.
69 | 	bool grey,avsp,isRGBPfamily,isAlphaChannel;
70 | 	uint8_t pixelsize; // AVS16
71 | 	uint8_t bits_per_pixel;
72 | 	// Two parallel arrays with MAX_MT_THREADS slots each: thread-pool bookkeeping (MT_Thread) and the work descriptors (MT_Data).
73 | 	Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
74 | 	MT_Data_Info_AutoYUY2 MT_Data[MAX_MT_THREADS];
75 | 	uint8_t threads,threads_number;
76 | 	uint32_t UserId;
77 | 	// Function object handed to the thread pool - presumably initialised to StaticThreadpool; confirm in AutoYUY2.cpp.
78 | 	ThreadPoolFunction StaticThreadpoolF;
79 | 	// Static entry point taking an untyped pointer, i.e. callable without an AutoYUY2 instance.
80 | 	static void StaticThreadpool(void *ptr);
81 | 	// Releases resources owned by the filter - presumably the lookup tables and interlaced_tab_* buffers; confirm.
82 | 	void FreeData(void);
83 | };
84 | 
85 | 
86 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/AutoYUY2_AVX2_asm.asm:
--------------------------------------------------------------------------------
  1 | ;
  2 | ;  AutoYUY2()
  3 | ;
  4 | ;  Adaptive YV12 upsampling. Progressive picture areas are upsampled
  5 | ;  progressively and interlaced areas are upsampled interlaced.
  6 | ;  Copyright (C) 2005 Donald A. Graft
  7 | ;  ASM part made by JPSDR
  8 | ;	
  9 | ;  AutoYUY2 is free software; you can redistribute it and/or modify
 10 | ;  it under the terms of the GNU General Public License as published by
 11 | ;  the Free Software Foundation; either version 2, or (at your option)
 12 | ;  any later version.
 13 | ;   
 14 | ;  AutoYUY2 is distributed in the hope that it will be useful,
 15 | ;  but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 | ;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 17 | ;  GNU General Public License for more details.
 18 | ;   
 19 | ;  You should have received a copy of the GNU General Public License
 20 | ;  along with AutoYUY2; see the file COPYING.  If not, write to
 21 | ;  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 22 | ;
 23 | 
 24 | .586	; 32-bit MASM: enable the Pentium instruction set
 25 | .xmm	; enable XMM register / SSE instruction support
 26 | .model flat,c	; flat memory model, C naming and calling convention for every proc below
 27 | ; Every procedure in this file uses YMM registers and ends with vzeroupper before returning.
 28 | .code
 29 | 
 30 | 
 31 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_8_AVX2 proc src1:dword,src2:dword,dst:dword,w32:dword
 32 | ; dst[i] = avg_up(floor((src1[i]+src2[i])/2), src1[i]) per unsigned byte, i.e. about (3*src1 + src2)/4.
 33 | 	public JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_8_AVX2
 34 | ; src1,src2,dst : accessed with vmovdqa, so each must be 32-byte aligned.
 35 | ; w32 : number of 32-byte blocks to process; 0 returns immediately. Clobbers eax, ecx, edx, ymm0-ymm3.
 36 | 	push esi
 37 | 	push edi
 38 | 
 39 | 	mov ecx,w32
 40 | 	test ecx,ecx
 41 | 	jz Convert_Planar420_to_Planar422_x3x1_8_AVX2_2	; nothing to do (a zero count must not wrap around)
 42 | 
 43 | 	mov edi,dst
 44 | 	mov esi,src1
 45 | 	mov edx,src2
 46 | 	xor eax,eax					; eax = byte offset inside the row
 47 | 	vpcmpeqb ymm3,ymm3,ymm3		; ymm3 = all ones, used as NOT mask
 48 | 
 49 | Convert_Planar420_to_Planar422_x3x1_8_AVX2_1:
 50 | 	vmovdqa ymm0,YMMWORD ptr[esi+eax]	; a = src1
 51 | 	vmovdqa ymm1,YMMWORD ptr[edx+eax]	; b = src2
 52 | 	vpxor ymm2,ymm0,ymm3				; ~a
 53 | 	vpxor ymm1,ymm1,ymm3				; ~b
 54 | 	vpavgb ymm2,ymm2,ymm1				; rounded-up average of the complements
 55 | 	vpxor ymm2,ymm2,ymm3				; = (a+b)/2 rounded down
 56 | 	vpavgb ymm2,ymm2,ymm0				; average again with a -> 3:1 weighting
 57 | 	vmovdqa YMMWORD ptr[edi+eax],ymm2
 58 | 	add eax,32
 59 | 	dec ecx
 60 | 	jnz Convert_Planar420_to_Planar422_x3x1_8_AVX2_1
 61 | 
 62 | 	vzeroupper
 63 | 
 64 | Convert_Planar420_to_Planar422_x3x1_8_AVX2_2:
 65 | 	pop edi
 66 | 	pop esi
 67 | 
 68 | 	ret
 69 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_8_AVX2 endp
 70 | 
 71 | 
 72 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_16_AVX2 proc src1:dword,src2:dword,dst:dword,w16:dword
 73 | ; dst[i] = avg_up(floor((src1[i]+src2[i])/2), src1[i]) per unsigned 16-bit word, i.e. about (3*src1 + src2)/4.
 74 | 	public JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_16_AVX2
 75 | ; src1,src2,dst : accessed with vmovdqa, so each must be 32-byte aligned.
 76 | ; w16 : number of 32-byte blocks (16 words each); 0 returns immediately. Clobbers eax, ecx, edx, ymm0-ymm3.
 77 | 	push esi
 78 | 	push edi
 79 | 
 80 | 	mov ecx,w16
 81 | 	test ecx,ecx
 82 | 	jz Convert_Planar420_to_Planar422_x3x1_16_AVX2_2	; nothing to do (a zero count must not wrap around)
 83 | 
 84 | 	mov edi,dst
 85 | 	mov esi,src1
 86 | 	mov edx,src2
 87 | 	xor eax,eax					; eax = byte offset inside the row
 88 | 	vpcmpeqb ymm3,ymm3,ymm3		; ymm3 = all ones, used as NOT mask
 89 | 
 90 | Convert_Planar420_to_Planar422_x3x1_16_AVX2_1:
 91 | 	vmovdqa ymm0,YMMWORD ptr[esi+eax]	; a = src1
 92 | 	vmovdqa ymm1,YMMWORD ptr[edx+eax]	; b = src2
 93 | 	vpxor ymm2,ymm0,ymm3				; ~a
 94 | 	vpxor ymm1,ymm1,ymm3				; ~b
 95 | 	vpavgw ymm2,ymm2,ymm1				; rounded-up average of the complements
 96 | 	vpxor ymm2,ymm2,ymm3				; = (a+b)/2 rounded down
 97 | 	vpavgw ymm2,ymm2,ymm0				; average again with a -> 3:1 weighting
 98 | 	vmovdqa YMMWORD ptr[edi+eax],ymm2
 99 | 	add eax,32
100 | 	dec ecx
101 | 	jnz Convert_Planar420_to_Planar422_x3x1_16_AVX2_1
102 | 
103 | 	vzeroupper
104 | 
105 | Convert_Planar420_to_Planar422_x3x1_16_AVX2_2:
106 | 	pop edi
107 | 	pop esi
108 | 
109 | 	ret
110 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_16_AVX2 endp
111 | 
112 | 
113 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_8_AVX2 proc src1:dword,src2:dword,dst:dword,w32:dword
114 | ; Per unsigned byte: t = floor((floor((src1+src2)/2) + src2)/2); dst = avg_up(t, src1), i.e. about (5*src1 + 3*src2)/8.
115 | 	public JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_8_AVX2
116 | ; src1,src2,dst : accessed with vmovdqa, so each must be 32-byte aligned.
117 | ; w32 : number of 32-byte blocks to process; 0 returns immediately. Clobbers eax, ecx, edx, ymm0-ymm3.
118 | 	push esi
119 | 	push edi
120 | 
121 | 	mov ecx,w32
122 | 	test ecx,ecx
123 | 	jz Convert_Planar420_to_Planar422_x3x5_8_AVX2_2	; nothing to do (a zero count must not wrap around)
124 | 
125 | 	mov edi,dst
126 | 	mov esi,src1
127 | 	mov edx,src2
128 | 	xor eax,eax					; eax = byte offset inside the row
129 | 	vpcmpeqb ymm3,ymm3,ymm3		; ymm3 = all ones, used as NOT mask
130 | 
131 | Convert_Planar420_to_Planar422_x3x5_8_AVX2_1:
132 | 	vmovdqa ymm0,YMMWORD ptr[esi+eax]	; a = src1
133 | 	vmovdqa ymm1,YMMWORD ptr[edx+eax]	; b = src2
134 | 	vpxor ymm2,ymm0,ymm3				; ~a
135 | 	vpxor ymm1,ymm1,ymm3				; ~b
136 | 	vpavgb ymm2,ymm2,ymm1				; average of the complements
137 | 	vpavgb ymm2,ymm2,ymm1				; pull once more towards ~b
138 | 	vpxor ymm2,ymm2,ymm3				; = about (a+3b)/4, rounded down
139 | 	vpavgb ymm2,ymm2,ymm0				; average with a -> 5:3 weighting
140 | 	vmovdqa YMMWORD ptr[edi+eax],ymm2
141 | 	add eax,32
142 | 	dec ecx
143 | 	jnz Convert_Planar420_to_Planar422_x3x5_8_AVX2_1
144 | 
145 | 	vzeroupper
146 | 
147 | Convert_Planar420_to_Planar422_x3x5_8_AVX2_2:
148 | 	pop edi
149 | 	pop esi
150 | 
151 | 	ret
152 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_8_AVX2 endp
153 | 
154 | 
155 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_16_AVX2 proc src1:dword,src2:dword,dst:dword,w16:dword
156 | ; Per unsigned 16-bit word: t = floor((floor((src1+src2)/2) + src2)/2); dst = avg_up(t, src1), i.e. about (5*src1 + 3*src2)/8.
157 | 	public JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_16_AVX2
158 | ; src1,src2,dst : accessed with vmovdqa, so each must be 32-byte aligned.
159 | ; w16 : number of 32-byte blocks (16 words each); 0 returns immediately. Clobbers eax, ecx, edx, ymm0-ymm3.
160 | 	push esi
161 | 	push edi
162 | 
163 | 	mov ecx,w16
164 | 	test ecx,ecx
165 | 	jz Convert_Planar420_to_Planar422_x3x5_16_AVX2_2	; nothing to do (a zero count must not wrap around)
166 | 
167 | 	mov edi,dst
168 | 	mov esi,src1
169 | 	mov edx,src2
170 | 	xor eax,eax					; eax = byte offset inside the row
171 | 	vpcmpeqb ymm3,ymm3,ymm3		; ymm3 = all ones, used as NOT mask
172 | 
173 | Convert_Planar420_to_Planar422_x3x5_16_AVX2_1:
174 | 	vmovdqa ymm0,YMMWORD ptr[esi+eax]	; a = src1
175 | 	vmovdqa ymm1,YMMWORD ptr[edx+eax]	; b = src2
176 | 	vpxor ymm2,ymm0,ymm3				; ~a
177 | 	vpxor ymm1,ymm1,ymm3				; ~b
178 | 	vpavgw ymm2,ymm2,ymm1				; average of the complements
179 | 	vpavgw ymm2,ymm2,ymm1				; pull once more towards ~b
180 | 	vpxor ymm2,ymm2,ymm3				; = about (a+3b)/4, rounded down
181 | 	vpavgw ymm2,ymm2,ymm0				; average with a -> 5:3 weighting
182 | 	vmovdqa YMMWORD ptr[edi+eax],ymm2
183 | 	add eax,32
184 | 	dec ecx
185 | 	jnz Convert_Planar420_to_Planar422_x3x5_16_AVX2_1
186 | 
187 | 	vzeroupper
188 | 
189 | Convert_Planar420_to_Planar422_x3x5_16_AVX2_2:
190 | 	pop edi
191 | 	pop esi
192 | 
193 | 	ret
194 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_16_AVX2 endp
195 | 
196 | 
197 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_8_AVX2 proc src1:dword,src2:dword,dst:dword,w32:dword
198 | ; Per unsigned byte: t = floor((floor((src1+src2)/2) + src1)/2); dst = avg_up(t, src1), i.e. about (7*src1 + src2)/8.
199 | 	public JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_8_AVX2
200 | ; src1,src2,dst : accessed with vmovdqa, so each must be 32-byte aligned.
201 | ; w32 : number of 32-byte blocks to process; 0 returns immediately. Clobbers eax, ecx, edx, ymm0-ymm3.
202 | 	push esi
203 | 	push edi
204 | 
205 | 	mov ecx,w32
206 | 	test ecx,ecx
207 | 	jz Convert_Planar420_to_Planar422_x7x1_8_AVX2_2	; nothing to do (a zero count must not wrap around)
208 | 
209 | 	mov edi,dst
210 | 	mov esi,src1
211 | 	mov edx,src2
212 | 	xor eax,eax					; eax = byte offset inside the row
213 | 	vpcmpeqb ymm3,ymm3,ymm3		; ymm3 = all ones, used as NOT mask
214 | 
215 | Convert_Planar420_to_Planar422_x7x1_8_AVX2_1:
216 | 	vmovdqa ymm0,YMMWORD ptr[esi+eax]	; a = src1
217 | 	vmovdqa ymm1,YMMWORD ptr[edx+eax]	; b = src2
218 | 	vpxor ymm2,ymm0,ymm3				; ~a
219 | 	vpxor ymm1,ymm1,ymm3				; ~b
220 | 	vpavgb ymm1,ymm1,ymm2				; average of the complements
221 | 	vpavgb ymm1,ymm1,ymm2				; pull once more towards ~a
222 | 	vpxor ymm1,ymm1,ymm3				; = about (3a+b)/4, rounded down
223 | 	vpavgb ymm1,ymm1,ymm0				; average with a -> 7:1 weighting
224 | 	vmovdqa YMMWORD ptr[edi+eax],ymm1
225 | 	add eax,32
226 | 	dec ecx
227 | 	jnz Convert_Planar420_to_Planar422_x7x1_8_AVX2_1
228 | 
229 | 	vzeroupper
230 | Convert_Planar420_to_Planar422_x7x1_8_AVX2_2:
231 | 	pop edi
232 | 	pop esi
233 | 
234 | 	ret
235 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_8_AVX2 endp
236 | 
237 | 
238 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_16_AVX2 proc src1:dword,src2:dword,dst:dword,w16:dword
239 | ; Per unsigned 16-bit word: t = floor((floor((src1+src2)/2) + src1)/2); dst = avg_up(t, src1), i.e. about (7*src1 + src2)/8.
240 | 	public JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_16_AVX2
241 | ; src1,src2,dst : accessed with vmovdqa, so each must be 32-byte aligned.
242 | ; w16 : number of 32-byte blocks (16 words each); 0 returns immediately. Clobbers eax, ecx, edx, ymm0-ymm3.
243 | 	push esi
244 | 	push edi
245 | 
246 | 	mov ecx,w16
247 | 	test ecx,ecx
248 | 	jz Convert_Planar420_to_Planar422_x7x1_16_AVX2_2	; nothing to do (a zero count must not wrap around)
249 | 
250 | 	mov edi,dst
251 | 	mov esi,src1
252 | 	mov edx,src2
253 | 	xor eax,eax					; eax = byte offset inside the row
254 | 	vpcmpeqb ymm3,ymm3,ymm3		; ymm3 = all ones, used as NOT mask
255 | 
256 | Convert_Planar420_to_Planar422_x7x1_16_AVX2_1:
257 | 	vmovdqa ymm0,YMMWORD ptr[esi+eax]	; a = src1
258 | 	vmovdqa ymm1,YMMWORD ptr[edx+eax]	; b = src2
259 | 	vpxor ymm2,ymm0,ymm3				; ~a
260 | 	vpxor ymm1,ymm1,ymm3				; ~b
261 | 	vpavgw ymm1,ymm1,ymm2				; average of the complements
262 | 	vpavgw ymm1,ymm1,ymm2				; pull once more towards ~a
263 | 	vpxor ymm1,ymm1,ymm3				; = about (3a+b)/4, rounded down
264 | 	vpavgw ymm1,ymm1,ymm0				; average with a -> 7:1 weighting
265 | 	vmovdqa YMMWORD ptr[edi+eax],ymm1
266 | 	add eax,32
267 | 	dec ecx
268 | 	jnz Convert_Planar420_to_Planar422_x7x1_16_AVX2_1
269 | 
270 | 	vzeroupper
271 | Convert_Planar420_to_Planar422_x7x1_16_AVX2_2:
272 | 	pop edi
273 | 	pop esi
274 | 
275 | 	ret
276 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_16_AVX2 endp
277 | 
278 | 
279 | end
280 | 
281 | 
282 | 
283 | 
284 | 
285 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/AutoYUY2_AVX2_asm_x64.asm:
--------------------------------------------------------------------------------
  1 | ;
  2 | ;  AutoYUY2()
  3 | ;
  4 | ;  Adaptive YV12 upsampling. Progressive picture areas are upsampled
  5 | ;  progressively and interlaced areas are upsampled interlaced.
  6 | ;  Copyright (C) 2005 Donald A. Graft
  7 | ;  ASM part made by JPSDR
  8 | ;	
  9 | ;  AutoYUY2 is free software; you can redistribute it and/or modify
 10 | ;  it under the terms of the GNU General Public License as published by
 11 | ;  the Free Software Foundation; either version 2, or (at your option)
 12 | ;  any later version.
 13 | ;   
 14 | ;  AutoYUY2 is distributed in the hope that it will be useful,
 15 | ;  but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 | ;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 17 | ;  GNU General Public License for more details.
 18 | ;   
 19 | ;  You should have received a copy of the GNU General Public License
 20 | ;  along with AutoYUY2; see the file COPYING.  If not, write to
 21 | ;  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 22 | ;
 23 | 
 24 | .data	; no static data is defined in this file
 25 | ; 64-bit build: the per-procedure comments below list which register carries each argument.
 26 | .code
 27 | 
 28 | 
 29 | ; JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_8_AVX2(src1,src2,dst,w32) - leaf procedure, register arguments
 30 | ; src1 = rcx	(pointer; read with vmovdqa, must be 32-byte aligned)
 31 | ; src2 = rdx	(pointer; same alignment requirement)
 32 | ; dst = r8	(pointer; same alignment requirement)
 33 | ; w32 = r9d	(number of 32-byte blocks; 0 returns immediately)
 34 | ; dst[i] = avg_up(floor((src1[i]+src2[i])/2), src1[i]) per unsigned byte, about (3*src1 + src2)/4. Uses rax, r9, ymm0-ymm3.
 35 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_8_AVX2 proc public frame
 36 | 
 37 | 	.endprolog
 38 | 
 39 | 	test r9d,r9d
 40 | 	jz Convert_Planar420_to_Planar422_x3x1_8_AVX2_2	; nothing to do (a zero count must not wrap around)
 41 | 
 42 | 	xor eax,eax					; rax = byte offset inside the row (32-bit write clears the upper half)
 43 | 	vpcmpeqb ymm3,ymm3,ymm3		; ymm3 = all ones, used as NOT mask
 44 | 
 45 | Convert_Planar420_to_Planar422_x3x1_8_AVX2_1:
 46 | 	vmovdqa ymm0,YMMWORD ptr[rcx+rax]	; a = src1
 47 | 	vmovdqa ymm1,YMMWORD ptr[rdx+rax]	; b = src2
 48 | 	vpxor ymm2,ymm0,ymm3				; ~a
 49 | 	vpxor ymm1,ymm1,ymm3				; ~b
 50 | 	vpavgb ymm2,ymm2,ymm1				; rounded-up average of the complements
 51 | 	vpxor ymm2,ymm2,ymm3				; = (a+b)/2 rounded down
 52 | 	vpavgb ymm2,ymm2,ymm0				; average again with a -> 3:1 weighting
 53 | 
 54 | 	vmovdqa YMMWORD ptr[r8+rax],ymm2
 55 | 	add rax,32
 56 | 	dec r9d
 57 | 	jnz Convert_Planar420_to_Planar422_x3x1_8_AVX2_1
 58 | 
 59 | 	vzeroupper
 60 | 
 61 | Convert_Planar420_to_Planar422_x3x1_8_AVX2_2:
 62 | 	ret
 63 | 
 64 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_8_AVX2 endp
 65 | 
 66 | 
 67 | ; JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_16_AVX2(src1,src2,dst,w16) - leaf procedure, register arguments
 68 | ; src1 = rcx	(pointer; read with vmovdqa, must be 32-byte aligned)
 69 | ; src2 = rdx	(pointer; same alignment requirement)
 70 | ; dst = r8	(pointer; same alignment requirement)
 71 | ; w16 = r9d	(number of 32-byte blocks, 16 words each; 0 returns immediately)
 72 | ; dst[i] = avg_up(floor((src1[i]+src2[i])/2), src1[i]) per unsigned word, about (3*src1 + src2)/4. Uses rax, r9, ymm0-ymm3.
 73 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_16_AVX2 proc public frame
 74 | 
 75 | 	.endprolog
 76 | 
 77 | 	test r9d,r9d
 78 | 	jz Convert_Planar420_to_Planar422_x3x1_16_AVX2_2	; nothing to do (a zero count must not wrap around)
 79 | 
 80 | 	xor eax,eax					; rax = byte offset inside the row (32-bit write clears the upper half)
 81 | 	vpcmpeqb ymm3,ymm3,ymm3		; ymm3 = all ones, used as NOT mask
 82 | 
 83 | Convert_Planar420_to_Planar422_x3x1_16_AVX2_1:
 84 | 	vmovdqa ymm0,YMMWORD ptr[rcx+rax]	; a = src1
 85 | 	vmovdqa ymm1,YMMWORD ptr[rdx+rax]	; b = src2
 86 | 	vpxor ymm2,ymm0,ymm3				; ~a
 87 | 	vpxor ymm1,ymm1,ymm3				; ~b
 88 | 	vpavgw ymm2,ymm2,ymm1				; rounded-up average of the complements
 89 | 	vpxor ymm2,ymm2,ymm3				; = (a+b)/2 rounded down
 90 | 	vpavgw ymm2,ymm2,ymm0				; average again with a -> 3:1 weighting
 91 | 
 92 | 	vmovdqa YMMWORD ptr[r8+rax],ymm2
 93 | 	add rax,32
 94 | 	dec r9d
 95 | 	jnz Convert_Planar420_to_Planar422_x3x1_16_AVX2_1
 96 | 
 97 | 	vzeroupper
 98 | 
 99 | Convert_Planar420_to_Planar422_x3x1_16_AVX2_2:
100 | 	ret
101 | 
102 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_16_AVX2 endp
103 | 
104 | 
105 | ; JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_8_AVX2(src1,src2,dst,w32) - leaf procedure, register arguments
106 | ; src1 = rcx	(pointer; read with vmovdqa, must be 32-byte aligned)
107 | ; src2 = rdx	(pointer; same alignment requirement)
108 | ; dst = r8	(pointer; same alignment requirement)
109 | ; w32 = r9d	(number of 32-byte blocks; 0 returns immediately)
110 | ; Per unsigned byte: t = floor((floor((src1+src2)/2)+src2)/2); dst = avg_up(t,src1), about (5*src1 + 3*src2)/8. Uses rax, r9, ymm0-ymm3.
111 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_8_AVX2 proc public frame
112 | 
113 | 	.endprolog
114 | 
115 | 	test r9d,r9d
116 | 	jz Convert_Planar420_to_Planar422_x3x5_8_AVX2_2	; nothing to do (a zero count must not wrap around)
117 | 
118 | 	xor eax,eax					; rax = byte offset inside the row (32-bit write clears the upper half)
119 | 	vpcmpeqb ymm3,ymm3,ymm3		; ymm3 = all ones, used as NOT mask
120 | 
121 | Convert_Planar420_to_Planar422_x3x5_8_AVX2_1:
122 | 	vmovdqa ymm0,YMMWORD ptr[rcx+rax]	; a = src1
123 | 	vmovdqa ymm1,YMMWORD ptr[rdx+rax]	; b = src2
124 | 	vpxor ymm2,ymm0,ymm3				; ~a
125 | 	vpxor ymm1,ymm1,ymm3				; ~b
126 | 	vpavgb ymm2,ymm2,ymm1				; average of the complements
127 | 	vpavgb ymm2,ymm2,ymm1				; pull once more towards ~b
128 | 	vpxor ymm2,ymm2,ymm3				; = about (a+3b)/4, rounded down
129 | 	vpavgb ymm2,ymm2,ymm0				; average with a -> 5:3 weighting
130 | 
131 | 	vmovdqa YMMWORD ptr[r8+rax],ymm2
132 | 	add rax,32
133 | 	dec r9d
134 | 	jnz Convert_Planar420_to_Planar422_x3x5_8_AVX2_1
135 | 
136 | 	vzeroupper
137 | 
138 | Convert_Planar420_to_Planar422_x3x5_8_AVX2_2:
139 | 	ret
140 | 
141 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_8_AVX2 endp
142 | 
143 | 
144 | ; JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_16_AVX2(src1,src2,dst,w16) - leaf procedure, register arguments
145 | ; src1 = rcx	(pointer; read with vmovdqa, must be 32-byte aligned)
146 | ; src2 = rdx	(pointer; same alignment requirement)
147 | ; dst = r8	(pointer; same alignment requirement)
148 | ; w16 = r9d	(number of 32-byte blocks, 16 words each; 0 returns immediately)
149 | ; Per unsigned word: t = floor((floor((src1+src2)/2)+src2)/2); dst = avg_up(t,src1), about (5*src1 + 3*src2)/8. Uses rax, r9, ymm0-ymm3.
150 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_16_AVX2 proc public frame
151 | 
152 | 	.endprolog
153 | 
154 | 	test r9d,r9d
155 | 	jz Convert_Planar420_to_Planar422_x3x5_16_AVX2_2	; nothing to do (a zero count must not wrap around)
156 | 
157 | 	xor eax,eax					; rax = byte offset inside the row (32-bit write clears the upper half)
158 | 	vpcmpeqb ymm3,ymm3,ymm3		; ymm3 = all ones, used as NOT mask
159 | 
160 | Convert_Planar420_to_Planar422_x3x5_16_AVX2_1:
161 | 	vmovdqa ymm0,YMMWORD ptr[rcx+rax]	; a = src1
162 | 	vmovdqa ymm1,YMMWORD ptr[rdx+rax]	; b = src2
163 | 	vpxor ymm2,ymm0,ymm3				; ~a
164 | 	vpxor ymm1,ymm1,ymm3				; ~b
165 | 	vpavgw ymm2,ymm2,ymm1				; average of the complements
166 | 	vpavgw ymm2,ymm2,ymm1				; pull once more towards ~b
167 | 	vpxor ymm2,ymm2,ymm3				; = about (a+3b)/4, rounded down
168 | 	vpavgw ymm2,ymm2,ymm0				; average with a -> 5:3 weighting
169 | 
170 | 	vmovdqa YMMWORD ptr[r8+rax],ymm2
171 | 	add rax,32
172 | 	dec r9d
173 | 	jnz Convert_Planar420_to_Planar422_x3x5_16_AVX2_1
174 | 
175 | 	vzeroupper
176 | 
177 | Convert_Planar420_to_Planar422_x3x5_16_AVX2_2:
178 | 	ret
179 | 
180 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_16_AVX2 endp
181 | 
182 | 
183 | ; JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_8_AVX2(src1,src2,dst,w32) - leaf procedure, register arguments
184 | ; src1 = rcx	(pointer; read with vmovdqa, must be 32-byte aligned)
185 | ; src2 = rdx	(pointer; same alignment requirement)
186 | ; dst = r8	(pointer; same alignment requirement)
187 | ; w32 = r9d	(number of 32-byte blocks; 0 returns immediately)
188 | ; Per unsigned byte: t = floor((floor((src1+src2)/2)+src1)/2); dst = avg_up(t,src1), about (7*src1 + src2)/8. Uses rax, r9, ymm0-ymm3.
189 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_8_AVX2 proc public frame
190 | 
191 | 	.endprolog
192 | 
193 | 	test r9d,r9d
194 | 	jz Convert_Planar420_to_Planar422_x7x1_8_AVX2_2	; nothing to do (a zero count must not wrap around)
195 | 
196 | 	xor eax,eax					; rax = byte offset inside the row (32-bit write clears the upper half)
197 | 	vpcmpeqb ymm3,ymm3,ymm3		; ymm3 = all ones, used as NOT mask
198 | 
199 | Convert_Planar420_to_Planar422_x7x1_8_AVX2_1:
200 | 	vmovdqa ymm0,YMMWORD ptr[rcx+rax]	; a = src1
201 | 	vmovdqa ymm1,YMMWORD ptr[rdx+rax]	; b = src2
202 | 	vpxor ymm2,ymm0,ymm3				; ~a
203 | 	vpxor ymm1,ymm1,ymm3				; ~b
204 | 	vpavgb ymm1,ymm1,ymm2				; average of the complements
205 | 	vpavgb ymm1,ymm1,ymm2				; pull once more towards ~a
206 | 	vpxor ymm1,ymm1,ymm3				; = about (3a+b)/4, rounded down
207 | 	vpavgb ymm1,ymm1,ymm0				; average with a -> 7:1 weighting
208 | 	vmovdqa YMMWORD ptr[r8+rax],ymm1
209 | 	add rax,32
210 | 	dec r9d
211 | 	jnz Convert_Planar420_to_Planar422_x7x1_8_AVX2_1
212 | 
213 | 	vzeroupper
214 | 
215 | Convert_Planar420_to_Planar422_x7x1_8_AVX2_2:
216 | 	ret
217 | 
218 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_8_AVX2 endp
219 | 
220 | 
221 | ; JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_16_AVX2(src1,src2,dst,w16) - leaf procedure, register arguments
222 | ; src1 = rcx	(pointer; read with vmovdqa, must be 32-byte aligned)
223 | ; src2 = rdx	(pointer; same alignment requirement)
224 | ; dst = r8	(pointer; same alignment requirement)
225 | ; w16 = r9d	(number of 32-byte blocks, 16 words each; 0 returns immediately)
226 | ; Per unsigned word: t = floor((floor((src1+src2)/2)+src1)/2); dst = avg_up(t,src1), about (7*src1 + src2)/8. Uses rax, r9, ymm0-ymm3.
227 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_16_AVX2 proc public frame
228 | 
229 | 	.endprolog
230 | 
231 | 	test r9d,r9d
232 | 	jz Convert_Planar420_to_Planar422_x7x1_16_AVX2_2	; nothing to do (a zero count must not wrap around)
233 | 
234 | 	xor eax,eax					; rax = byte offset inside the row (32-bit write clears the upper half)
235 | 	vpcmpeqb ymm3,ymm3,ymm3		; ymm3 = all ones, used as NOT mask
236 | 
237 | Convert_Planar420_to_Planar422_x7x1_16_AVX2_1:
238 | 	vmovdqa ymm0,YMMWORD ptr[rcx+rax]	; a = src1
239 | 	vmovdqa ymm1,YMMWORD ptr[rdx+rax]	; b = src2
240 | 	vpxor ymm2,ymm0,ymm3				; ~a
241 | 	vpxor ymm1,ymm1,ymm3				; ~b
242 | 	vpavgw ymm1,ymm1,ymm2				; average of the complements
243 | 	vpavgw ymm1,ymm1,ymm2				; pull once more towards ~a
244 | 	vpxor ymm1,ymm1,ymm3				; = about (3a+b)/4, rounded down
245 | 	vpavgw ymm1,ymm1,ymm0				; average with a -> 7:1 weighting
246 | 	vmovdqa YMMWORD ptr[r8+rax],ymm1
247 | 	add rax,32
248 | 	dec r9d
249 | 	jnz Convert_Planar420_to_Planar422_x7x1_16_AVX2_1
250 | 
251 | 	vzeroupper
252 | 
253 | Convert_Planar420_to_Planar422_x7x1_16_AVX2_2:
254 | 	ret
255 | 
256 | JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_16_AVX2 endp
257 | 
258 | 
259 | end
260 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/HDRTools_AVX2_asm.asm:
--------------------------------------------------------------------------------
  1 | ;
  2 | ;  HDRTools()
  3 | ;
  4 | ;  Several functions for working on HDR data, and linear to non-linear conversions.
  5 | ;  Copyright (C) 2018 JPSDR
  6 | ;	
  7 | ;  HDRTools is free software; you can redistribute it and/or modify
  8 | ;  it under the terms of the GNU General Public License as published by
  9 | ;  the Free Software Foundation; either version 2, or (at your option)
 10 | ;  any later version.
 11 | ;   
 12 | ;  HDRTools is distributed in the hope that it will be useful,
 13 | ;  but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 | ;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 15 | ;  GNU General Public License for more details.
 16 | ;   
 17 | ;  You should have received a copy of the GNU General Public License
 18 | ;  along with HDRTools; see the file COPYING.  If not, write to
 19 | ;  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 20 | ;
 21 | ;
 22 | 
 23 | .586
 24 | .xmm
 25 | .model flat,c
 26 | ; 32-bit MASM, flat model, C naming and calling convention for every proc below.
 27 | .data
 28 | ; Constant tables. Each table is 32 bytes and is loaded with aligned 256-bit moves, hence the 32-byte aligned segment.
 29 | align 16
 30 | ; NOTE(review): no matching `data ends` is visible before `.code` - confirm the assembler closes this segment as intended.
 31 | data segment align(32)
 32 | ; 1048575 = 2^20-1: scale factor and upper clamp used by the Scale_20_* procedures. The 65535 tables are not referenced in the visible part of the file.
 33 | data_f_1048575 real4 8 dup(1048575.0)
 34 | data_f_65535 real4 8 dup(65535.0)
 35 | data_dw_1048575 dword 8 dup(1048575)
 36 | data_dw_65535 dword 8 dup(65535)
 37 | data_dw_0 dword 8 dup(0)
 38 | ; Rounding offsets added before the right shift by 8 / 6 / 4 in the Convert_RGB64_16toRGB64_8 / _10 / _12 procedures.
 39 | data_w_128 word 16 dup(128)
 40 | data_w_32 word 16 dup(32)
 41 | data_w_8 word 16 dup(8)
 42 | ; Code section starts here.
 43 | .code
 44 | 
 45 | 
 46 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 proc src1:dword,src2:dword,dst:dword,w:dword
 47 | ; dst[i] = avg_up(floor((src1[i]+src2[i])/2), src1[i]) per unsigned byte, i.e. about (3*src1 + src2)/4.
 48 | 	public JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2
 49 | ; src1,src2,dst : accessed with vmovdqa, so each must be 32-byte aligned.
 50 | ; w : number of 32-byte blocks to process; 0 returns immediately. Clobbers eax, ecx, edx, ymm0-ymm3.
 51 | 	push esi
 52 | 	push edi
 53 | 
 54 | 	mov ecx,w
 55 | 	test ecx,ecx
 56 | 	jz Convert_Planar420_to_Planar422_8_AVX2_2	; nothing to do (a zero count must not wrap around)
 57 | 
 58 | 	mov edi,dst
 59 | 	mov esi,src1
 60 | 	mov edx,src2
 61 | 	xor eax,eax					; eax = byte offset inside the row
 62 | 	vpcmpeqb ymm3,ymm3,ymm3		; ymm3 = all ones, used as NOT mask
 63 | 
 64 | Convert_Planar420_to_Planar422_8_AVX2_1:
 65 | 	vmovdqa ymm0,YMMWORD ptr[esi+eax]	; a = src1
 66 | 	vmovdqa ymm1,YMMWORD ptr[edx+eax]	; b = src2
 67 | 	vpxor ymm2,ymm0,ymm3				; ~a
 68 | 	vpxor ymm1,ymm1,ymm3				; ~b
 69 | 	vpavgb ymm2,ymm2,ymm1				; rounded-up average of the complements
 70 | 	vpxor ymm2,ymm2,ymm3				; = (a+b)/2 rounded down
 71 | 	vpavgb ymm2,ymm2,ymm0				; average again with a -> 3:1 weighting
 72 | 	vmovdqa YMMWORD ptr[edi+eax],ymm2
 73 | 	add eax,32
 74 | 	dec ecx
 75 | 	jnz Convert_Planar420_to_Planar422_8_AVX2_1
 76 | 
 77 | 	vzeroupper
 78 | 
 79 | Convert_Planar420_to_Planar422_8_AVX2_2:
 80 | 	pop edi
 81 | 	pop esi
 82 | 
 83 | 	ret
 84 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 endp
 85 | 
 86 | 
 87 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 proc src1:dword,src2:dword,dst:dword,w:dword
 88 | ; dst[i] = avg_up(floor((src1[i]+src2[i])/2), src1[i]) per unsigned 16-bit word, i.e. about (3*src1 + src2)/4.
 89 | 	public JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2
 90 | ; src1,src2,dst : accessed with vmovdqa, so each must be 32-byte aligned.
 91 | ; w : number of 32-byte blocks (16 words each); 0 returns immediately. Clobbers eax, ecx, edx, ymm0-ymm3.
 92 | 	push esi
 93 | 	push edi
 94 | 
 95 | 	mov ecx,w
 96 | 	test ecx,ecx
 97 | 	jz Convert_Planar420_to_Planar422_16_AVX2_2	; nothing to do (a zero count must not wrap around)
 98 | 
 99 | 	mov edi,dst
100 | 	mov esi,src1
101 | 	mov edx,src2
102 | 	xor eax,eax					; eax = byte offset inside the row
103 | 	vpcmpeqb ymm3,ymm3,ymm3		; ymm3 = all ones, used as NOT mask
104 | 
105 | Convert_Planar420_to_Planar422_16_AVX2_1:
106 | 	vmovdqa ymm0,YMMWORD ptr[esi+eax]	; a = src1
107 | 	vmovdqa ymm1,YMMWORD ptr[edx+eax]	; b = src2
108 | 	vpxor ymm2,ymm0,ymm3				; ~a
109 | 	vpxor ymm1,ymm1,ymm3				; ~b
110 | 	vpavgw ymm2,ymm2,ymm1				; rounded-up average of the complements
111 | 	vpxor ymm2,ymm2,ymm3				; = (a+b)/2 rounded down
112 | 	vpavgw ymm2,ymm2,ymm0				; average again with a -> 3:1 weighting
113 | 	vmovdqa YMMWORD ptr[edi+eax],ymm2
114 | 	add eax,32
115 | 	dec ecx
116 | 	jnz Convert_Planar420_to_Planar422_16_AVX2_1
117 | 
118 | 	vzeroupper
119 | 
120 | Convert_Planar420_to_Planar422_16_AVX2_2:
121 | 	pop edi
122 | 	pop esi
123 | 
124 | 	ret
125 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 endp
126 | 
127 | 
128 | JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 proc src1:dword,src2:dword,dst:dword,w32:dword,h:dword,src_pitch2:dword,dst_pitch:dword
129 | ; For h rows: dst[i] = rounded-up average of src1[i] and src2[i] per unsigned byte, over w32 blocks of 32 bytes.
130 | 	public JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2
131 | ; After each row src1 and src2 advance by src_pitch2 bytes and dst by dst_pitch bytes. All rows must be 32-byte aligned (vmovdqa).
132 | 	push esi
133 | 	push edi
134 | ; A zero width or height returns immediately instead of wrapping the counters. The stack copy of h is used as the row counter.
135 | 	cmp w32,0
136 | 	je Convert_Planar422_to_Planar420_8_AVX2_3
137 | 	cmp h,0
138 | 	je Convert_Planar422_to_Planar420_8_AVX2_3
139 | 
140 | 	mov edi,dst
141 | 	mov esi,src1
142 | 	mov edx,src2
143 | 
144 | Convert_Planar422_to_Planar420_8_AVX2_1:
145 | 	xor eax,eax					; eax = byte offset inside the row
146 | 	mov ecx,w32					; ecx = blocks left in this row
147 | Convert_Planar422_to_Planar420_8_AVX2_2:
148 | 	vmovdqa ymm0,YMMWORD ptr[esi+eax]
149 | 	vpavgb ymm0,ymm0,YMMWORD ptr[edx+eax]
150 | 	vmovdqa YMMWORD ptr[edi+eax],ymm0
151 | 	add eax,32
152 | 	dec ecx
153 | 	jnz Convert_Planar422_to_Planar420_8_AVX2_2
154 | 
155 | 	add esi,src_pitch2
156 | 	add edx,src_pitch2
157 | 	add edi,dst_pitch
158 | 	dec h
159 | 	jnz Convert_Planar422_to_Planar420_8_AVX2_1
160 | 
161 | 	vzeroupper
162 | Convert_Planar422_to_Planar420_8_AVX2_3:
163 | 	pop edi
164 | 	pop esi
165 | 
166 | 	ret
167 | JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 endp
168 | 
169 | 
170 | JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 proc src1:dword,src2:dword,dst:dword,w16:dword,h:dword,src_pitch2:dword,dst_pitch:dword
171 | ; For h rows: dst[i] = rounded-up average of src1[i] and src2[i] per unsigned 16-bit word, over w16 blocks of 32 bytes.
172 | 	public JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2
173 | ; After each row src1 and src2 advance by src_pitch2 bytes and dst by dst_pitch bytes. All rows must be 32-byte aligned (vmovdqa).
174 | 	push esi
175 | 	push edi
176 | ; A zero width or height returns immediately instead of wrapping the counters. The stack copy of h is used as the row counter.
177 | 	cmp w16,0
178 | 	je Convert_Planar422_to_Planar420_16_AVX2_3
179 | 	cmp h,0
180 | 	je Convert_Planar422_to_Planar420_16_AVX2_3
181 | 
182 | 	mov edi,dst
183 | 	mov esi,src1
184 | 	mov edx,src2
185 | 
186 | Convert_Planar422_to_Planar420_16_AVX2_1:
187 | 	xor eax,eax					; eax = byte offset inside the row
188 | 	mov ecx,w16					; ecx = blocks left in this row
189 | Convert_Planar422_to_Planar420_16_AVX2_2:
190 | 	vmovdqa ymm0,YMMWORD ptr[esi+eax]
191 | 	vpavgw ymm0,ymm0,YMMWORD ptr[edx+eax]
192 | 	vmovdqa YMMWORD ptr[edi+eax],ymm0
193 | 	add eax,32
194 | 	dec ecx
195 | 	jnz Convert_Planar422_to_Planar420_16_AVX2_2
196 | 
197 | 	add esi,src_pitch2
198 | 	add edx,src_pitch2
199 | 	add edi,dst_pitch
200 | 	dec h
201 | 	jnz Convert_Planar422_to_Planar420_16_AVX2_1
202 | 
203 | 	vzeroupper
204 | Convert_Planar422_to_Planar420_16_AVX2_3:
205 | 	pop edi
206 | 	pop esi
207 | 
208 | 	ret
209 | JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 endp
210 | 
211 | 
212 | JPSDR_HDRTools_Scale_20_XYZ_AVX2 proc src:dword,dst:dword,w8:dword,h:dword,src_pitch:dword,dst_pitch:dword,
213 | 	ValMin:dword,Coeff:dword
214 | ; Per float: dst = clamp(cvt_int32((src + *ValMin) * (*Coeff * 1048575.0)), 0, 1048575); conversion uses the current MXCSR rounding mode.
215 | 	public JPSDR_HDRTools_Scale_20_XYZ_AVX2
216 | ; w8 = blocks of 8 floats (32 bytes) per row, h = rows, pitches in bytes; ValMin/Coeff point to one float each. Rows must be 32-byte aligned.
217 | 	push esi
218 | 	push edi
219 | 	push ebx
220 | 
221 | 	mov ebx,w8					; ebx = blocks per row, kept for every row
222 | 	test ebx,ebx
223 | 	jz Scale_20_XYZ_AVX2_3		; zero width: nothing to do (must not wrap around)
224 | 	cmp h,0
225 | 	je Scale_20_XYZ_AVX2_3		; zero height: nothing to do
226 | 
227 | 	mov esi,ValMin
228 | 	vbroadcastss ymm1,dword ptr[esi]	; ymm1 = *ValMin in all 8 lanes
229 | 	mov esi,Coeff
230 | 	vbroadcastss ymm2,dword ptr[esi]	; ymm2 = *Coeff in all 8 lanes
231 | 
232 | 	vmovdqa ymm3,YMMWORD ptr data_dw_1048575	; upper clamp
233 | 	vmovdqa ymm4,YMMWORD ptr data_dw_0			; lower clamp
234 | 	vmulps ymm2,ymm2,YMMWORD ptr data_f_1048575	; fold the 20-bit scale into the coefficient once
235 | 
236 | 	mov esi,src
237 | 	mov edi,dst
238 | 
239 | Scale_20_XYZ_AVX2_1:
240 | 	xor eax,eax					; eax = byte offset inside the row
241 | 	mov ecx,ebx					; ecx = blocks left in this row
242 | Scale_20_XYZ_AVX2_2:	
243 | 	vaddps ymm0,ymm1,YMMWORD ptr[esi+eax]
244 | 	vmulps ymm0,ymm0,ymm2
245 | 	vcvtps2dq ymm0,ymm0
246 | 	vpminsd ymm0,ymm0,ymm3
247 | 	vpmaxsd ymm0,ymm0,ymm4
248 | 	vmovdqa YMMWORD ptr[edi+eax],ymm0
249 | 	add eax,32
250 | 	dec ecx
251 | 	jnz Scale_20_XYZ_AVX2_2
252 | 
253 | 	add esi,src_pitch
254 | 	add edi,dst_pitch
255 | 	dec h						; the stack copy of h is the row counter
256 | 	jnz Scale_20_XYZ_AVX2_1
257 | 
258 | 	vzeroupper
259 | Scale_20_XYZ_AVX2_3:
260 | 	pop ebx
261 | 	pop edi
262 | 	pop esi
263 | 
264 | 	ret
265 | 
266 | JPSDR_HDRTools_Scale_20_XYZ_AVX2 endp
267 | 
268 | 
269 | JPSDR_HDRTools_Scale_20_RGB_AVX2 proc src:dword,dst:dword,w8:dword,h:dword,src_pitch:dword,dst_pitch:dword
270 | ; Per float: dst = clamp(cvt_int32(src * 1048575.0), 0, 1048575); conversion uses the current MXCSR rounding mode.
271 | 	public JPSDR_HDRTools_Scale_20_RGB_AVX2
272 | ; w8 = blocks of 8 floats (32 bytes) per row, h = rows, pitches in bytes. Rows must be 32-byte aligned (aligned loads/stores).
273 | 	push esi
274 | 	push edi
275 | 	push ebx
276 | 	mov ebx,w8					; ebx = blocks per row, kept for every row
277 | 	test ebx,ebx
278 | 	jz Scale_20_RGB_AVX2_3		; zero width: nothing to do (must not wrap around)
279 | 	cmp h,0
280 | 	je Scale_20_RGB_AVX2_3		; zero height: nothing to do
281 | 
282 | 	vmovaps ymm1,YMMWORD ptr data_f_1048575		; scale factor
283 | 	vmovdqa ymm2,YMMWORD ptr data_dw_1048575	; upper clamp
284 | 	vmovdqa ymm3,YMMWORD ptr data_dw_0			; lower clamp
285 | 	mov esi,src
286 | 	mov edi,dst
287 | 
288 | Scale_20_RGB_AVX2_1:
289 | 	xor eax,eax					; eax = byte offset inside the row
290 | 	mov ecx,ebx					; ecx = blocks left in this row
291 | Scale_20_RGB_AVX2_2:	
292 | 	vmulps ymm0,ymm1,YMMWORD ptr[esi+eax]
293 | 	vcvtps2dq ymm0,ymm0
294 | 	vpminsd ymm0,ymm0,ymm2
295 | 	vpmaxsd ymm0,ymm0,ymm3
296 | 	vmovdqa YMMWORD ptr[edi+eax],ymm0
297 | 	add eax,32
298 | 	dec ecx
299 | 	jnz Scale_20_RGB_AVX2_2
300 | 
301 | 	add esi,src_pitch
302 | 	add edi,dst_pitch
303 | 	dec h						; the stack copy of h is the row counter
304 | 	jnz Scale_20_RGB_AVX2_1
305 | 
306 | 	vzeroupper
307 | Scale_20_RGB_AVX2_3:
308 | 	pop ebx
309 | 	pop edi
310 | 	pop esi
311 | 	ret
312 | JPSDR_HDRTools_Scale_20_RGB_AVX2 endp
313 | 
314 | 
315 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 proc src:dword,dst:dword,w:dword,h:dword,
316 | 	src_pitch:dword,dst_pitch:dword
317 | ; Per 16-bit word: dst = (src + 128, saturating) >> 8. w counts 8-byte units per row: w/4 blocks of 32 bytes, then 16- and 8-byte tails.
318 | 	public JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2
319 | ; h = rows, pitches in bytes. Aligned moves are used for the 32- and 16-byte parts, so rows must be 32-byte aligned.
320 | 	push esi
321 | 	push edi
322 | 	push ebx
323 | 
324 | 	cmp h,0
325 | 	je Convert_RGB64_16toRGB64_8_AVX2_6	; zero height: nothing to do (must not wrap around)
326 | 
327 | 	vmovdqa ymm1,YMMWORD ptr data_w_128	; rounding offset
328 | 	mov esi,src
329 | 	mov edi,dst
330 | 	mov ebx,w
331 | 	shr ebx,2					; ebx = number of full 32-byte blocks per row
332 | 
333 | Convert_RGB64_16toRGB64_8_AVX2_1:
334 | 	mov ecx,ebx
335 | 	xor eax,eax					; eax = byte offset inside the row
336 | 	test ecx,ecx
337 | 	jz Convert_RGB64_16toRGB64_8_AVX2_3
338 | 
339 | Convert_RGB64_16toRGB64_8_AVX2_2:
340 | 	vmovdqa ymm0,YMMWORD ptr[esi+eax]
341 | 	vpaddusw ymm0,ymm0,ymm1
342 | 	vpsrlw ymm0,ymm0,8
343 | 	vmovdqa YMMWORD ptr[edi+eax],ymm0
344 | 	add eax,32
345 | 	dec ecx
346 | 	jnz Convert_RGB64_16toRGB64_8_AVX2_2
347 | 
348 | Convert_RGB64_16toRGB64_8_AVX2_3:
349 | 	test w,2					; 16-byte tail present?
350 | 	jz Convert_RGB64_16toRGB64_8_AVX2_4
351 | 	vmovdqa xmm0,XMMWORD ptr[esi+eax]
352 | 	vpaddusw xmm0,xmm0,xmm1
353 | 	vpsrlw xmm0,xmm0,8
354 | 	vmovdqa XMMWORD ptr[edi+eax],xmm0
355 | 	add eax,16
356 | 
357 | Convert_RGB64_16toRGB64_8_AVX2_4:
358 | 	test w,1					; 8-byte tail present?
359 | 	jz Convert_RGB64_16toRGB64_8_AVX2_5
360 | 	vmovq xmm0,qword ptr[esi+eax]
361 | 	vpaddusw xmm0,xmm0,xmm1
362 | 	vpsrlw xmm0,xmm0,8
363 | 	vmovq qword ptr[edi+eax],xmm0
364 | 
365 | Convert_RGB64_16toRGB64_8_AVX2_5:
366 | 	add esi,src_pitch
367 | 	add edi,dst_pitch
368 | 	dec h						; the stack copy of h is the row counter
369 | 	jnz Convert_RGB64_16toRGB64_8_AVX2_1
370 | 
371 | 	vzeroupper					; leave clean upper YMM state for the caller, like the other procedures in this file
372 | Convert_RGB64_16toRGB64_8_AVX2_6:
373 | 	pop ebx
374 | 	pop edi
375 | 	pop esi
376 | 	ret
377 | 
378 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 endp	
379 | 
380 | 
381 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 proc src:dword,dst:dword,w:dword,h:dword,
382 | 	src_pitch:dword,dst_pitch:dword
383 | ; Per 16-bit word: dst = (src + 32, saturating) >> 6. w counts 8-byte units per row: w/4 blocks of 32 bytes, then 16- and 8-byte tails.
384 | 	public JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2
385 | ; h = rows, pitches in bytes. Aligned moves are used for the 32- and 16-byte parts, so rows must be 32-byte aligned.
386 | 	push esi
387 | 	push edi
388 | 	push ebx
389 | 
390 | 	cmp h,0
391 | 	je Convert_RGB64_16toRGB64_10_AVX2_6	; zero height: nothing to do (must not wrap around)
392 | 
393 | 	vmovdqa ymm1,YMMWORD ptr data_w_32	; rounding offset
394 | 	mov esi,src
395 | 	mov edi,dst
396 | 	mov ebx,w
397 | 	shr ebx,2					; ebx = number of full 32-byte blocks per row
398 | 
399 | Convert_RGB64_16toRGB64_10_AVX2_1:
400 | 	mov ecx,ebx
401 | 	xor eax,eax					; eax = byte offset inside the row
402 | 	test ecx,ecx
403 | 	jz Convert_RGB64_16toRGB64_10_AVX2_3
404 | 
405 | Convert_RGB64_16toRGB64_10_AVX2_2:
406 | 	vmovdqa ymm0,YMMWORD ptr[esi+eax]
407 | 	vpaddusw ymm0,ymm0,ymm1
408 | 	vpsrlw ymm0,ymm0,6
409 | 	vmovdqa YMMWORD ptr[edi+eax],ymm0
410 | 	add eax,32
411 | 	dec ecx
412 | 	jnz Convert_RGB64_16toRGB64_10_AVX2_2
413 | 
414 | Convert_RGB64_16toRGB64_10_AVX2_3:
415 | 	test w,2					; 16-byte tail present?
416 | 	jz Convert_RGB64_16toRGB64_10_AVX2_4
417 | 	vmovdqa xmm0,XMMWORD ptr[esi+eax]
418 | 	vpaddusw xmm0,xmm0,xmm1
419 | 	vpsrlw xmm0,xmm0,6
420 | 	vmovdqa XMMWORD ptr[edi+eax],xmm0
421 | 	add eax,16
422 | 
423 | Convert_RGB64_16toRGB64_10_AVX2_4:
424 | 	test w,1					; 8-byte tail present?
425 | 	jz Convert_RGB64_16toRGB64_10_AVX2_5
426 | 	vmovq xmm0,qword ptr[esi+eax]
427 | 	vpaddusw xmm0,xmm0,xmm1
428 | 	vpsrlw xmm0,xmm0,6
429 | 	vmovq qword ptr[edi+eax],xmm0
430 | 
431 | Convert_RGB64_16toRGB64_10_AVX2_5:
432 | 	add esi,src_pitch
433 | 	add edi,dst_pitch
434 | 	dec h						; the stack copy of h is the row counter
435 | 	jnz Convert_RGB64_16toRGB64_10_AVX2_1
436 | 
437 | 	vzeroupper					; leave clean upper YMM state for the caller, like the other procedures in this file
438 | Convert_RGB64_16toRGB64_10_AVX2_6:
439 | 	pop ebx
440 | 	pop edi
441 | 	pop esi
442 | 	ret
443 | 
444 | JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 endp	
445 | 
446 | 
;-----------------------------------------------------------------------
; JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2
;
; Rescales every 16-bit word of an image from 16 to 12 bits with rounding;
; the result is still stored one word wide:
;   out = saturating_add(in, bias) >> 4
; bias is the word pattern loaded from data_w_8 (declared elsewhere in this
; file; presumably 8 in every word, i.e. half of 1 shl 4 -- confirm there).
;
; Parameters (dwords on the stack):
;   src, dst   : first row of the source / destination image. The 32-byte
;                and 16-byte paths use aligned moves (vmovdqa).
;   w          : row width in 8-byte groups (4 words each)
;   h          : number of rows; it is decremented before being tested, so
;                h = 0 is not handled
;   src_pitch,
;   dst_pitch  : byte step from one row to the next
;
; esi, edi and ebx are saved and restored. eax, ecx, ymm0, ymm1 and the
; flags are modified.
;-----------------------------------------------------------------------
JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 proc src:dword,dst:dword,w:dword,h:dword,
	src_pitch:dword,dst_pitch:dword

	public JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2

	push esi
	push edi
	push ebx

	vmovdqa ymm1,YMMWORD ptr data_w_8	; ymm1 = rounding bias for every word

	mov esi,src
	mov edi,dst
	mov ebx,w
	shr ebx,2				; ebx = full 32-byte blocks per row

Convert_RGB64_16toRGB64_12_AVX2_1:
	mov ecx,ebx				; ecx = blocks left in this row
	xor eax,eax				; eax = byte offset inside the row
	or ecx,ecx
	jz short Convert_RGB64_16toRGB64_12_AVX2_3	; row shorter than one block
	
Convert_RGB64_16toRGB64_12_AVX2_2:
	vmovdqa ymm0,YMMWORD ptr[esi+eax]
	vpaddusw ymm0,ymm0,ymm1			; add bias, unsigned saturation
	vpsrlw ymm0,ymm0,4			; keep the 12 most significant bits
	vmovdqa YMMWORD ptr[edi+eax],ymm0
	add eax,32
	dec ecx					; dec/jnz instead of the slow LOOP
	jnz short Convert_RGB64_16toRGB64_12_AVX2_2

Convert_RGB64_16toRGB64_12_AVX2_3:
	test w,2				; two 8-byte groups left ?
	jz short Convert_RGB64_16toRGB64_12_AVX2_4
	
	vmovdqa xmm0,XMMWORD ptr[esi+eax]
	vpaddusw xmm0,xmm0,xmm1
	vpsrlw xmm0,xmm0,4
	vmovdqa XMMWORD ptr[edi+eax],xmm0
	
	add eax,16

Convert_RGB64_16toRGB64_12_AVX2_4:
	test w,1				; one last 8-byte group ?
	jz short Convert_RGB64_16toRGB64_12_AVX2_5
	
	vmovq xmm0,qword ptr[esi+eax]
	vpaddusw xmm0,xmm0,xmm1
	vpsrlw xmm0,xmm0,4
	vmovq qword ptr[edi+eax],xmm0
	
Convert_RGB64_16toRGB64_12_AVX2_5:
	add esi,src_pitch
	add edi,dst_pitch
	dec h
	jnz short Convert_RGB64_16toRGB64_12_AVX2_1
	
	; ymm0/ymm1 were written: clear the upper halves so that SSE code run
	; by the caller does not pay the AVX/SSE transition penalty.
	vzeroupper
	
	pop ebx
	pop edi
	pop esi

	ret

JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 endp
511 | 
512 | 
;-----------------------------------------------------------------------
; JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2
;
; Scales, in place, each group of four 16-bit words of dst by one float
; read from srcY:
;   dst[4*i+c] = pack_u16( cvt_int32( float(dst[4*i+c]) * srcY[i] ) ), c=0..3
; The float->int32 conversion follows the current MXCSR rounding mode and
; the pack saturates to 0..65535 (vpackusdw).
;
; Parameters (dwords on the stack):
;   dst        : first row of the 16-bit image, modified in place. Pairs of
;                groups are accessed with aligned 16-byte moves (vmovdqa).
;   srcY       : first row of the float factors, one per 4-word group
;   w          : number of 4-word groups per row
;   h          : number of rows; decremented before being tested, so h = 0
;                is not handled
;   dst_pitch,
;   src_pitchY : byte step from one row to the next
;
; esi, edi and ebx are saved and restored. eax, ecx, edx, ymm0-ymm4 and
; the flags are modified.
;-----------------------------------------------------------------------
JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 proc dst:dword,srcY:dword,w:dword,h:dword,dst_pitch:dword,src_pitchY:dword

	public JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2

	push esi
	push edi
	push ebx
	
	mov ebx,w
	shr ebx,1				; ebx = pairs of groups per row
	mov esi,srcY
	mov edi,dst
	mov edx,8				; two floats consumed per iteration
	vpxor xmm4,xmm4,xmm4			; zero, used to widen words to dwords
						; (VEX form: no legacy-SSE op in AVX code)
	
Convert_16_RGB64_HLG_OOTF_AVX2_1:
	mov ecx,ebx
	xor eax,eax				; eax = byte offset in srcY, dst uses 2*eax
	or ecx,ecx
	jz short Convert_16_RGB64_HLG_OOTF_AVX2_3
	
Convert_16_RGB64_HLG_OOTF_AVX2_2:
	vmovss xmm0,dword ptr[esi+eax]
	vmovss xmm1,dword ptr[esi+eax+4]
	vshufps xmm0,xmm0,xmm0,0		; factor of the first group x4
	vshufps xmm1,xmm1,xmm1,0		; factor of the second group x4
	vmovdqa xmm2,XMMWORD ptr[edi+2*eax]	; 8 words = 2 groups
	vinsertf128 ymm0,ymm0,xmm1,1		; ymm0 = first x4 | second x4
	vpunpckhwd xmm3,xmm2,xmm4		; words 4..7 -> dwords
	vpunpcklwd xmm2,xmm2,xmm4		; words 0..3 -> dwords
	vinserti128 ymm2,ymm2,xmm3,1
	vcvtdq2ps ymm2,ymm2
	vmulps ymm2,ymm2,ymm0
	vcvtps2dq ymm2,ymm2
	vextracti128 xmm3,ymm2,1
	vpackusdw xmm2,xmm2,xmm3		; back to 8 words, unsigned saturation
	vmovdqa XMMWORD ptr[edi+2*eax],xmm2
	
	add eax,edx
	loop Convert_16_RGB64_HLG_OOTF_AVX2_2
	
Convert_16_RGB64_HLG_OOTF_AVX2_3:
	test w,1				; odd width: one group left
	jz short Convert_16_RGB64_HLG_OOTF_AVX2_4
	
	vmovss xmm0,dword ptr[esi+eax]
	vshufps xmm0,xmm0,xmm0,0
	vmovq xmm2,qword ptr[edi+2*eax]
	vpunpcklwd xmm2,xmm2,xmm4
	vcvtdq2ps xmm2,xmm2
	vmulps xmm2,xmm2,xmm0
	vcvtps2dq xmm2,xmm2
	vpackusdw xmm2,xmm2,xmm2
	vmovq qword ptr[edi+2*eax],xmm2
	
Convert_16_RGB64_HLG_OOTF_AVX2_4:
	add edi,dst_pitch
	add esi,src_pitchY
	dec h
	jnz Convert_16_RGB64_HLG_OOTF_AVX2_1
	
	vzeroupper
	
	pop ebx
	pop edi
	pop esi

	ret

JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 endp
583 | 
584 | 
;-----------------------------------------------------------------------
; JPSDR_HDRTools_BT2446C_16_XYZ_AVX2
;
; For every float of the two planes dst1 and dst2, computed in place:
;   dst1 = clamp( cvt_int32( (dst1*src + *ValMinX) * (*CoeffX * K) ), 0, MAX )
;   dst2 = clamp( cvt_int32( (dst2*src + *ValMinZ) * (*CoeffZ * K) ), 0, MAX )
; The results are stored as 32-bit integers over the floats. K, MAX and the
; zero vector come from data_f_65535, data_dw_65535 and data_dw_0 (declared
; elsewhere in this file; presumably 65535.0, 65535 and 0 -- confirm there).
; The conversion follows the current MXCSR rounding mode.
;
; Parameters (dwords on the stack):
;   src        : first row of the float multiplier plane
;   dst1, dst2 : first rows of the two planes updated in place, accessed
;                with aligned 32-byte moves
;   w8         : number of 8-float blocks per row (not checked for 0)
;   h          : number of rows (not checked for 0)
;   src_pitch, dst_pitch1, dst_pitch2 : byte step from one row to the next
;   ValMinX, CoeffX, ValMinZ, CoeffZ  : pointers to one float each
;
; esi, edi and ebx are saved and restored. eax, ecx, edx, ymm0-ymm7 and
; the flags are modified.
;-----------------------------------------------------------------------
JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 proc src:dword,dst1:dword,dst2:dword,w8:dword,h:dword,src_pitch:dword,
	dst_pitch1:dword,dst_pitch2:dword,ValMinX:dword,CoeffX:dword,ValMinZ:dword,CoeffZ:dword

	public JPSDR_HDRTools_BT2446C_16_XYZ_AVX2

	push esi
	push edi
	push ebx
	
	mov esi,ValMinX
	vmovss xmm2,dword ptr[esi]
	vshufps xmm2,xmm2,xmm2,0
	; Fix: the upper lane must be filled from xmm2 itself. The previous code
	; targeted ymm1 here, leaving the upper four lanes of ymm2 at zero, so
	; ValMinX was only added to the first four floats of every block.
	vinsertf128 ymm2,ymm2,xmm2,1		; ymm2 = *ValMinX x8
	mov esi,CoeffX
	vmovss xmm3,dword ptr[esi]
	vshufps xmm3,xmm3,xmm3,0
	vinsertf128 ymm3,ymm3,xmm3,1		; ymm3 = *CoeffX x8

	mov esi,ValMinZ
	vmovss xmm4,dword ptr[esi]
	vshufps xmm4,xmm4,xmm4,0
	vinsertf128 ymm4,ymm4,xmm4,1		; ymm4 = *ValMinZ x8
	mov esi,CoeffZ
	vmovss xmm5,dword ptr[esi]
	vshufps xmm5,xmm5,xmm5,0
	vinsertf128 ymm5,ymm5,xmm5,1		; ymm5 = *CoeffZ x8
	
	vmovdqa ymm6,YMMWORD ptr data_dw_65535	; upper clamp bound
	vmovdqa ymm7,YMMWORD ptr data_dw_0	; lower clamp bound
	vmulps ymm3,ymm3,YMMWORD ptr data_f_65535	; fold the output scale
	vmulps ymm5,ymm5,YMMWORD ptr data_f_65535	; into both coefficients
	
	mov esi,src
	mov edi,dst1
	mov edx,dst2
	mov ebx,32				; bytes per block
	
BT2446C_16_XYZ_AVX2_1:
	xor eax,eax				; eax = byte offset inside the row
	mov ecx,w8				; ecx = blocks left in this row
BT2446C_16_XYZ_AVX2_2:
	vmovaps ymm0,YMMWORD ptr[edi+eax]
	vmovaps ymm1,YMMWORD ptr[edx+eax]
	vmulps ymm0,ymm0,YMMWORD ptr[esi+eax]
	vmulps ymm1,ymm1,YMMWORD ptr[esi+eax]
	vaddps ymm0,ymm0,ymm2
	vaddps ymm1,ymm1,ymm4
	vmulps ymm0,ymm0,ymm3
	vmulps ymm1,ymm1,ymm5
	vcvtps2dq ymm0,ymm0
	vcvtps2dq ymm1,ymm1
	vpminsd ymm0,ymm0,ymm6			; clamp from above first,
	vpminsd ymm1,ymm1,ymm6
	vpmaxsd ymm0,ymm0,ymm7			; then from below
	vpmaxsd ymm1,ymm1,ymm7
	vmovdqa YMMWORD ptr[edi+eax],ymm0
	vmovdqa YMMWORD ptr[edx+eax],ymm1
	
	add eax,ebx
	loop BT2446C_16_XYZ_AVX2_2
	
	add esi,src_pitch
	add edi,dst_pitch1
	add edx,dst_pitch2
	dec h
	jnz short BT2446C_16_XYZ_AVX2_1
	
	vzeroupper
	
	pop ebx
	pop edi
	pop esi

	ret

JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 endp
661 | 
662 | 
663 | end
664 | 
665 | 
666 | 
667 | 
668 | 
669 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/HDRTools_AVX2_asm_x64.asm:
--------------------------------------------------------------------------------
  1 | ;
  2 | ;  HDRTools()
  3 | ;
;  Several functions for working on HDR data, and linear to non-linear conversions.
  5 | ;  Copyright (C) 2018 JPSDR
  6 | ;	
  7 | ;  HDRTools is free software; you can redistribute it and/or modify
  8 | ;  it under the terms of the GNU General Public License as published by
  9 | ;  the Free Software Foundation; either version 2, or (at your option)
 10 | ;  any later version.
 11 | ;   
 12 | ;  HDRTools is distributed in the hope that it will be useful,
 13 | ;  but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 | ;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 15 | ;  GNU General Public License for more details.
 16 | ;   
 17 | ;  You should have received a copy of the GNU General Public License
 18 | ;  along with GNU Make; see the file COPYING.  If not, write to
 19 | ;  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 20 | ;
 21 | ;
 22 | 
; Constant tables used by the AVX2 procedures of this file. They live in a
; segment declared with 32-byte alignment and every table is 32 bytes long,
; so each one can be read with an aligned 256-bit load.
.data

align 16

data segment align(32)

; 8 x float and 8 x dword forms of the 20-bit maximum (Scale_20_* procedures)
; and of the 16-bit maximum (BT2446C_16_XYZ), plus an all-zero vector used as
; the lower clamp bound.
data_f_1048575 real4 8 dup(1048575.0)
data_f_65535 real4 8 dup(65535.0)
data_dw_1048575 dword 8 dup(1048575)
data_dw_65535 dword 8 dup(65535)
data_dw_0 dword 8 dup(0)

; 16 x word rounding biases added before the right shift by 8, 6 and 4 bits
; in the Convert_RGB64_16toRGB64_8/10/12 procedures.
data_w_128 word 16 dup(128)
data_w_32 word 16 dup(32)
data_w_8 word 16 dup(8)
 38 | 
 39 | .code
 40 | 
 41 | 
 42 | ;JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 proc src1:dword,src2:dword,dst:dword,w:dword
 43 | ; src1 = rcx
 44 | ; src2 = rdx
 45 | ; dst = r8
 46 | ; w = r9d
 47 | 
 48 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 proc public frame
 49 | 
 50 | 	.endprolog
 51 | 		
 52 | 	vpcmpeqb ymm3,ymm3,ymm3
 53 | 	
 54 | 	mov r10,rcx				; r10=src1
 55 | 	xor rcx,rcx
 56 | 	xor rax,rax	
 57 | 	mov ecx,r9d	
 58 | 	mov r11,32
 59 | 	
 60 | Convert_Planar420_to_Planar422_8_AVX2_1:
 61 | 	vmovdqa ymm0,YMMWORD ptr[r10+rax]
 62 | 	vmovdqa ymm1,YMMWORD ptr[rdx+rax]
 63 | 	vpxor ymm2,ymm0,ymm3
 64 | 	vpxor ymm1,ymm1,ymm3
 65 | 	vpavgb ymm2,ymm2,ymm1
 66 | 	vpxor ymm2,ymm2,ymm3
 67 | 	vpavgb ymm2,ymm2,ymm0
 68 | 	
 69 | 	vmovdqa YMMWORD ptr[r8+rax],ymm2
 70 | 	add rax,r11
 71 | 	loop Convert_Planar420_to_Planar422_8_AVX2_1
 72 | 	
 73 | 	vzeroupper
 74 | 	
 75 | 	ret
 76 | 
 77 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 endp
 78 | 
 79 | 
 80 | ;JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 proc src1:dword,src2:dword,dst:dword,w:dword
 81 | ; src1 = rcx
 82 | ; src2 = rdx
 83 | ; dst = r8
 84 | ; w = r9d
 85 | 
 86 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 proc public frame
 87 | 
 88 | 	.endprolog
 89 | 		
 90 | 	vpcmpeqb ymm3,ymm3,ymm3
 91 | 	
 92 | 	mov r10,rcx				; r10=src1
 93 | 	xor rcx,rcx
 94 | 	xor rax,rax	
 95 | 	mov ecx,r9d	
 96 | 	mov r11,32
 97 | 	
 98 | Convert_Planar420_to_Planar422_16_AVX2_1:
 99 | 	vmovdqa ymm0,YMMWORD ptr[r10+rax]
100 | 	vmovdqa ymm1,YMMWORD ptr[rdx+rax]
101 | 	vpxor ymm2,ymm0,ymm3
102 | 	vpxor ymm1,ymm1,ymm3
103 | 	vpavgw ymm2,ymm2,ymm1
104 | 	vpxor ymm2,ymm2,ymm3
105 | 	vpavgw ymm2,ymm2,ymm0
106 | 	
107 | 	vmovdqa YMMWORD ptr[r8+rax],ymm2
108 | 	add rax,r11
109 | 	loop Convert_Planar420_to_Planar422_16_AVX2_1
110 | 	
111 | 	vzeroupper
112 | 	
113 | 	ret
114 | 
115 | JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 endp
116 | 
117 | 
;-----------------------------------------------------------------------
; JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2
;   (src1, src2, dst, w32, h, src_pitch2, dst_pitch)
; ABI: Microsoft x64.
;
; For h rows, stores the rounded-up byte average of two source rows:
;   dst[i] = (src1[i] + src2[i] + 1) >> 1
;
; In:   rcx = src1
;       rdx = src2
;       r8  = dst
;       r9d = w32, number of 32-byte blocks per row (not checked for 0)
;       stack: h (dword, not checked for 0), src_pitch2 and dst_pitch
;              (both read as qwords)
; After each row src_pitch2 is added to both source pointers and dst_pitch
; to dst. src1 and dst are accessed with aligned 32-byte moves.
; Saves/restores rbp, rsi, rbx, r12. Modifies rax, rcx, rdx, r8, r10, r11,
; ymm0 and the flags.
;-----------------------------------------------------------------------
JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 proc public frame

h equ dword ptr[rbp+48]
src_pitch2 equ qword ptr[rbp+56]
dst_pitch equ qword ptr[rbp+64]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	.endprolog

	mov r12,dst_pitch
	mov r11,src_pitch2
	mov r10d,h				; r10d = rows left
	mov rsi,rcx				; rsi = src1, rcx becomes the counter
	mov ebx,32				; rbx = bytes per block

Convert_Planar422_to_Planar420_8_AVX2_row:
	mov ecx,r9d				; rcx = blocks left in this row
	xor eax,eax				; rax = byte offset inside the row

Convert_Planar422_to_Planar420_8_AVX2_block:
	vmovdqa ymm0,YMMWORD ptr[rsi+rax]
	vpavgb ymm0,ymm0,YMMWORD ptr[rdx+rax]

	vmovdqa YMMWORD ptr[r8+rax],ymm0
	add rax,rbx
	dec rcx
	jnz short Convert_Planar422_to_Planar420_8_AVX2_block

	add r8,r12				; next destination row
	add rdx,r11				; next source rows
	add rsi,r11
	dec r10d
	jnz short Convert_Planar422_to_Planar420_8_AVX2_row

	vzeroupper

	pop r12
	pop rbx
	pop rsi
	pop rbp

	ret

JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 endp
176 | 
177 | 
;-----------------------------------------------------------------------
; JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2
;   (src1, src2, dst, w16, h, src_pitch2, dst_pitch)
; ABI: Microsoft x64.
;
; For h rows, stores the rounded-up 16-bit word average of two source rows:
;   dst[i] = (src1[i] + src2[i] + 1) >> 1
;
; In:   rcx = src1
;       rdx = src2
;       r8  = dst
;       r9d = w16, number of 32-byte blocks (16 words) per row
;             (not checked for 0)
;       stack: h (dword, not checked for 0), src_pitch2 and dst_pitch
;              (both read as qwords)
; After each row src_pitch2 is added to both source pointers and dst_pitch
; to dst. src1 and dst are accessed with aligned 32-byte moves.
; Saves/restores rbp, rsi, rbx, r12. Modifies rax, rcx, rdx, r8, r10, r11,
; ymm0 and the flags.
;-----------------------------------------------------------------------
JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 proc public frame

h equ dword ptr[rbp+48]
src_pitch2 equ qword ptr[rbp+56]
dst_pitch equ qword ptr[rbp+64]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	.endprolog

	mov r12,dst_pitch
	mov r11,src_pitch2
	mov r10d,h				; r10d = rows left
	mov rsi,rcx				; rsi = src1, rcx becomes the counter
	mov ebx,32				; rbx = bytes per block

Convert_Planar422_to_Planar420_16_AVX2_row:
	mov ecx,r9d				; rcx = blocks left in this row
	xor eax,eax				; rax = byte offset inside the row

Convert_Planar422_to_Planar420_16_AVX2_block:
	vmovdqa ymm0,YMMWORD ptr[rsi+rax]
	vpavgw ymm0,ymm0,YMMWORD ptr[rdx+rax]

	vmovdqa YMMWORD ptr[r8+rax],ymm0
	add rax,rbx
	dec rcx
	jnz short Convert_Planar422_to_Planar420_16_AVX2_block

	add r8,r12				; next destination row
	add rdx,r11				; next source rows
	add rsi,r11
	dec r10d
	jnz short Convert_Planar422_to_Planar420_16_AVX2_row

	vzeroupper

	pop r12
	pop rbx
	pop rsi
	pop rbp

	ret

JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 endp
236 | 
237 | 
;-----------------------------------------------------------------------
; JPSDR_HDRTools_Scale_20_XYZ_AVX2
;   (src, dst, w8, h, src_pitch, dst_pitch, ValMin, Coeff)
; ABI: Microsoft x64.
;
; Converts floats to clamped 20-bit integers stored as dwords:
;   dst = clamp( cvt_int32( (src + *ValMin) * (*Coeff * 1048575.0) ),
;                0, 1048575 )
; The conversion follows the current MXCSR rounding mode.
;
; In:   rcx = src (floats)
;       rdx = dst (dwords), written with aligned 32-byte moves
;       r8d = w8, number of 8-float blocks per row (not checked for 0)
;       r9d = h, number of rows (not checked for 0)
;       stack: src_pitch, dst_pitch (byte steps, read as qwords),
;              ValMin, Coeff (pointers to one float each)
; Saves/restores rbp, rsi, rbx. Modifies rax, rcx, rdx, r9, r10, r11,
; ymm0-ymm4 and the flags.
;-----------------------------------------------------------------------
JPSDR_HDRTools_Scale_20_XYZ_AVX2 proc public frame

src_pitch equ qword ptr[rbp+48]
dst_pitch equ qword ptr[rbp+56]
ValMin equ qword ptr[rbp+64]
Coeff equ qword ptr[rbp+72]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	.endprolog

	mov rax,ValMin
	vbroadcastss ymm1,dword ptr[rax]	; ymm1 = *ValMin x8
	mov rax,Coeff
	vbroadcastss ymm2,dword ptr[rax]	; ymm2 = *Coeff x8

	vmulps ymm2,ymm2,YMMWORD ptr data_f_1048575	; fold the output scale
	vmovdqa ymm3,YMMWORD ptr data_dw_1048575	; upper clamp bound
	vmovdqa ymm4,YMMWORD ptr data_dw_0		; lower clamp bound

	mov rsi,rcx				; rsi = src, rcx becomes the counter
	mov r10,src_pitch
	mov r11,dst_pitch
	mov ebx,32				; rbx = bytes per block

Scale_20_XYZ_AVX2_row:
	mov ecx,r8d				; rcx = blocks left in this row
	xor eax,eax				; rax = byte offset inside the row
Scale_20_XYZ_AVX2_block:
	vaddps ymm0,ymm1,YMMWORD ptr[rsi+rax]
	vmulps ymm0,ymm0,ymm2
	vcvtps2dq ymm0,ymm0
	vpminsd ymm0,ymm0,ymm3			; clamp from above first,
	vpmaxsd ymm0,ymm0,ymm4			; then from below
	vmovdqa YMMWORD ptr[rdx+rax],ymm0

	add rax,rbx
	dec rcx
	jnz short Scale_20_XYZ_AVX2_block

	add rsi,r10
	add rdx,r11
	dec r9d
	jnz short Scale_20_XYZ_AVX2_row

	vzeroupper

	pop rbx
	pop rsi
	pop rbp

	ret

JPSDR_HDRTools_Scale_20_XYZ_AVX2 endp
308 | 
309 | 
;-----------------------------------------------------------------------
; JPSDR_HDRTools_Scale_20_RGB_AVX2(src, dst, w8, h, src_pitch, dst_pitch)
; ABI: Microsoft x64.
;
; Converts floats to clamped 20-bit integers stored as dwords:
;   dst = clamp( cvt_int32( src * 1048575.0 ), 0, 1048575 )
; The conversion follows the current MXCSR rounding mode.
;
; In:   rcx = src (floats)
;       rdx = dst (dwords), written with aligned 32-byte moves
;       r8d = w8, number of 8-float blocks per row (not checked for 0)
;       r9d = h, number of rows (not checked for 0)
;       stack: src_pitch, dst_pitch (byte steps, read as qwords)
; Saves/restores rbp, rsi, rbx. Modifies rax, rcx, rdx, r9, r10, r11,
; ymm0-ymm3 and the flags.
;-----------------------------------------------------------------------
JPSDR_HDRTools_Scale_20_RGB_AVX2 proc public frame

src_pitch equ qword ptr[rbp+48]
dst_pitch equ qword ptr[rbp+56]
; The two equates below are not referenced by this procedure.
ValMin equ qword ptr[rbp+64]
Coeff equ qword ptr[rbp+72]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	.endprolog

	vmovdqa ymm3,YMMWORD ptr data_dw_0		; lower clamp bound
	vmovdqa ymm2,YMMWORD ptr data_dw_1048575	; upper clamp bound
	vmovaps ymm1,YMMWORD ptr data_f_1048575		; output scale

	mov rsi,rcx				; rsi = src, rcx becomes the counter
	mov r10,src_pitch
	mov r11,dst_pitch
	mov ebx,32				; rbx = bytes per block

Scale_20_RGB_AVX2_row:
	mov ecx,r8d				; rcx = blocks left in this row
	xor eax,eax				; rax = byte offset inside the row
Scale_20_RGB_AVX2_block:
	vmulps ymm0,ymm1,YMMWORD ptr[rsi+rax]
	vcvtps2dq ymm0,ymm0
	vpminsd ymm0,ymm0,ymm2			; clamp from above first,
	vpmaxsd ymm0,ymm0,ymm3			; then from below
	vmovdqa YMMWORD ptr[rdx+rax],ymm0

	add rax,rbx
	dec rcx
	jnz short Scale_20_RGB_AVX2_block

	add rsi,r10
	add rdx,r11
	dec r9d
	jnz short Scale_20_RGB_AVX2_row

	vzeroupper

	pop rbx
	pop rsi
	pop rbp

	ret

JPSDR_HDRTools_Scale_20_RGB_AVX2 endp
369 | 
370 | 
;-----------------------------------------------------------------------
; JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2
;   (src, dst, w, h, src_pitch, dst_pitch)
; ABI: Microsoft x64.
;
; Rescales every 16-bit word from 16 to 8 bits with rounding; the result is
; still stored one word wide:
;   dst = min(src + 128, 65535) >> 8
;
; In:   rcx = src
;       rdx = dst
;       r8d = w, row width in 8-byte groups (4 words each)
;       r9d = h, number of rows (not checked for 0)
;       stack: src_pitch, dst_pitch (byte steps, read as qwords)
; The 32-byte and 16-byte paths use aligned moves (vmovdqa).
; Saves/restores rbp, rdi, rsi, rbx, r12, r13, r14. Modifies rax, rcx, rdx,
; r9, r10, r11, ymm0, ymm1 and the flags.
;-----------------------------------------------------------------------
JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 proc public frame

src_pitch equ qword ptr[rbp+48]
dst_pitch equ qword ptr[rbp+56]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rdi
	.pushreg rdi
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	push r13
	.pushreg r13
	push r14
	.pushreg r14
	.endprolog

	vmovdqa ymm1,YMMWORD ptr data_w_128	; rounding bias: 128 in every word

	mov rsi,rcx				; rsi = src
	mov rdi,rdx				; rdi = dst
	mov ebx,r8d
	mov r10,src_pitch
	mov r11,dst_pitch
	shr ebx,2				; ebx = full 32-byte blocks per row
	mov rdx,32				; step of the 32-byte path
	mov r12,16				; step of the 16-byte path
	mov r13,2				; width bit: two groups left
	mov r14,1				; width bit: one group left

Convert_RGB64_16toRGB64_8_AVX2_1:
	mov ecx,ebx				; rcx = blocks left in this row
	xor rax,rax				; rax = byte offset inside the row
	or ecx,ecx
	jz Convert_RGB64_16toRGB64_8_AVX2_3	; row shorter than one block
	
Convert_RGB64_16toRGB64_8_AVX2_2:
	vmovdqa ymm0,YMMWORD ptr[rsi+rax]
	vpaddusw ymm0,ymm0,ymm1			; add bias, unsigned saturation
	vpsrlw ymm0,ymm0,8			; keep the 8 most significant bits
	vmovdqa YMMWORD ptr[rdi+rax],ymm0
	add rax,rdx
	dec rcx					; dec/jnz instead of the slow LOOP
	jnz short Convert_RGB64_16toRGB64_8_AVX2_2
	
Convert_RGB64_16toRGB64_8_AVX2_3:
	test r8d,r13d
	jz short Convert_RGB64_16toRGB64_8_AVX2_4

	vmovdqa xmm0,XMMWORD ptr[rsi+rax]
	vpaddusw xmm0,xmm0,xmm1
	vpsrlw xmm0,xmm0,8
	vmovdqa XMMWORD ptr[rdi+rax],xmm0
	add rax,r12

Convert_RGB64_16toRGB64_8_AVX2_4:
	test r8d,r14d
	jz short Convert_RGB64_16toRGB64_8_AVX2_5
	
	vmovq xmm0,qword ptr[rsi+rax]
	vpaddusw xmm0,xmm0,xmm1
	vpsrlw xmm0,xmm0,8
	vmovq qword ptr[rdi+rax],xmm0
	
Convert_RGB64_16toRGB64_8_AVX2_5:
	add rsi,r10
	add rdi,r11
	dec r9d
	jnz short Convert_RGB64_16toRGB64_8_AVX2_1
	
	; ymm0/ymm1 were written: clear the upper halves before returning, as
	; the other AVX2 procedures of this file do, so that SSE code run by
	; the caller does not pay the AVX/SSE transition penalty.
	vzeroupper
	
	pop r14
	pop r13
	pop r12
	pop rbx
	pop rsi
	pop rdi
	pop rbp

	ret

JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 endp
464 | 
465 | 
;-----------------------------------------------------------------------
; JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2
;   (src, dst, w, h, src_pitch, dst_pitch)
; ABI: Microsoft x64.
;
; Rescales every 16-bit word from 16 to 10 bits with rounding; the result
; is still stored one word wide:
;   dst = min(src + 32, 65535) >> 6
;
; In:   rcx = src
;       rdx = dst
;       r8d = w, row width in 8-byte groups (4 words each)
;       r9d = h, number of rows (not checked for 0)
;       stack: src_pitch, dst_pitch (byte steps, read as qwords)
; The 32-byte and 16-byte paths use aligned moves (vmovdqa).
; Saves/restores rbp, rdi, rsi, rbx, r12, r13, r14. Modifies rax, rcx, rdx,
; r9, r10, r11, ymm0, ymm1 and the flags.
;-----------------------------------------------------------------------
JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 proc public frame

src_pitch equ qword ptr[rbp+48]
dst_pitch equ qword ptr[rbp+56]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rdi
	.pushreg rdi
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	push r13
	.pushreg r13
	push r14
	.pushreg r14
	.endprolog

	vmovdqa ymm1,YMMWORD ptr data_w_32	; rounding bias: 32 in every word

	mov rsi,rcx				; rsi = src
	mov rdi,rdx				; rdi = dst
	mov ebx,r8d
	mov r10,src_pitch
	mov r11,dst_pitch
	shr ebx,2				; ebx = full 32-byte blocks per row
	mov rdx,32				; step of the 32-byte path
	mov r12,16				; step of the 16-byte path
	mov r13,2				; width bit: two groups left
	mov r14,1				; width bit: one group left

Convert_RGB64_16toRGB64_10_AVX2_1:
	mov ecx,ebx				; rcx = blocks left in this row
	xor rax,rax				; rax = byte offset inside the row
	or ecx,ecx
	jz Convert_RGB64_16toRGB64_10_AVX2_3	; row shorter than one block
	
Convert_RGB64_16toRGB64_10_AVX2_2:
	vmovdqa ymm0,YMMWORD ptr[rsi+rax]
	vpaddusw ymm0,ymm0,ymm1			; add bias, unsigned saturation
	vpsrlw ymm0,ymm0,6			; keep the 10 most significant bits
	vmovdqa YMMWORD ptr[rdi+rax],ymm0
	add rax,rdx
	dec rcx					; dec/jnz instead of the slow LOOP
	jnz short Convert_RGB64_16toRGB64_10_AVX2_2
	
Convert_RGB64_16toRGB64_10_AVX2_3:
	test r8d,r13d
	jz short Convert_RGB64_16toRGB64_10_AVX2_4

	vmovdqa xmm0,XMMWORD ptr[rsi+rax]
	vpaddusw xmm0,xmm0,xmm1
	vpsrlw xmm0,xmm0,6
	vmovdqa XMMWORD ptr[rdi+rax],xmm0
	add rax,r12

Convert_RGB64_16toRGB64_10_AVX2_4:
	test r8d,r14d
	jz short Convert_RGB64_16toRGB64_10_AVX2_5
	
	vmovq xmm0,qword ptr[rsi+rax]
	vpaddusw xmm0,xmm0,xmm1
	vpsrlw xmm0,xmm0,6
	vmovq qword ptr[rdi+rax],xmm0
	
Convert_RGB64_16toRGB64_10_AVX2_5:
	add rsi,r10
	add rdi,r11
	dec r9d
	jnz short Convert_RGB64_16toRGB64_10_AVX2_1
	
	; ymm0/ymm1 were written: clear the upper halves before returning, as
	; the other AVX2 procedures of this file do, so that SSE code run by
	; the caller does not pay the AVX/SSE transition penalty.
	vzeroupper
	
	pop r14
	pop r13
	pop r12
	pop rbx
	pop rsi
	pop rdi
	pop rbp

	ret

JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 endp
559 | 
560 | 
;-----------------------------------------------------------------------
; JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2
;   (src, dst, w, h, src_pitch, dst_pitch)
; ABI: Microsoft x64.
;
; Rescales every 16-bit word from 16 to 12 bits with rounding; the result
; is still stored one word wide:
;   dst = min(src + 8, 65535) >> 4
;
; In:   rcx = src
;       rdx = dst
;       r8d = w, row width in 8-byte groups (4 words each)
;       r9d = h, number of rows (not checked for 0)
;       stack: src_pitch, dst_pitch (byte steps, read as qwords)
; The 32-byte and 16-byte paths use aligned moves (vmovdqa).
; Saves/restores rbp, rdi, rsi, rbx, r12, r13, r14. Modifies rax, rcx, rdx,
; r9, r10, r11, ymm0, ymm1 and the flags.
;-----------------------------------------------------------------------
JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 proc public frame

src_pitch equ qword ptr[rbp+48]
dst_pitch equ qword ptr[rbp+56]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rdi
	.pushreg rdi
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	push r13
	.pushreg r13
	push r14
	.pushreg r14
	.endprolog

	vmovdqa ymm1,YMMWORD ptr data_w_8	; rounding bias: 8 in every word

	mov rsi,rcx				; rsi = src
	mov rdi,rdx				; rdi = dst
	mov ebx,r8d
	mov r10,src_pitch
	mov r11,dst_pitch
	shr ebx,2				; ebx = full 32-byte blocks per row
	mov rdx,32				; step of the 32-byte path
	mov r12,16				; step of the 16-byte path
	mov r13,2				; width bit: two groups left
	mov r14,1				; width bit: one group left

Convert_RGB64_16toRGB64_12_AVX2_1:
	mov ecx,ebx				; rcx = blocks left in this row
	xor rax,rax				; rax = byte offset inside the row
	or ecx,ecx
	jz Convert_RGB64_16toRGB64_12_AVX2_3	; row shorter than one block
	
Convert_RGB64_16toRGB64_12_AVX2_2:
	vmovdqa ymm0,YMMWORD ptr[rsi+rax]
	vpaddusw ymm0,ymm0,ymm1			; add bias, unsigned saturation
	vpsrlw ymm0,ymm0,4			; keep the 12 most significant bits
	vmovdqa YMMWORD ptr[rdi+rax],ymm0
	add rax,rdx
	dec rcx					; dec/jnz instead of the slow LOOP
	jnz short Convert_RGB64_16toRGB64_12_AVX2_2
	
Convert_RGB64_16toRGB64_12_AVX2_3:
	test r8d,r13d
	jz short Convert_RGB64_16toRGB64_12_AVX2_4

	vmovdqa xmm0,XMMWORD ptr[rsi+rax]
	vpaddusw xmm0,xmm0,xmm1
	vpsrlw xmm0,xmm0,4
	vmovdqa XMMWORD ptr[rdi+rax],xmm0
	add rax,r12

Convert_RGB64_16toRGB64_12_AVX2_4:
	test r8d,r14d
	jz short Convert_RGB64_16toRGB64_12_AVX2_5
	
	vmovq xmm0,qword ptr[rsi+rax]
	vpaddusw xmm0,xmm0,xmm1
	vpsrlw xmm0,xmm0,4
	vmovq qword ptr[rdi+rax],xmm0
	
Convert_RGB64_16toRGB64_12_AVX2_5:
	add rsi,r10
	add rdi,r11
	dec r9d
	jnz short Convert_RGB64_16toRGB64_12_AVX2_1
	
	; ymm0/ymm1 were written: clear the upper halves before returning, as
	; the other AVX2 procedures of this file do, so that SSE code run by
	; the caller does not pay the AVX/SSE transition penalty.
	vzeroupper
	
	pop r14
	pop r13
	pop r12
	pop rbx
	pop rsi
	pop rdi
	pop rbp

	ret

JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 endp
654 | 
655 | 
;-----------------------------------------------------------------------
; JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2
;   (dst, srcY, w, h, dst_pitch, src_pitchY)
; ABI: Microsoft x64.
;
; Scales, in place, each group of four 16-bit words of dst by one float
; read from srcY:
;   dst[4*i+c] = pack_u16( cvt_int32( float(dst[4*i+c]) * srcY[i] ) ), c=0..3
; The float->int32 conversion follows the current MXCSR rounding mode and
; the pack saturates to 0..65535 (vpackusdw).
;
; In:   rcx = dst, modified in place. Pairs of groups are accessed with
;             aligned 16-byte moves (vmovdqa).
;       rdx = srcY, one float per 4-word group
;       r8d = w, number of 4-word groups per row
;       r9d = h, number of rows (not checked for 0)
;       stack: dst_pitch, src_pitchY (byte steps, read as qwords)
; Saves/restores rbp, rsi, rdi, rbx, r12. Modifies rax, rcx, rdx, r9, r10,
; r11, ymm0-ymm4 and the flags.
;-----------------------------------------------------------------------
JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 proc public frame	

dst_pitch equ qword ptr[rbp+48]
src_pitchY equ qword ptr[rbp+56]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rdi
	.pushreg rdi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	.endprolog	
	
	mov rdi,rcx				; rdi = dst
	mov rsi,rdx				; rsi = srcY
	mov r10d,r8d
	mov r11,dst_pitch
	mov r12,src_pitchY
	mov rdx,8				; two floats consumed per iteration
	shr r10d,1				; r10d = pairs of groups per row
	mov rbx,1				; width bit: one group left
	xor rcx,rcx
	vpxor xmm4,xmm4,xmm4			; zero, used to widen words to dwords
						; (VEX form: no legacy-SSE op in AVX code)
	
Convert_16_RGB64_HLG_OOTF_AVX2_1:
	mov ecx,r10d
	xor rax,rax				; rax = byte offset in srcY, dst uses 2*rax
	or ecx,ecx
	jz short Convert_16_RGB64_HLG_OOTF_AVX2_3
	
Convert_16_RGB64_HLG_OOTF_AVX2_2:
	vmovss xmm0,dword ptr[rsi+rax]
	vmovss xmm1,dword ptr[rsi+rax+4]
	vshufps xmm0,xmm0,xmm0,0		; factor of the first group x4
	vshufps xmm1,xmm1,xmm1,0		; factor of the second group x4
	vmovdqa xmm2,XMMWORD ptr[rdi+2*rax]	; 8 words = 2 groups
	vinsertf128 ymm0,ymm0,xmm1,1		; ymm0 = first x4 | second x4
	vpunpckhwd xmm3,xmm2,xmm4		; words 4..7 -> dwords
	vpunpcklwd xmm2,xmm2,xmm4		; words 0..3 -> dwords
	vinserti128 ymm2,ymm2,xmm3,1
	vcvtdq2ps ymm2,ymm2
	vmulps ymm2,ymm2,ymm0
	vcvtps2dq ymm2,ymm2
	vextracti128 xmm3,ymm2,1
	vpackusdw xmm2,xmm2,xmm3		; back to 8 words, unsigned saturation
	vmovdqa XMMWORD ptr[rdi+2*rax],xmm2
	
	add rax,rdx
	loop Convert_16_RGB64_HLG_OOTF_AVX2_2
	
Convert_16_RGB64_HLG_OOTF_AVX2_3:
	test r8d,ebx				; odd width: one group left
	jz short Convert_16_RGB64_HLG_OOTF_AVX2_4
	
	vmovss xmm0,dword ptr[rsi+rax]
	vshufps xmm0,xmm0,xmm0,0
	vmovq xmm2,qword ptr[rdi+2*rax]
	vpunpcklwd xmm2,xmm2,xmm4
	vcvtdq2ps xmm2,xmm2
	vmulps xmm2,xmm2,xmm0
	vcvtps2dq xmm2,xmm2
	vpackusdw xmm2,xmm2,xmm2
	vmovq qword ptr[rdi+2*rax],xmm2
	
Convert_16_RGB64_HLG_OOTF_AVX2_4:
	add rdi,r11
	add rsi,r12
	dec r9d
	jnz Convert_16_RGB64_HLG_OOTF_AVX2_1
	
	vzeroupper
	
	pop r12
	pop rbx
	pop rdi
	pop rsi
	pop rbp

	ret

JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 endp
748 | 
749 | 
;-----------------------------------------------------------------------
; JPSDR_HDRTools_BT2446C_16_XYZ_AVX2
;   (src, dst1, dst2, w8, h, src_pitch, dst_pitch1, dst_pitch2,
;    ValMinX, CoeffX, ValMinZ, CoeffZ)
; ABI: Microsoft x64.
;
; For every float of the two planes dst1 and dst2, computed in place:
;   dst1 = clamp( cvt_int32( (src*dst1 + *ValMinX) * (*CoeffX * 65535.0) ),
;                 0, 65535 )
;   dst2 = clamp( cvt_int32( (src*dst2 + *ValMinZ) * (*CoeffZ * 65535.0) ),
;                 0, 65535 )
; The results are stored as dwords over the floats. The conversion follows
; the current MXCSR rounding mode.
;
; In:   rcx = src (floats), read with aligned 32-byte moves
;       rdx = dst1, r8 = dst2, written with aligned 32-byte moves
;       r9d = w8, number of 8-float blocks per row (not checked for 0)
;       stack: h (dword, not checked for 0),
;              src_pitch, dst_pitch1, dst_pitch2 (byte steps, qwords),
;              ValMinX, CoeffX, ValMinZ, CoeffZ (pointers to one float each)
; Saves/restores rbp, rsi, rbx, r12, r13 and xmm6-xmm8. Modifies rax, rcx,
; rdx, r8, r10, r11, ymm0-ymm5 and the flags.
;-----------------------------------------------------------------------
JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 proc public frame

h equ dword ptr[rbp+48]
src_pitch equ qword ptr[rbp+56]
dst_pitch1 equ qword ptr[rbp+64]
dst_pitch2 equ qword ptr[rbp+72]
ValMinX equ qword ptr[rbp+80]
CoeffX equ qword ptr[rbp+88]
ValMinZ equ qword ptr[rbp+96]
CoeffZ equ qword ptr[rbp+104]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	push r13
	.pushreg r13
	sub rsp,48
	.allocstack 48
	vmovdqa XMMWORD ptr[rsp],xmm6
	.savexmm128 xmm6,0
	vmovdqa XMMWORD ptr[rsp+16],xmm7
	.savexmm128 xmm7,16
	vmovdqa XMMWORD ptr[rsp+32],xmm8
	.savexmm128 xmm8,32
	.endprolog

	; Broadcast the four scalar parameters to all eight lanes.
	mov rax,ValMinX
	vbroadcastss ymm2,dword ptr[rax]
	mov rax,CoeffX
	vbroadcastss ymm3,dword ptr[rax]
	mov rax,ValMinZ
	vbroadcastss ymm4,dword ptr[rax]
	mov rax,CoeffZ
	vbroadcastss ymm5,dword ptr[rax]

	vmulps ymm3,ymm3,YMMWORD ptr data_f_65535	; fold the output scale
	vmulps ymm5,ymm5,YMMWORD ptr data_f_65535	; into both coefficients
	vmovdqa ymm6,YMMWORD ptr data_dw_65535		; upper clamp bound
	vmovdqa ymm7,YMMWORD ptr data_dw_0		; lower clamp bound

	mov rsi,rcx				; rsi = src, rcx becomes the counter
	mov r13d,h				; r13d = rows left
	mov r10,src_pitch
	mov r11,dst_pitch1
	mov r12,dst_pitch2
	mov ebx,32				; rbx = bytes per block

BT2446C_16_XYZ_AVX2_row:
	mov ecx,r9d				; rcx = blocks left in this row
	xor eax,eax				; rax = byte offset inside the row
BT2446C_16_XYZ_AVX2_block:
	vmovaps ymm8,YMMWORD ptr[rsi+rax]	; shared multiplier
	vmulps ymm0,ymm8,YMMWORD ptr[rdx+rax]
	vmulps ymm1,ymm8,YMMWORD ptr[r8+rax]
	vaddps ymm0,ymm0,ymm2
	vaddps ymm1,ymm1,ymm4
	vmulps ymm0,ymm0,ymm3
	vmulps ymm1,ymm1,ymm5
	vcvtps2dq ymm0,ymm0
	vcvtps2dq ymm1,ymm1
	vpminsd ymm0,ymm0,ymm6			; clamp from above first,
	vpminsd ymm1,ymm1,ymm6
	vpmaxsd ymm0,ymm0,ymm7			; then from below
	vpmaxsd ymm1,ymm1,ymm7
	vmovdqa YMMWORD ptr[rdx+rax],ymm0
	vmovdqa YMMWORD ptr[r8+rax],ymm1

	add rax,rbx
	dec rcx
	jnz short BT2446C_16_XYZ_AVX2_block

	add rsi,r10
	add rdx,r11
	add r8,r12
	dec r13d
	jnz short BT2446C_16_XYZ_AVX2_row

	vzeroupper

	; Restore the callee-saved vector registers in reverse order.
	vmovdqa xmm8,XMMWORD ptr[rsp+32]
	vmovdqa xmm7,XMMWORD ptr[rsp+16]
	vmovdqa xmm6,XMMWORD ptr[rsp]
	add rsp,48

	pop r13
	pop r12
	pop rbx
	pop rsi
	pop rbp

	ret

JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 endp
865 | 
866 | 
867 | end
868 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/MatrixClass.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  MatrixClass
  3 |  *
  4 |  *  Matrix and vector class allowing several operations.
  5 |  *  Copyright (C) 2017 JPSDR
  6 |  *	
  7 |  *  MatrixClass is free software; you can redistribute it and/or modify
  8 |  *  it under the terms of the GNU General Public License as published by
  9 |  *  the Free Software Foundation; either version 2, or (at your option)
 10 |  *  any later version.
 11 |  *   
 12 |  *  MatrixClass is distributed in the hope that it will be useful,
 13 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 15 |  *  GNU General Public License for more details.
 16 |  *   
 17 |  *  You should have received a copy of the GNU General Public License
 18 |  *  along with GNU Make; see the file COPYING.  If not, write to
 19 |  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 20 |  *
 21 |  */
 22 | 
 23 | #ifndef _MATRIX_CLASS_H
 24 | #define _MATRIX_CLASS_H
 25 | 
 26 | #include <stdlib.h>
 27 | #include <stdint.h>
 28 | #include <math.h>
 29 | 
 30 | typedef enum COEFF_DATA_TYPE_ {DATA_NONE,DATA_DOUBLE,DATA_FLOAT,DATA_UINT64,DATA_INT64,
 31 | 	DATA_UINT32,DATA_INT32,DATA_UINT16,DATA_INT16,DATA_UINT8,DATA_INT8} COEFF_DATA_TYPE;
 32 | 
 33 | 
// Receives three CPU feature flags (SSE2, AVX, AVX2); defined outside this header.
// NOTE(review): presumably selects which SIMD code paths the matrix/vector
// classes may use -- confirm against the implementation file.
void SetCPUMatrixClass(bool SSE2,bool AVX,bool AVX2);
 35 | 
 36 | 
 37 | class Vector
 38 | {
 39 | public :
 40 | 	Vector(void);
 41 | 	Vector(const uint16_t l,const COEFF_DATA_TYPE data);
 42 | 	Vector(const Vector &x);
 43 | 	virtual ~Vector(void);
 44 | 
 45 | 	bool AllocCheck(void) const {return(Coeff!=NULL);}
 46 | 	bool Create(void);
 47 | 	bool Create(const uint16_t l,const COEFF_DATA_TYPE data);
 48 | 	bool Create(const Vector &x);
 49 | 	bool CopyStrict(const Vector &x);
 50 | 	bool CopyRaw(const void *ptr);
 51 | 	bool CopyRaw(const void *ptr,uint16_t lgth);
 52 | 	bool ExportRaw(void *ptr);
 53 | 	bool ExportRaw(void *ptr,uint16_t lgth);
 54 | 	void Destroy(void);
 55 | 	bool FillD(const double data);
 56 | 	bool FillF(const float data);
 57 | 	bool FillZero(void);
 58 | 	COEFF_DATA_TYPE GetDataType(void) const {return(data_type);}
 59 | 	bool SetInfo(const uint16_t l,const COEFF_DATA_TYPE data);
 60 | 	void GetInfo(uint16_t &l,COEFF_DATA_TYPE &data) const;
 61 | 	uint16_t GetLength(void) const {return(length);}
 62 | 	void* GetPtrVector(void) const {return(Coeff);}
 63 | 	size_t GetDataSize(void) const {return(size);}
 64 | 	double GetD(const uint16_t i) const {return(((double *)Coeff)[i]);}
 65 | 	float GetF(const uint16_t i) const {return(((float *)Coeff)[i]);}
 66 | 	void SetD(const uint16_t i,const double d) {((double *)Coeff)[i]=d;}
 67 | 	void SetF(const uint16_t i,const float d) {((float *)Coeff)[i]=d;}
 68 | 	bool GetSafeD(const uint16_t i,double &d) const ;
 69 | 	bool SetSafeD(const uint16_t i,const double d);
 70 | 	bool GetSafeF(const uint16_t i,float &d) const ;
 71 | 	bool SetSafeF(const uint16_t i,const float d);
 72 | 
 73 | protected :
 74 | 	void *Coeff;
 75 | 	uint16_t length;
 76 | 	size_t size;
 77 | 	COEFF_DATA_TYPE data_type;
 78 | 
 79 | private :
 80 | 	Vector& operator = (const Vector &other);
 81 | 	bool operator == (const Vector &other) const;
 82 | 	bool operator != (const Vector &other) const;
 83 | };
 84 | 
 85 | class Matrix;
 86 | 
 87 | class Vector_Compute : public Vector
 88 | {
 89 | protected :
 90 | 	bool SSE2_Enable,AVX_Enable,AVX2_Enable;
 91 | 
 92 | public :
 93 | 	Vector_Compute(void);
 94 | 	Vector_Compute(const uint16_t l,const COEFF_DATA_TYPE data);
 95 | 	Vector_Compute(const Vector_Compute &x);
 96 | 	virtual ~Vector_Compute(void);
 97 | 
 98 | 	void SetSSE2(bool val) {SSE2_Enable=val;}
 99 | 	void SetAVX(bool val) {AVX_Enable=val;}
100 | 	void SetAVX2(bool val) {AVX2_Enable=val;}
101 | 
102 | 	bool Mult(const double coef,const Vector &x);
103 | 	bool Mult(const double coef);
104 | 	bool Add(const double coef,const Vector &x);
105 | 	bool Add(const double coef);
106 | 	bool Sub(const double coef,const Vector &x);
107 | 	bool Sub(const double coef);
108 | 	bool Add_X(const Vector &x,const Vector &y);
109 | 	bool Add_X(const Vector &x);
110 | 	bool Sub_X(const Vector &x,const Vector &y);
111 | 	bool Sub_X(const Vector &x);
112 | 	bool InvSub_X(const Vector &x);
113 | 	bool Mult_X(const Vector &x,const Vector &y);
114 | 	bool Mult_X(const Vector &x);
115 | 
116 | 	bool Product_AX(const Matrix &ma,const Vector &x);
117 | 	bool Product_AX(const Matrix &ma);
118 | 	bool Product_tAX(const Matrix &ma,const Vector &x);
119 | 	bool Product_tAX(const Matrix &ma);
120 | 
121 | 	bool Norme2(double &result);
122 | 	bool Distance2(const Vector &x,double &result);
123 | 	bool Norme1(double &result);
124 | 	bool Distance1(const Vector &x,double &result);
125 | 
126 | protected :
127 | 	// Float
128 | 	void MultF(const double coef,const Vector &x);
129 | 	void MultF(const double coef);
130 | 	void AddF(const double coef,const Vector &x);
131 | 	void AddF(const double coef);
132 | 	void SubF(const double coef,const Vector &x);
133 | 	void SubF(const double coef);
134 | 	void AddF_X(const Vector &x,const Vector &y);
135 | 	void AddF_X(const Vector &x);
136 | 	void SubF_X(const Vector &x,const Vector &y);
137 | 	void SubF_X(const Vector &x);
138 | 	void InvSubF_X(const Vector &x);
139 | 	void MultF_X(const Vector &x,const Vector &y);
140 | 	void MultF_X(const Vector &x);
141 | 
142 | 	void ProductF_AX(const Matrix &ma,const Vector &x);
143 | 	void ProductF_tAX(const Matrix &ma,const Vector &x);
144 | 
145 | 	double Norme2F(void);
146 | 	double Distance2F(const Vector &x);
147 | 	double Norme1F(void);
148 | 	double Distance1F(const Vector &x);
149 | 
150 | 	// Double
151 | 	void MultD(const double coef,const Vector &x);
152 | 	void MultD(const double coef);
153 | 	void AddD(const double coef,const Vector &x);
154 | 	void AddD(const double coef);
155 | 	void SubD(const double coef,const Vector &x);
156 | 	void SubD(const double coef);
157 | 	void AddD_X(const Vector &x,const Vector &y);
158 | 	void AddD_X(const Vector &x);
159 | 	void SubD_X(const Vector &x,const Vector &y);
160 | 	void SubD_X(const Vector &x);
161 | 	void InvSubD_X(const Vector &x);
162 | 	void MultD_X(const Vector &x,const Vector &y);
163 | 	void MultD_X(const Vector &x);
164 | 
165 | 	void ProductD_AX(const Matrix &ma,const Vector &x);
166 | 	void ProductD_tAX(const Matrix &ma,const Vector &x);
167 | 
168 | 	double Norme2D(void);
169 | 	double Distance2D(const Vector &x);
170 | 	double Norme1D(void);
171 | 	double Distance1D(const Vector &x);
172 | 
173 | private :
174 | 	Vector_Compute& operator = (const Vector_Compute &other);
175 | 	bool operator == (const Vector_Compute &other) const;
176 | 	bool operator != (const Vector_Compute &other) const;
177 | };
178 | 
179 | 
180 | class Matrix
181 | {
182 | public :
183 | 	Matrix(void);
184 | 	Matrix(const uint16_t l,const uint16_t c,const COEFF_DATA_TYPE data);
185 | 	Matrix(const Matrix &m);
186 | 	virtual ~Matrix(void);
187 | 
188 | 	bool AllocCheck(void) const {return(Coeff!=NULL);}
189 | 	bool Create(void);
190 | 	bool Create(const uint16_t l,const uint16_t c,const COEFF_DATA_TYPE data);
191 | 	bool Create(const Matrix &m);
192 | 	virtual bool CopyStrict(const Matrix &m);
193 | 	bool CopyRaw(const void *ptr);
194 | 	bool CopyRaw(const void *ptr,ptrdiff_t ptr_pitch);
195 | 	bool CopyRaw(const void *ptr,ptrdiff_t ptr_pitch,uint16_t ln,uint16_t co);
196 | 	bool ExportRaw(void *ptr);
197 | 	bool ExportRaw(void *ptr,ptrdiff_t ptr_pitch);
198 | 	bool ExportRaw(void *ptr,ptrdiff_t ptr_pitch,uint16_t ln,uint16_t co);
199 | 	void Destroy(void);
200 | 	bool FillD(const double data);
201 | 	bool FillF(const float data);
202 | 	bool FillZero(void);
203 | 	COEFF_DATA_TYPE GetDataType(void) const {return(data_type);}
204 | 	bool SetInfo(const uint16_t l,const uint16_t c,const COEFF_DATA_TYPE data);
205 | 	void GetInfo(uint16_t &l,uint16_t &c,COEFF_DATA_TYPE &data) const;
206 | 	uint16_t GetLines(void) const {return(lines);}
207 | 	uint16_t GetColumns(void) const {return(columns);}
208 | 	void* GetPtrMatrix(void) const {return(Coeff);}
209 | 	void* GetPtrMatrixLine(const uint16_t i) const {return((void *)((uint8_t *)Coeff+i*pitch));}
210 | 	ptrdiff_t GetPitch(void) const {return(pitch);}
211 | 	size_t GetDataSize(void) const {return(size);}
212 | 	double GetD(const uint16_t i,const uint16_t j) const {return(((double *)((uint8_t *)Coeff+(ptrdiff_t)i*pitch))[j]);}
213 | 	float GetF(const uint16_t i,const uint16_t j) const {return(((float *)((uint8_t *)Coeff+(ptrdiff_t)i*pitch))[j]);}
214 | 	void SetD(const uint16_t i,const uint16_t j,const double d) {((double *)((uint8_t *)Coeff+(ptrdiff_t)i*pitch))[j]=d;}
215 | 	void SetF(const uint16_t i,const uint16_t j,const float d) {((float *)((uint8_t *)Coeff+(ptrdiff_t)i*pitch))[j]=d;}
216 | 	bool GetSafeD(const uint16_t i,const uint16_t j,double &d) const ;
217 | 	bool SetSafeD(const uint16_t i,const uint16_t j,const double d);
218 | 	bool GetSafeF(const uint16_t i,const uint16_t j,float &d) const ;
219 | 	bool SetSafeF(const uint16_t i,const uint16_t j,const float d);
220 | 
221 | protected :
222 | 	void *Coeff;
223 | 	uint16_t columns,lines;
224 | 	size_t size;
225 | 	ptrdiff_t pitch;
226 | 	COEFF_DATA_TYPE data_type;
227 | 
228 | 	Matrix& operator=(const Matrix&){return(*this);}
229 | 
230 | private :
231 | 	bool operator == (const Matrix &other) const;
232 | 	bool operator != (const Matrix &other) const;
233 | };
234 | 
235 | 
236 | class Matrix_Compute : public Matrix
237 | {
238 | protected :
239 | 	double zero_value;
240 | 	bool SSE2_Enable,AVX_Enable,AVX2_Enable;
241 | 
242 | public :
243 | 	Matrix_Compute(void);
244 | 	Matrix_Compute(const uint16_t l,const uint16_t c,const COEFF_DATA_TYPE data);
245 | 	Matrix_Compute(const Matrix_Compute &m);
246 | 	virtual ~Matrix_Compute(void);
247 | 
248 | 	void SetSSE2(bool val) {SSE2_Enable=val;}
249 | 	void SetAVX(bool val) {AVX_Enable=val;}
250 | 	void SetAVX2(bool val) {AVX2_Enable=val;}
251 | 
252 | 	bool CreateTranspose(const Matrix &m);
253 | 	virtual bool CopyStrict(const Matrix_Compute &m);
254 | 	void SetZeroValue(const double z) {zero_value=fabs(z);}
255 | 	double GetZeroValue(void) const {return(zero_value);}
256 | 
257 | 	bool Transpose(void);
258 | 	bool Transpose(const Matrix &ma);
259 | 
260 | 	bool Mult(const double coef,const Matrix &ma);
261 | 	bool Mult(const double coef);
262 | 	bool Add(const double coef,const Matrix &ma);
263 | 	bool Add(const double coef);
264 | 	bool Sub(const double coef,const Matrix &ma);
265 | 	bool Sub(const double coef);
266 | 	bool Add_A(const Matrix &ma,const Matrix &mb);
267 | 	bool Add_A(const Matrix &ma);
268 | 	bool Sub_A(const Matrix &ma,const Matrix &mb);
269 | 	bool Sub_A(const Matrix &ma);
270 | 	bool InvSub_A(const Matrix &ma);
271 | 	bool Mult_A(const Matrix &ma,const Matrix &mb);
272 | 	bool Mult_A(const Matrix &ma);
273 | 
274 | 	bool Product_AB(const Matrix &ma,const Matrix &mb);
275 | 	bool Product_AtB(const Matrix &ma,const Matrix &mb);
276 | 	bool Product_tAA(const Matrix &ma);
277 | 	bool Product_tAA(void);
278 | 
279 | 	bool Inverse(const Matrix &ma);
280 | 	bool Inverse(void);
281 | 	int8_t InverseSafe(const Matrix_Compute &ma);
282 | 	int8_t InverseSafe(void);
283 | 
284 | 	bool Norme2(double &result);
285 | 	bool Distance2(const Matrix &ma,double &result);
286 | 	bool Norme1(double &result);
287 | 	bool Distance1(const Matrix &ma,double &result);
288 | 
289 | protected :
290 | 	// Float
291 | 	void TransposeF(const Matrix &ma);
292 | 
293 | 	void MultF(const double coef,const Matrix &ma);
294 | 	void MultF(const double coef);
295 | 	void AddF(const double coef,const Matrix &ma);
296 | 	void AddF(const double coef);
297 | 	void SubF(const double coef,const Matrix &ma);
298 | 	void SubF(const double coef);
299 | 	void AddF_A(const Matrix &ma,const Matrix &mb);
300 | 	void AddF_A(const Matrix &ma);
301 | 	void SubF_A(const Matrix &ma,const Matrix &mb);
302 | 	void SubF_A(const Matrix &ma);
303 | 	void InvSubF_A(const Matrix &ma);
304 | 	void MultF_A(const Matrix &ma,const Matrix &mb);
305 | 	void MultF_A(const Matrix &ma);
306 | 
307 | 	void ProductF_AB(const Matrix &ma,const Matrix &mb);
308 | 	void ProductF_AtB(const Matrix &ma,const Matrix &mb);
309 | 
310 | 	bool InverseF(const Matrix &ma);
311 | 	int8_t InverseSafeF(const Matrix_Compute &ma);
312 | 
313 | 	double Norme2F(void);
314 | 	double Distance2F(const Matrix &ma);
315 | 	double Norme1F(void);
316 | 	double Distance1F(const Matrix &ma);
317 | 
318 | 	// Double
319 | 	void MultD(const double coef,const Matrix &ma);
320 | 	void MultD(const double coef);
321 | 	void AddD(const double coef,const Matrix &ma);
322 | 	void AddD(const double coef);
323 | 	void SubD(const double coef,const Matrix &ma);
324 | 	void SubD(const double coef);
325 | 	void AddD_A(const Matrix &ma,const Matrix &mb);
326 | 	void AddD_A(const Matrix &ma);
327 | 	void SubD_A(const Matrix &ma,const Matrix &mb);
328 | 	void SubD_A(const Matrix &ma);
329 | 	void InvSubD_A(const Matrix &ma);
330 | 	void MultD_A(const Matrix &ma,const Matrix &mb);
331 | 	void MultD_A(const Matrix &ma);
332 | 
333 | 	void TransposeD(const Matrix &ma);
334 | 
335 | 	void ProductD_AB(const Matrix &ma,const Matrix &mb);
336 | 	void ProductD_AtB(const Matrix &ma,const Matrix &mb);
337 | 
338 | 	bool InverseD(const Matrix &ma);
339 | 	int8_t InverseSafeD(const Matrix_Compute &ma);
340 | 
341 | 	double Norme2D(void);
342 | 	double Distance2D(const Matrix &ma);
343 | 	double Norme1D(void);
344 | 	double Distance1D(const Matrix &ma);
345 | 
346 | 	// U64
347 | 	void TransposeU64(const Matrix &ma);
348 | 
349 | 	// I64
350 | 	void TransposeI64(const Matrix &ma);
351 | 
352 | 	// U32
353 | 	void TransposeU32(const Matrix &ma);
354 | 
355 | 	// I32
356 | 	void TransposeI32(const Matrix &ma);
357 | 
358 | 	// U16
359 | 	void TransposeU16(const Matrix &ma);
360 | 
361 | 	// I16
362 | 	void TransposeI16(const Matrix &ma);
363 | 
364 | 	// U8
365 | 	void TransposeU8(const Matrix &ma);
366 | 
367 | 	// I8
368 | 	void TransposeI8(const Matrix &ma);
369 | 
370 | 	Matrix_Compute& operator=(const Matrix_Compute&){return(*this);}
371 | 
372 | private :
373 | 	bool operator == (const Matrix_Compute &other) const;
374 | 	bool operator != (const Matrix_Compute &other) const;
375 | };
376 | 
377 | #endif


--------------------------------------------------------------------------------
/Plugins_JPSDR/PlanarFrame.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpsdr/plugins_JPSDR/7826b045cfc28a0738088ec03a5e6f2edd44c9f0/Plugins_JPSDR/PlanarFrame.cpp


--------------------------------------------------------------------------------
/Plugins_JPSDR/PlanarFrame.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | **   My PlanarFrame class... fast mmx/sse2 YUY2 packed to planar and planar 
  3 | **   to packed conversions, and always gives 16 bit alignment for all
  4 | **   planes.  Supports Y8/YV12/YV16/YV24/YUY2/RGB24 frames from avisynth, can do any planar format 
  5 | **   internally.
  6 | **
  7 | **   Copyright (C) 2005-2006 Kevin Stone
  8 | **
  9 | **   This program is free software; you can redistribute it and/or modify
 10 | **   it under the terms of the GNU General Public License as published by
 11 | **   the Free Software Foundation; either version 2 of the License, or
 12 | **   (at your option) any later version.
 13 | **
 14 | **   This program is distributed in the hope that it will be useful,
 15 | **   but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 | **   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17 | **   GNU General Public License for more details.
 18 | **
 19 | **   You should have received a copy of the GNU General Public License
 20 | **   along with this program; if not, write to the Free Software
 21 | **   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 22 | */
 23 | 
 24 | #ifndef __PlanarFrame_H__
 25 | #define __PlanarFrame_H__
 26 | 
 27 | #include <windows.h>
 28 | #include <malloc.h>
 29 | #include <stdint.h>
 30 | #include "./internal.h"
 31 | #include "./avs/cpuid.h"
 32 | 
 33 | #define MIN_PAD 10
 34 | #define MIN_ALIGNMENT 64
 35 | 
 36 | #define PLANAR_420 1
 37 | #define PLANAR_422 2
 38 | #define PLANAR_444 3
 39 | 
 40 | 
 41 | class PlanarFrame
 42 | {
 43 | private:
 44 | 	bool useSIMD,useAVX;
 45 | 	int cpu;
 46 | 	int ypitch,uvpitch;
 47 | 	int ywidth,uvwidth;
 48 | 	int yheight,uvheight;
 49 | 	bool alloc_ok;
 50 | 	
 51 | 	bool grey,isRGBPfamily,isAlphaChannel;
 52 | 	uint8_t pixelsize; // AVS16
 53 | 	uint8_t bits_per_pixel;
 54 | 	
 55 | 	uint8_t *planar_1,*planar_2,*planar_3,*planar_4;
 56 | 	bool allocSpace(VideoInfo &viInfo);
 57 | 	bool allocSpace(int specs[4],bool rgbplanar,bool alphaplanar,uint8_t _pixelsize,uint8_t _bits_per_pixel);
 58 | 	int getCPUInfo(void);
 59 | 	int checkCPU(void);
 60 | 	bool copyInternalFrom(PVideoFrame &frame,VideoInfo &viInfo);
 61 | 	bool copyInternalFrom(PlanarFrame &frame);
 62 | 	bool copyInternalTo(PVideoFrame &frame,VideoInfo &viInfo);
 63 | 	bool copyInternalTo(PlanarFrame &frame);
 64 | 	bool copyInternalPlaneTo(PlanarFrame &frame,uint8_t plane);
 65 | 	void conv422toYUY2(uint8_t *py,uint8_t *pu,uint8_t *pv,uint8_t *dst,int pitch1Y,int pitch1UV,int pitch2,
 66 | 		int width,int height);
 67 | 	void conv444toRGB24(uint8_t *py,uint8_t *pu,uint8_t *pv,uint8_t *dst,int pitch1Y,int pitch1UV,int pitch2,
 68 | 		int width,int height);
 69 | 
 70 | public:
 71 | 	PlanarFrame(void);
 72 | 	PlanarFrame(VideoInfo &viInfo);
 73 | 	virtual ~PlanarFrame(void);
 74 | 	bool GetAllocStatus(void) {return(alloc_ok);}
 75 | 	bool createPlanar(int yheight,int uvheight,int ywidth,int uvwidth,bool rgbplanar,bool alphaplanar,uint8_t pixelsize,uint8_t bits_per_pixel);
 76 | 	bool createPlanar(int height,int width,uint8_t chroma_format,bool rgbplanar,bool alphaplanar,uint8_t pixelsize,uint8_t bits_per_pixel);
 77 | 	bool createFromProfile(VideoInfo &viInfo);
 78 | 	bool createFromFrame(PVideoFrame &frame,VideoInfo &viInfo);
 79 | 	bool createFromPlanar(PlanarFrame &frame);
 80 | 	bool copyFrom(PVideoFrame &frame,VideoInfo &viInfo);
 81 | 	bool copyTo(PVideoFrame &frame,VideoInfo &viInfo);
 82 | 	bool copyFrom(PlanarFrame &frame);
 83 | 	bool copyTo(PlanarFrame &frame);
 84 | 	bool copyChromaTo(PlanarFrame &dst);
 85 | 	bool copyPlaneTo(PlanarFrame &dst,uint8_t plane);
 86 | 	void freePlanar();
 87 | 	uint8_t* GetPtr(uint8_t plane);
 88 | 	int GetWidth(uint8_t plane);
 89 | 	int GetHeight(uint8_t plane);
 90 | 	int GetPitch(uint8_t plane);
 91 | 	int getCPUFlags(void) {return cpu;}
 92 | 	inline void BitBlt(uint8_t *dstp,int dst_pitch,const uint8_t *srcp,int src_pitch,int row_size,int height);
 93 | 	PlanarFrame& operator=(PlanarFrame &ob2);
 94 | 	void convYUY2to422(const uint8_t *src,uint8_t *py,uint8_t *pu,uint8_t *pv,int pitch1,int pitch2Y,int pitch2UV,
 95 | 		int width,int height);
 96 | 	void convRGB24to444(const uint8_t *src,uint8_t *py,uint8_t *pu,uint8_t *pv,int pitch1,int pitch2Y,int pitch2UV,
 97 | 		int width,int height);
 98 | };
 99 | 
100 | #endif


--------------------------------------------------------------------------------
/Plugins_JPSDR/PlanarFrame_asm.asm:
--------------------------------------------------------------------------------
  1 | ;
  2 | ;   My PlanarFrame class... fast mmx/sse2 YUY2 packed to planar and planar 
  3 | ;   to packed conversions, and always gives 16 bit alignment for all
  4 | ;   planes.  Supports Y8/YV12/YV16/YV24/YUY2/RGB24 frames from avisynth, can do any planar
  5 | ;   format internally.
  6 | ;
  7 | ;   Copyright (C) 2005-2010 Kevin Stone
  8 | ;
  9 | ;   This program is free software; you can redistribute it and/or modify
 10 | ;   it under the terms of the GNU General Public License as published by
 11 | ;   the Free Software Foundation; either version 2 of the License, or
 12 | ;   (at your option) any later version.
 13 | ;
 14 | ;   This program is distributed in the hope that it will be useful,
 15 | ;   but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 | ;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17 | ;   GNU General Public License for more details.
 18 | ;
 19 | ;   You should have received a copy of the GNU General Public License
 20 | ;   along with this program; if not, write to the Free Software
 21 | ;   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 22 | ;
 23 | 
 24 | .xmm
 25 | .model flat,c
 26 | 
 27 | .data
 28 | 
 29 | align 16
 30 | 
 31 | Ymask qword 2 dup(00FF00FF00FF00FFh)
 32 | 
 33 | .code
 34 | 
 35 | 
 36 | convYUY2to422_MMX proc src:dword,py:dword,pu:dword,pv:dword,pitch1:dword,pitch2Y:dword,pitch2UV:dword,width_:dword,height:dword
 37 | 
 38 | 	public convYUY2to422_MMX		
 39 | 	
 40 | 		push ebx
 41 | 		push edi
 42 | 		push esi
 43 | 		
 44 | 		mov edi,src
 45 | 		mov ebx,py
 46 | 		mov edx,pu
 47 | 		mov esi,pv
 48 | 		mov ecx,width_
 49 | 		shr ecx,1
 50 | 		movq mm5,qword ptr Ymask
 51 | yloop:
 52 | 		xor eax,eax
 53 | 		align 16
 54 | xloop:
 55 | 		movq mm0,[edi+eax*4]   ;VYUYVYUY
 56 | 		movq mm1,[edi+eax*4+8] ;VYUYVYUY
 57 | 		movq mm2,mm0           ;VYUYVYUY
 58 | 		movq mm3,mm1           ;VYUYVYUY
 59 | 		pand mm0,mm5           ;0Y0Y0Y0Y
 60 | 		psrlw mm2,8 	       ;0V0U0V0U
 61 | 		pand mm1,mm5           ;0Y0Y0Y0Y
 62 | 		psrlw mm3,8            ;0V0U0V0U
 63 | 		packuswb mm0,mm1       ;YYYYYYYY
 64 | 		packuswb mm2,mm3       ;VUVUVUVU
 65 | 		movq mm4,mm2           ;VUVUVUVU
 66 | 		pand mm2,mm5           ;0U0U0U0U
 67 | 		psrlw mm4,8            ;0V0V0V0V
 68 | 		packuswb mm2,mm2       ;xxxxUUUU
 69 | 		packuswb mm4,mm4       ;xxxxVVVV
 70 | 		movq [ebx+eax*2],mm0   ;store y
 71 | 		movd dword ptr[edx+eax],mm2     ;store u
 72 | 		movd dword ptr[esi+eax],mm4     ;store v
 73 | 		add eax,4
 74 | 		cmp eax,ecx
 75 | 		jl short xloop
 76 | 		add edi,pitch1
 77 | 		add ebx,pitch2Y
 78 | 		add edx,pitch2UV
 79 | 		add esi,pitch2UV
 80 | 		dec height
 81 | 		jnz short yloop
 82 | 		emms
 83 | 		
 84 | 		pop esi
 85 | 		pop edi
 86 | 		pop ebx
 87 | 		
 88 | 		ret
 89 | 		
 90 | convYUY2to422_MMX endp
 91 | 
 92 | 
 93 | convYUY2to422_SSE2 proc src:dword,py:dword,pu:dword,pv:dword,pitch1:dword,pitch2Y:dword,pitch2UV:dword,width_:dword,height:dword
 94 | 
 95 | 	public convYUY2to422_SSE2		
 96 | 	
 97 | 		push ebx
 98 | 		push edi
 99 | 		push esi
100 | 	
101 | 		mov edi,src
102 | 		mov ebx,py
103 | 		mov edx,pu
104 | 		mov esi,pv
105 | 		
106 | yloop_2:
107 | 		xor eax,eax
108 | 		mov ecx,width_
109 | 		shr ecx,1
110 | 		jz short suite1_2
111 | 		
112 | xloop_2:
113 | 	movdqa xmm0,XMMWORD ptr[edi+4*eax]   ;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
114 | 	movdqa xmm1,XMMWORD ptr[edi+4*eax+16]   ;V8Y16U8Y15V7Y14U7Y13 V6Y12U6Y11V5Y10U5Y9
115 | 	movdqa xmm2,xmm0                      ;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
116 | 	punpcklbw xmm0,xmm1                  ;V6V2Y12Y4U6U2Y11Y3 V5V1Y10Y2U5U1Y9Y1
117 | 	punpckhbw xmm2,xmm1                  ;V8V4Y16Y8U8U4Y15Y7 V7V3Y14Y6U7U3Y13Y5
118 | 	movdqa xmm1,xmm0
119 | 	punpcklbw xmm0,xmm2                  ;V7V5V3V1Y14Y10Y6Y2 U7U5U3U1Y13Y9Y5Y1
120 | 	punpckhbw xmm1,xmm2                  ;V8V6V4V2Y16Y12Y8Y4 U8U6U4U2Y15Y11Y7Y3
121 | 	movdqa xmm2,xmm0
122 | 	punpcklbw xmm0,xmm1                  ;U8U7U6U5U4U3U2U1 Y15Y13Y11Y9Y5Y3Y1
123 | 	punpckhbw xmm2,xmm1                  ;V8V7V6V5V4V3V2V1 Y16Y14Y12Y10Y8Y6Y4Y2
124 | 	movhps qword ptr [edx+eax],xmm0
125 | 	punpcklbw xmm0,xmm2                  ;Y16Y15Y14Y13Y12Y11Y10Y9Y8Y7Y6Y5Y4Y3Y2Y1
126 | 	movhps qword ptr [esi+eax],xmm2
127 | 	movdqa XMMWORD ptr[ebx+2*eax],xmm0
128 | 	add eax,8
129 | 	loop xloop_2
130 | 
131 | suite1_2:
132 | 		mov ecx,width_
133 | 		and ecx,1
134 | 		jz short suite2_2
135 | 		
136 | 	movdqa xmm0,XMMWORD ptr[edi+4*eax]   ;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
137 | 	movhlps xmm1,xmm0                    ;V4Y8U4Y7V3Y6U3Y5 V4Y8U4Y7V3Y6U3Y5
138 | 	punpcklbw xmm0,xmm1                  ;V4V2Y8Y4U4U2Y7Y3 V3V1Y6Y2U3U1Y5Y1
139 | 	movhlps xmm1,xmm0                    ;V4V2Y8Y4U4U2Y7Y3 V4V2Y8Y4U4U2Y7Y3
140 | 	punpcklbw xmm0,xmm1                  ;V4V3V2V1Y8Y6Y4Y2 U4U3U2U1Y7Y5Y3Y1
141 | 	movhlps xmm2,xmm0                    ;xxxxxxxx V4V3V2V1Y8Y6Y4Y2
142 | 	movdqa xmm1,xmm0
143 | 	psrlq xmm0,32                        ;0000V4V3V2V1 0000U4U3U2U1
144 | 	punpcklbw xmm1,xmm2                  ; xxxxxxxx Y8Y7Y6Y5Y4Y3Y2Y1
145 | 	movd dword ptr[edx+eax],xmm0
146 | 	movhlps xmm2,xmm0
147 | 	movq qword ptr[ebx+2*eax],xmm1
148 | 	movd dword ptr[esi+eax],xmm2
149 | 		
150 | 	
151 | suite2_2:	
152 | 		add edi,pitch1
153 | 		add ebx,pitch2Y
154 | 		add edx,pitch2UV
155 | 		add esi,pitch2UV
156 | 		dec height
157 | 		jnz yloop_2
158 | 		
159 | 		pop esi
160 | 		pop edi
161 | 		pop ebx
162 | 		
163 | 		ret
164 | 		
165 | convYUY2to422_SSE2 endp
166 | 
167 | 
;-----------------------------------------------------------------------
; convYUY2to422_AVX - split packed YUY2 rows into Y, U and V planes,
; using VEX-encoded instructions on xmm registers only.
;
; x86-32, C calling convention (.model flat,c); MASM builds the ebp frame.
;   src       packed source rows (byte order Y U Y V)
;   py,pu,pv  destination planes
;   pitch1    byte step between source rows
;   pitch2Y   byte step between Y rows
;   pitch2UV  byte step between U rows and between V rows
;   width_    row width counted in 8-pixel groups, as consumed here:
;             width_/2 passes of 16 pixels, plus one 8-pixel tail when odd
;   height    number of rows (the stacked argument is used as the counter)
;
; vmovdqa is used on the source and on the Y plane, so those row addresses
; must be 16-byte aligned.
; Returns at once when width_ <= 0 or height <= 0 (the row loop is
; bottom-tested and would wrap on height <= 0).
; The legacy LOOP instruction was replaced by dec/jnz; nothing after the
; loops depends on the flags.
; Preserves ebx, esi, edi. Clobbers eax, ecx, edx, xmm0-xmm2, flags.
;-----------------------------------------------------------------------
convYUY2to422_AVX proc src:dword,py:dword,pu:dword,pv:dword,pitch1:dword,pitch2Y:dword,pitch2UV:dword,width_:dword,height:dword

	public convYUY2to422_AVX

		push ebx
		push edi
		push esi

		cmp width_,0
		jle convYUY2to422_AVX_done
		cmp height,0
		jle convYUY2to422_AVX_done

		mov edi,src
		mov ebx,py
		mov edx,pu
		mov esi,pv

yloop_2_AVX:
		xor eax,eax                  ;eax = chroma index inside the row
		mov ecx,width_
		shr ecx,1                    ;ecx = number of 16-pixel passes
		jz suite1_2_AVX

xloop_2_AVX:
	vmovdqa xmm0,XMMWORD ptr[edi+4*eax]      ;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
	vmovdqa xmm1,XMMWORD ptr[edi+4*eax+16]   ;V8Y16U8Y15V7Y14U7Y13 V6Y12U6Y11V5Y10U5Y9
	vpunpckhbw xmm2,xmm0,xmm1                ;V8V4Y16Y8U8U4Y15Y7 V7V3Y14Y6U7U3Y13Y5
	vpunpcklbw xmm0,xmm0,xmm1                ;V6V2Y12Y4U6U2Y11Y3 V5V1Y10Y2U5U1Y9Y1
	vpunpckhbw xmm1,xmm0,xmm2                ;V8V6V4V2Y16Y12Y8Y4 U8U6U4U2Y15Y11Y7Y3
	vpunpcklbw xmm0,xmm0,xmm2                ;V7V5V3V1Y14Y10Y6Y2 U7U5U3U1Y13Y9Y5Y1
	vpunpckhbw xmm2,xmm0,xmm1                ;V8V7V6V5V4V3V2V1 Y16Y14Y12Y10Y8Y6Y4Y2
	vpunpcklbw xmm0,xmm0,xmm1                ;U8U7U6U5U4U3U2U1 Y15Y13Y11Y9Y7Y5Y3Y1
	vmovhps qword ptr [edx+eax],xmm0         ;store 8 u
	vpunpcklbw xmm0,xmm0,xmm2                ;Y16Y15Y14Y13Y12Y11Y10Y9Y8Y7Y6Y5Y4Y3Y2Y1
	vmovhps qword ptr [esi+eax],xmm2         ;store 8 v
	vmovdqa XMMWORD ptr[ebx+2*eax],xmm0      ;store 16 y
	add eax,8
	dec ecx
	jnz xloop_2_AVX

suite1_2_AVX:
		mov ecx,width_
		and ecx,1                    ;odd group count : one 8-pixel tail remains
		jz suite2_2_AVX

	vmovdqa xmm0,XMMWORD ptr[edi+4*eax]   ;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
	vmovhlps xmm1,xmm1,xmm0               ;V4Y8U4Y7V3Y6U3Y5 V4Y8U4Y7V3Y6U3Y5
	vpunpcklbw xmm0,xmm0,xmm1             ;V4V2Y8Y4U4U2Y7Y3 V3V1Y6Y2U3U1Y5Y1
	vmovhlps xmm1,xmm1,xmm0               ;V4V2Y8Y4U4U2Y7Y3 V4V2Y8Y4U4U2Y7Y3
	vpunpcklbw xmm0,xmm0,xmm1             ;V4V3V2V1Y8Y6Y4Y2 U4U3U2U1Y7Y5Y3Y1
	vmovhlps xmm2,xmm2,xmm0               ;xxxxxxxx V4V3V2V1Y8Y6Y4Y2
	vpunpcklbw xmm1,xmm0,xmm2             ; xxxxxxxx Y8Y7Y6Y5Y4Y3Y2Y1
	vpsrlq xmm0,xmm0,32                   ;0000V4V3V2V1 0000U4U3U2U1
	vmovd dword ptr[edx+eax],xmm0         ;store 4 u
	vmovhlps xmm2,xmm2,xmm0
	vmovq qword ptr[ebx+2*eax],xmm1       ;store 8 y
	vmovd dword ptr[esi+eax],xmm2         ;store 4 v

suite2_2_AVX:
		add edi,pitch1               ;next row in every plane
		add ebx,pitch2Y
		add edx,pitch2UV
		add esi,pitch2UV
		dec height
		jnz yloop_2_AVX

convYUY2to422_AVX_done:
		pop esi
		pop edi
		pop ebx

		ret

convYUY2to422_AVX endp
237 | 
238 | 
;-----------------------------------------------------------------------
; conv422toYUY2_MMX - interleave Y, U and V planes into packed YUY2 (MMX).
;
; x86-32, C calling convention (.model flat,c); MASM builds the ebp frame.
;   py,pu,pv  source planes
;   dst       packed destination rows (byte order Y U Y V)
;   pitch1Y   byte step between Y rows
;   pitch1UV  byte step between U rows and between V rows
;   pitch2    byte step between destination rows
;   width_    row width; the inner loop runs while index < width_/2,
;             the index advancing by 4 chroma samples per pass
;   height    number of rows (the stacked argument is used as the counter)
;
; One inner pass packs 8 pixels: 8 Y + 4 U + 4 V -> 16 destination bytes.
; The inner loop is bottom-tested, so each row is handled in whole 8-pixel
; groups and the last group may extend past width_.
; Returns at once when width_ <= 0 or height <= 0; without that check the
; bottom-tested loops would start running with an invalid count.
; Preserves ebx, esi, edi. Clobbers eax, ecx, edx, mm0-mm3, flags.
;-----------------------------------------------------------------------
conv422toYUY2_MMX proc py:dword,pu:dword,pv:dword,dst:dword,pitch1Y:dword,pitch1UV:dword,pitch2:dword,width_:dword,height:dword

	public conv422toYUY2_MMX

		push ebx
		push edi
		push esi

		mov ecx,width_
		test ecx,ecx
		jle conv422toYUY2_MMX_done  ;empty or negative width : nothing to convert
		cmp height,0
		jle conv422toYUY2_MMX_done  ;dec/jnz below would wrap on height <= 0

		mov ebx,py
		mov edx,pu
		mov esi,pv
		mov edi,dst
		shr ecx,1                   ;ecx = chroma samples per row
yloop_3:
		xor eax,eax                 ;eax = chroma index inside the row
		align 16
xloop_3:
		movq mm0,[ebx+eax*2]   ;YYYYYYYY
		movd mm1,dword ptr[edx+eax]     ;0000UUUU
		movd mm2,dword ptr[esi+eax]     ;0000VVVV
		movq mm3,mm0           ;YYYYYYYY
		punpcklbw mm1,mm2      ;VUVUVUVU
		punpcklbw mm0,mm1      ;VYUYVYUY
		punpckhbw mm3,mm1      ;VYUYVYUY
		movq [edi+eax*4],mm0   ;store
		movq [edi+eax*4+8],mm3 ;store
		add eax,4
		cmp eax,ecx
		jl xloop_3

		add ebx,pitch1Y        ;next row in every plane
		add edx,pitch1UV
		add esi,pitch1UV
		add edi,pitch2
		dec height
		jnz yloop_3
		emms                   ;leave the x87/MMX state clean for the caller

conv422toYUY2_MMX_done:
		pop esi
		pop edi
		pop ebx

		ret

conv422toYUY2_MMX endp
284 | 
285 | 
;-----------------------------------------------------------------------
; conv422toYUY2_SSE2 - interleave Y, U and V planes into packed YUY2 (SSE2).
;
; x86-32, C calling convention (.model flat,c); MASM builds the ebp frame.
;   py,pu,pv  source planes
;   dst       packed destination rows (byte order Y U Y V)
;   pitch1Y   byte step between Y rows
;   pitch1UV  byte step between U rows and between V rows
;   modulo2   bytes added to the destination pointer at the end of a row,
;             i.e. after it has already advanced over the bytes written
;   width_    row width counted in 8-pixel groups, as consumed here:
;             width_/2 passes of 16 pixels, plus one 8-pixel tail when odd
;   height    number of rows (the stacked argument is used as the counter)
;
; movdqa is used on the Y plane (main loop) and on every destination
; store, so those addresses must be 16-byte aligned.
; Returns at once when width_ <= 0 or height <= 0 (the row loop is
; bottom-tested and would wrap on height <= 0).
; The legacy LOOP instruction was replaced by dec/jnz; nothing after the
; loops depends on the flags.
; Preserves ebx, esi, edi. Clobbers eax, ecx, edx, xmm0-xmm3, flags.
;-----------------------------------------------------------------------
conv422toYUY2_SSE2 proc py:dword,pu:dword,pv:dword,dst:dword,pitch1Y:dword,pitch1UV:dword,modulo2:dword,width_:dword,height:dword

	public conv422toYUY2_SSE2

		push ebx
		push edi
		push esi

		cmp width_,0
		jle conv422toYUY2_SSE2_done
		cmp height,0
		jle conv422toYUY2_SSE2_done

		mov ebx,py
		mov edx,pu
		mov esi,pv
		mov edi,dst

yloop_4:
		xor eax,eax                  ;eax = source index in units of 4 chroma samples
		mov ecx,width_
		shr ecx,1                    ;ecx = number of 16-pixel passes
		jz suite1

xloop_4:
	movq xmm1,qword ptr[edx+4*eax]		;00000000UUUUUUUU
	movq xmm0,qword ptr[esi+4*eax]		;00000000VVVVVVVV
	movdqa xmm2,XMMWORD ptr[ebx+8*eax]	;YYYYYYYYYYYYYYYY
	punpcklbw xmm1,xmm0					;VUVUVUVUVUVUVUVU
	movdqa xmm3,xmm2
	add eax,2
	punpcklbw xmm2,xmm1     			;VYUYVYUYVYUYVYUY
	punpckhbw xmm3,xmm1     			;VYUYVYUYVYUYVYUY

	movdqa XMMWORD ptr[edi],xmm2
	movdqa XMMWORD ptr[edi+16],xmm3
	add edi,32

	dec ecx
	jnz xloop_4

suite1:
	mov ecx,width_
	and ecx,1                           ;odd group count : one 8-pixel tail remains
	jz suite2

	movd xmm1,dword ptr[edx+4*eax]		;000000000000UUUU
	movd xmm0,dword ptr[esi+4*eax]		;000000000000VVVV
	movq xmm2,qword ptr[ebx+8*eax]		;00000000YYYYYYYY
	punpcklbw xmm1,xmm0					;00000000VUVUVUVU
	punpcklbw xmm2,xmm1     			;VYUYVYUYVYUYVYUY

	movdqa XMMWORD ptr[edi],xmm2
	add edi,16

suite2:
		add ebx,pitch1Y              ;next row in every plane
		add edx,pitch1UV
		add esi,pitch1UV
		add edi,modulo2              ;edi already advanced over the written bytes
		dec height
		jnz yloop_4

conv422toYUY2_SSE2_done:
		pop esi
		pop edi
		pop ebx

		ret

conv422toYUY2_SSE2 endp
350 | 
351 | 
;-----------------------------------------------------------------------
; conv422toYUY2_AVX - interleave Y, U and V planes into packed YUY2,
; using VEX-encoded instructions on xmm registers only.
;
; x86-32, C calling convention (.model flat,c); MASM builds the ebp frame.
;   py,pu,pv  source planes
;   dst       packed destination rows (byte order Y U Y V)
;   pitch1Y   byte step between Y rows
;   pitch1UV  byte step between U rows and between V rows
;   modulo2   bytes added to the destination pointer at the end of a row,
;             i.e. after it has already advanced over the bytes written
;   width_    row width counted in 8-pixel groups, as consumed here:
;             width_/2 passes of 16 pixels, plus one 8-pixel tail when odd
;   height    number of rows (the stacked argument is used as the counter)
;
; vmovdqa is used on the Y plane (main loop) and on every destination
; store, so those addresses must be 16-byte aligned.
; Returns at once when width_ <= 0 or height <= 0 (the row loop is
; bottom-tested and would wrap on height <= 0).
; The legacy LOOP instruction was replaced by dec/jnz; nothing after the
; loops depends on the flags.
; Preserves ebx, esi, edi. Clobbers eax, ecx, edx, xmm0-xmm3, flags.
;-----------------------------------------------------------------------
conv422toYUY2_AVX proc py:dword,pu:dword,pv:dword,dst:dword,pitch1Y:dword,pitch1UV:dword,modulo2:dword,width_:dword,height:dword

	public conv422toYUY2_AVX

		push ebx
		push edi
		push esi

		cmp width_,0
		jle conv422toYUY2_AVX_done
		cmp height,0
		jle conv422toYUY2_AVX_done

		mov ebx,py
		mov edx,pu
		mov esi,pv
		mov edi,dst

yloop_4_AVX:
		xor eax,eax                  ;eax = source index in units of 4 chroma samples
		mov ecx,width_
		shr ecx,1                    ;ecx = number of 16-pixel passes
		jz suite1_AVX

xloop_4_AVX:
	vmovq xmm1,qword ptr[edx+4*eax]		;00000000UUUUUUUU
	vmovq xmm0,qword ptr[esi+4*eax]		;00000000VVVVVVVV
	vmovdqa xmm2,XMMWORD ptr[ebx+8*eax]	;YYYYYYYYYYYYYYYY
	vpunpcklbw xmm1,xmm1,xmm0				;VUVUVUVUVUVUVUVU
	add eax,2
	vpunpckhbw xmm3,xmm2,xmm1     			;VYUYVYUYVYUYVYUY
	vpunpcklbw xmm2,xmm2,xmm1     			;VYUYVYUYVYUYVYUY

	vmovdqa XMMWORD ptr[edi],xmm2
	vmovdqa XMMWORD ptr[edi+16],xmm3
	add edi,32

	dec ecx
	jnz xloop_4_AVX

suite1_AVX:
	mov ecx,width_
	and ecx,1                           ;odd group count : one 8-pixel tail remains
	jz suite2_AVX

	vmovd xmm1,dword ptr[edx+4*eax]		;000000000000UUUU
	vmovd xmm0,dword ptr[esi+4*eax]		;000000000000VVVV
	vmovq xmm2,qword ptr[ebx+8*eax]		;00000000YYYYYYYY
	vpunpcklbw xmm1,xmm1,xmm0			;00000000VUVUVUVU
	vpunpcklbw xmm2,xmm2,xmm1   		;VYUYVYUYVYUYVYUY

	vmovdqa XMMWORD ptr[edi],xmm2
	add edi,16

suite2_AVX:
		add ebx,pitch1Y              ;next row in every plane
		add edx,pitch1UV
		add esi,pitch1UV
		add edi,modulo2              ;edi already advanced over the written bytes
		dec height
		jnz yloop_4_AVX

conv422toYUY2_AVX_done:
		pop esi
		pop edi
		pop ebx

		ret

conv422toYUY2_AVX endp
415 | 
416 | 
417 | end
418 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/PlanarFrame_asm_x64.asm:
--------------------------------------------------------------------------------
  1 | ;
  2 | ;   My PlanarFrame class... fast mmx/sse2 YUY2 packed to planar and planar 
  3 | ;   to packed conversions, and always gives 16 bit alignment for all
  4 | ;   planes.  Supports Y8/YV12/YV16/YV24/YUY2/RGB24 frames from avisynth, can do any planar
  5 | ;   format internally.
  6 | ;
  7 | ;   Copyright (C) 2005-2010 Kevin Stone
  8 | ;
  9 | ;   This program is free software; you can redistribute it and/or modify
 10 | ;   it under the terms of the GNU General Public License as published by
 11 | ;   the Free Software Foundation; either version 2 of the License, or
 12 | ;   (at your option) any later version.
 13 | ;
 14 | ;   This program is distributed in the hope that it will be useful,
 15 | ;   but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 | ;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17 | ;   GNU General Public License for more details.
 18 | ;
 19 | ;   You should have received a copy of the GNU General Public License
 20 | ;   along with this program; if not, write to the Free Software
 21 | ;   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 22 | ;
 23 | 
 24 | .data
 25 | 
 26 | align 16
 27 | 
 28 | Ymask qword 2 dup(00FF00FF00FF00FFh)
 29 | 
 30 | .code
 31 | 
 32 | 
 33 | ;convYUY2to422_MMX - x64 (Microsoft ABI), MMX : packed YUY2 -> separate Y, U and V planes
 34 | ; src = rcx, py = rdx, pu = r8, pv = r9
 35 | ; pitch1, pitch2Y, pitch2UV, width_, height = stack arguments (equ list below)
 36 | ; width_ = luma samples per row, handled 8 at a time (the row loop body runs at least once)
 37 | ; height = number of rows ; nothing is converted when it is 0
 38 | 
 39 | convYUY2to422_MMX proc public frame
 40 | ; Stack arguments, addressed through rbp once "push rbp / mov rbp,rsp" has run
 41 | pitch1 equ dword ptr[rbp+48]
 42 | pitch2Y equ dword ptr[rbp+56]
 43 | pitch2UV equ dword ptr[rbp+64]
 44 | width_ equ dword ptr[rbp+72]
 45 | height equ dword ptr[rbp+80]
 46 | 
 47 | 	push rbp
 48 | 	.pushreg rbp
 49 | 	mov rbp,rsp
 50 | 	push rbx
 51 | 	.pushreg rbx
 52 | 	push rsi
 53 | 	.pushreg rsi
 54 | 	push rdi
 55 | 	.pushreg rdi
 56 | 	push r12
 57 | 	.pushreg r12
 58 | 	.endprolog
 59 | 		
 60 | 		mov rdi,rcx            ;rdi = src
 61 | 		mov rbx,rdx            ;rbx = py
 62 | 		mov rdx,r8             ;rdx = pu
 63 | 		mov rsi,r9             ;rsi = pv
 64 | 		mov ecx,width_         ;32-bit load already clears the upper half of rcx
 65 | 		shr ecx,1              ;rcx = chroma samples per row
 66 | 		movq mm5,qword ptr Ymask ;Ymask is defined elsewhere in this file
 67 | 		
 68 | 		mov r8d,height         ;r8 = rows left (zero-extended)
 69 | 		test r8d,r8d
 70 | 		jz fin_YUY2to422_MMX   ;height = 0 : the dec/jnz row loop would wrap around
 71 | 		movsxd r9,pitch1       ;byte step between src rows
 72 | 		movsxd r10,pitch2Y     ;byte step between Y rows
 73 | 		movsxd r11,pitch2UV    ;byte step between U rows and between V rows
 74 | 		mov r12,4              ;chroma bytes produced by one xloop pass
 75 | 		
 76 | yloop:
 77 | 		xor rax,rax            ;rax = chroma byte offset inside the row
 78 | 		align 16
 79 | xloop:
 80 | 		movq mm0,[rdi+rax*4]   ;VYUYVYUY
 81 | 		movq mm1,[rdi+rax*4+8] ;VYUYVYUY
 82 | 		movq mm2,mm0           ;VYUYVYUY
 83 | 		movq mm3,mm1           ;VYUYVYUY
 84 | 		pand mm0,mm5           ;0Y0Y0Y0Y
 85 | 		psrlw mm2,8 	       ;0V0U0V0U
 86 | 		pand mm1,mm5           ;0Y0Y0Y0Y
 87 | 		psrlw mm3,8            ;0V0U0V0U
 88 | 		packuswb mm0,mm1       ;YYYYYYYY
 89 | 		packuswb mm2,mm3       ;VUVUVUVU
 90 | 		movq mm4,mm2           ;VUVUVUVU
 91 | 		pand mm2,mm5           ;0U0U0U0U
 92 | 		psrlw mm4,8            ;0V0V0V0V
 93 | 		packuswb mm2,mm2       ;xxxxUUUU
 94 | 		packuswb mm4,mm4       ;xxxxVVVV
 95 | 		movq [rbx+rax*2],mm0   ;store y
 96 | 		movd dword ptr[rdx+rax],mm2     ;store u
 97 | 		movd dword ptr[rsi+rax],mm4     ;store v
 98 | 		add rax,r12
 99 | 		cmp rax,rcx
100 | 		jl short xloop
101 | 		add rdi,r9             ;next src row
102 | 		add rbx,r10            ;next Y row
103 | 		add rdx,r11            ;next U row
104 | 		add rsi,r11            ;next V row
105 | 		dec r8
106 | 		jnz short yloop
107 | fin_YUY2to422_MMX:
108 | 		emms
109 | 	pop r12
110 | 	pop rdi
111 | 	pop rsi
112 | 	pop rbx
113 | 	pop rbp		
114 | 				
115 | 		ret
116 | 		
117 | convYUY2to422_MMX endp
118 | 
119 | 
120 | ;convYUY2to422_SSE2 - x64 (Microsoft ABI), SSE2 : packed YUY2 -> separate Y, U and V planes
121 | ; src = rcx, py = rdx, pu = r8, pv = r9
122 | ; pitch1, pitch2Y, pitch2UV, width_, height = stack arguments (equ list below)
123 | ; width_ = number of 8-sample groups per row : width_/2 passes of 16 samples, plus one 8-sample pass when width_ is odd
124 | ; height = number of rows ; nothing is converted when it is 0. src and Y rows are accessed with movdqa (16-byte alignment)
125 | 
126 | convYUY2to422_SSE2 proc public frame
127 | 	
128 | pitch1 equ dword ptr[rbp+48]
129 | pitch2Y equ dword ptr[rbp+56]
130 | pitch2UV equ dword ptr[rbp+64]
131 | width_ equ dword ptr[rbp+72]
132 | height equ dword ptr[rbp+80]
133 | 
134 | 	push rbp
135 | 	.pushreg rbp
136 | 	mov rbp,rsp
137 | 	push rbx
138 | 	.pushreg rbx
139 | 	push rsi
140 | 	.pushreg rsi
141 | 	push rdi
142 | 	.pushreg rdi
143 | 	push r12
144 | 	.pushreg r12
145 | 	push r13
146 | 	.pushreg r13
147 | 	push r14
148 | 	.pushreg r14
149 | 	push r15
150 | 	.pushreg r15
151 | 	.endprolog
152 | 		
153 | 		mov rdi,rcx            ;rdi = src
154 | 		mov rbx,rdx            ;rbx = py
155 | 		mov rdx,r8             ;rdx = pu
156 | 		mov rsi,r9             ;rsi = pv
157 | 		mov r13d,width_        ;r13 = width_
158 | 		mov r8d,height         ;r8 = rows left (32-bit load clears the upper half)
159 | 		test r8d,r8d
160 | 		jz fin_YUY2to422_SSE2  ;height = 0 : the dec/jnz row loop would wrap around
161 | 		
162 | 		movsxd r9,pitch1       ;byte step between src rows
163 | 		movsxd r10,pitch2Y     ;byte step between Y rows
164 | 		movsxd r11,pitch2UV    ;byte step between U rows and between V rows
165 | 		mov r12,8              ;chroma bytes produced by one 16-sample pass
166 | 		mov r14d,r13d
167 | 		shr r14d,1             ;r14 = 16-sample passes per row
168 | 		mov r15d,1             ;mask selecting the "one 8-sample group left" bit
169 | 		
170 | yloop_2:
171 | 		xor rax,rax            ;rax = chroma byte offset inside the row
172 | 		mov ecx,r14d
173 | 		test ecx,ecx
174 | 		jz short suite1_2
175 | 		
176 | xloop_2:
177 | 	movdqa xmm0,XMMWORD ptr[rdi+4*rax]   ;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
178 | 	movdqa xmm1,XMMWORD ptr[rdi+4*rax+16]   ;V8Y16U8Y15V7Y14U7Y13 V6Y12U6Y11V5Y10U5Y9
179 | 	movdqa xmm2,xmm0                      ;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
180 | 	punpcklbw xmm0,xmm1                  ;V6V2Y12Y4U6U2Y11Y3 V5V1Y10Y2U5U1Y9Y1
181 | 	punpckhbw xmm2,xmm1                  ;V8V4Y16Y8U8U4Y15Y7 V7V3Y14Y6U7U3Y13Y5
182 | 	movdqa xmm1,xmm0
183 | 	punpcklbw xmm0,xmm2                  ;V7V5V3V1Y14Y10Y6Y2 U7U5U3U1Y13Y9Y5Y1
184 | 	punpckhbw xmm1,xmm2                  ;V8V6V4V2Y16Y12Y8Y4 U8U6U4U2Y15Y11Y7Y3
185 | 	movdqa xmm2,xmm0
186 | 	punpcklbw xmm0,xmm1                  ;U8U7U6U5U4U3U2U1 Y15Y13Y11Y9Y7Y5Y3Y1
187 | 	punpckhbw xmm2,xmm1                  ;V8V7V6V5V4V3V2V1 Y16Y14Y12Y10Y8Y6Y4Y2
188 | 	movhps qword ptr [rdx+rax],xmm0      ;store 8 u
189 | 	punpcklbw xmm0,xmm2                  ;Y16Y15Y14Y13Y12Y11Y10Y9Y8Y7Y6Y5Y4Y3Y2Y1
190 | 	movhps qword ptr [rsi+rax],xmm2      ;store 8 v
191 | 	movdqa XMMWORD ptr[rbx+2*rax],xmm0   ;store 16 y
192 | 	add rax,r12
193 | 	dec ecx                              ;dec/jnz replaces the slow "loop" instruction
194 | 	jnz short xloop_2
195 | suite1_2:
196 | 		mov ecx,r13d
197 | 		and ecx,r15d           ;odd width_ : one 8-sample group is left
198 | 		jz short suite2_2
199 | 
200 | 	movdqa xmm0,XMMWORD ptr[rdi+4*rax]   ;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
201 | 	movhlps xmm1,xmm0                    ;V4Y8U4Y7V3Y6U3Y5 V4Y8U4Y7V3Y6U3Y5
202 | 	punpcklbw xmm0,xmm1                  ;V4V2Y8Y4U4U2Y7Y3 V3V1Y6Y2U3U1Y5Y1
203 | 	movhlps xmm1,xmm0                    ;V4V2Y8Y4U4U2Y7Y3 V4V2Y8Y4U4U2Y7Y3
204 | 	punpcklbw xmm0,xmm1                  ;V4V3V2V1Y8Y6Y4Y2 U4U3U2U1Y7Y5Y3Y1
205 | 	movhlps xmm2,xmm0                    ;xxxxxxxx V4V3V2V1Y8Y6Y4Y2
206 | 	movdqa xmm1,xmm0
207 | 	psrlq xmm0,32                        ;0000V4V3V2V1 0000U4U3U2U1
208 | 	punpcklbw xmm1,xmm2                  ; xxxxxxxx Y8Y7Y6Y5Y4Y3Y2Y1
209 | 	movd dword ptr[rdx+rax],xmm0         ;store 4 u
210 | 	movhlps xmm2,xmm0                    ;xxxxxxxx 0000V4V3V2V1
211 | 	movq qword ptr[rbx+2*rax],xmm1       ;store 8 y
212 | 	movd dword ptr[rsi+rax],xmm2         ;store 4 v
213 | 	
214 | suite2_2:	
215 | 		add rdi,r9             ;next src row
216 | 		add rbx,r10            ;next Y row
217 | 		add rdx,r11            ;next U row
218 | 		add rsi,r11            ;next V row
219 | 		dec r8
220 | 		jnz yloop_2
221 | fin_YUY2to422_SSE2:
222 | 	pop r15
223 | 	pop r14
224 | 	pop r13
225 | 	pop r12
226 | 	pop rdi
227 | 	pop rsi
228 | 	pop rbx
229 | 	pop rbp		
230 | 				
231 | 		ret
232 | 		
233 | convYUY2to422_SSE2 endp
234 | 
235 | 
236 | ;convYUY2to422_AVX - x64 (Microsoft ABI), AVX (128-bit VEX) : packed YUY2 -> separate Y, U and V planes
237 | ; src = rcx, py = rdx, pu = r8, pv = r9
238 | ; pitch1, pitch2Y, pitch2UV, width_, height = stack arguments (equ list below)
239 | ; width_ = number of 8-sample groups per row : width_/2 passes of 16 samples, plus one 8-sample pass when width_ is odd
240 | ; height = number of rows ; nothing is converted when it is 0. src and Y rows are accessed with vmovdqa (16-byte alignment)
241 | 
242 | convYUY2to422_AVX proc public frame
243 | 	
244 | pitch1 equ dword ptr[rbp+48]
245 | pitch2Y equ dword ptr[rbp+56]
246 | pitch2UV equ dword ptr[rbp+64]
247 | width_ equ dword ptr[rbp+72]
248 | height equ dword ptr[rbp+80]
249 | 
250 | 	push rbp
251 | 	.pushreg rbp
252 | 	mov rbp,rsp
253 | 	push rbx
254 | 	.pushreg rbx
255 | 	push rsi
256 | 	.pushreg rsi
257 | 	push rdi
258 | 	.pushreg rdi
259 | 	push r12
260 | 	.pushreg r12
261 | 	push r13
262 | 	.pushreg r13
263 | 	push r14
264 | 	.pushreg r14
265 | 	push r15
266 | 	.pushreg r15
267 | 	.endprolog
268 | 		
269 | 		mov rdi,rcx            ;rdi = src
270 | 		mov rbx,rdx            ;rbx = py
271 | 		mov rdx,r8             ;rdx = pu
272 | 		mov rsi,r9             ;rsi = pv
273 | 		mov r13d,width_        ;r13 = width_
274 | 		mov r8d,height         ;r8 = rows left (32-bit load clears the upper half)
275 | 		test r8d,r8d
276 | 		jz fin_YUY2to422_AVX   ;height = 0 : the dec/jnz row loop would wrap around
277 | 		
278 | 		movsxd r9,pitch1       ;byte step between src rows
279 | 		movsxd r10,pitch2Y     ;byte step between Y rows
280 | 		movsxd r11,pitch2UV    ;byte step between U rows and between V rows
281 | 		mov r12,8              ;chroma bytes produced by one 16-sample pass
282 | 		mov r14d,r13d
283 | 		shr r14d,1             ;r14 = 16-sample passes per row
284 | 		mov r15d,1             ;mask selecting the "one 8-sample group left" bit
285 | 		
286 | yloop_2_AVX:
287 | 		xor rax,rax            ;rax = chroma byte offset inside the row
288 | 		mov ecx,r14d
289 | 		test ecx,ecx
290 | 		jz short suite1_2_AVX
291 | 		
292 | xloop_2_AVX:
293 | 	vmovdqa xmm0,XMMWORD ptr[rdi+4*rax]   ;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
294 | 	vmovdqa xmm1,XMMWORD ptr[rdi+4*rax+16]   ;V8Y16U8Y15V7Y14U7Y13 V6Y12U6Y11V5Y10U5Y9
295 | 	vpunpckhbw xmm2,xmm0,xmm1                ;V8V4Y16Y8U8U4Y15Y7 V7V3Y14Y6U7U3Y13Y5
296 | 	vpunpcklbw xmm0,xmm0,xmm1                ;V6V2Y12Y4U6U2Y11Y3 V5V1Y10Y2U5U1Y9Y1
297 | 	vpunpckhbw xmm1,xmm0,xmm2                ;V8V6V4V2Y16Y12Y8Y4 U8U6U4U2Y15Y11Y7Y3
298 | 	vpunpcklbw xmm0,xmm0,xmm2                ;V7V5V3V1Y14Y10Y6Y2 U7U5U3U1Y13Y9Y5Y1
299 | 	vpunpckhbw xmm2,xmm0,xmm1                ;V8V7V6V5V4V3V2V1 Y16Y14Y12Y10Y8Y6Y4Y2
300 | 	vpunpcklbw xmm0,xmm0,xmm1                ;U8U7U6U5U4U3U2U1 Y15Y13Y11Y9Y7Y5Y3Y1
301 | 	vmovhps qword ptr [rdx+rax],xmm0         ;store 8 u
302 | 	vpunpcklbw xmm0,xmm0,xmm2                ;Y16Y15Y14Y13Y12Y11Y10Y9Y8Y7Y6Y5Y4Y3Y2Y1
303 | 	vmovhps qword ptr [rsi+rax],xmm2         ;store 8 v
304 | 	vmovdqa XMMWORD ptr[rbx+2*rax],xmm0      ;store 16 y
305 | 	add rax,r12
306 | 	dec ecx                                  ;dec/jnz replaces the slow "loop" instruction
307 | 	jnz short xloop_2_AVX
308 | suite1_2_AVX:
309 | 		mov ecx,r13d
310 | 		and ecx,r15d           ;odd width_ : one 8-sample group is left
311 | 		jz short suite2_2_AVX
312 | 
313 | 	vmovdqa xmm0,XMMWORD ptr[rdi+4*rax]   ;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
314 | 	vmovhlps xmm1,xmm1,xmm0               ;V4Y8U4Y7V3Y6U3Y5 V4Y8U4Y7V3Y6U3Y5
315 | 	vpunpcklbw xmm0,xmm0,xmm1             ;V4V2Y8Y4U4U2Y7Y3 V3V1Y6Y2U3U1Y5Y1
316 | 	vmovhlps xmm1,xmm1,xmm0               ;V4V2Y8Y4U4U2Y7Y3 V4V2Y8Y4U4U2Y7Y3
317 | 	vpunpcklbw xmm0,xmm0,xmm1             ;V4V3V2V1Y8Y6Y4Y2 U4U3U2U1Y7Y5Y3Y1
318 | 	vmovhlps xmm2,xmm2,xmm0               ;xxxxxxxx V4V3V2V1Y8Y6Y4Y2
319 | 	vpunpcklbw xmm1,xmm0,xmm2             ; xxxxxxxx Y8Y7Y6Y5Y4Y3Y2Y1
320 | 	vpsrlq xmm0,xmm0,32                   ;0000V4V3V2V1 0000U4U3U2U1
321 | 	vmovd dword ptr[rdx+rax],xmm0         ;store 4 u
322 | 	vmovhlps xmm2,xmm2,xmm0               ;xxxxxxxx 0000V4V3V2V1
323 | 	vmovq qword ptr[rbx+2*rax],xmm1       ;store 8 y
324 | 	vmovd dword ptr[rsi+rax],xmm2         ;store 4 v
325 | 	
326 | suite2_2_AVX:	
327 | 		add rdi,r9             ;next src row
328 | 		add rbx,r10            ;next Y row
329 | 		add rdx,r11            ;next U row
330 | 		add rsi,r11            ;next V row
331 | 		dec r8
332 | 		jnz yloop_2_AVX
333 | fin_YUY2to422_AVX:
334 | 	pop r15
335 | 	pop r14
336 | 	pop r13
337 | 	pop r12
338 | 	pop rdi
339 | 	pop rsi
340 | 	pop rbx
341 | 	pop rbp		
342 | 				
343 | 		ret
344 | 		
345 | convYUY2to422_AVX endp
346 | 
347 | 
348 | ;conv422toYUY2_MMX - x64 (Microsoft ABI), MMX : separate Y, U and V planes -> packed YUY2
349 | ; py = rcx, pu = rdx, pv = r8, dst = r9
350 | ; pitch1Y, pitch1UV, pitch2, width_, height = stack arguments (equ list below)
351 | ; width_ = luma samples per row, handled 8 at a time (the row loop body runs at least once)
352 | ; height = number of rows ; nothing is converted when it is 0
353 | 
354 | conv422toYUY2_MMX proc public frame
355 | 	
356 | pitch1Y equ dword ptr[rbp+48]
357 | pitch1UV equ dword ptr[rbp+56]
358 | pitch2 equ dword ptr[rbp+64]
359 | width_ equ dword ptr[rbp+72]
360 | height equ dword ptr[rbp+80]
361 | 
362 | 	push rbp
363 | 	.pushreg rbp
364 | 	mov rbp,rsp
365 | 	push rbx
366 | 	.pushreg rbx
367 | 	push rsi
368 | 	.pushreg rsi
369 | 	push rdi
370 | 	.pushreg rdi
371 | 	push r12
372 | 	.pushreg r12
373 | 	.endprolog
374 | 			
375 | 		mov rbx,rcx            ;rbx = py (rdx keeps pu)
376 | 		mov rsi,r8             ;rsi = pv
377 | 		mov rdi,r9             ;rdi = dst
378 | 		mov ecx,width_         ;32-bit load already clears the upper half of rcx
379 | 		shr ecx,1              ;rcx = chroma samples per row
380 | 		
381 | 		mov r8d,height         ;r8 = rows left (zero-extended)
382 | 		test r8d,r8d
383 | 		jz fin_422toYUY2_MMX   ;height = 0 : the dec/jnz row loop would wrap around
384 | 		movsxd r9,pitch1Y      ;byte step between Y rows
385 | 		movsxd r10,pitch1UV    ;byte step between U rows and between V rows
386 | 		movsxd r11,pitch2      ;byte step between dst rows
387 | 		mov r12,4              ;chroma bytes consumed by one xloop_3 pass
388 | 		
389 | yloop_3:
390 | 		xor rax,rax            ;rax = chroma byte offset inside the row
391 | 		align 16
392 | xloop_3:
393 | 		movq mm0,[rbx+rax*2]   ;YYYYYYYY
394 | 		movd mm1,dword ptr[rdx+rax]     ;0000UUUU
395 | 		movd mm2,dword ptr[rsi+rax]     ;0000VVVV
396 | 		movq mm3,mm0           ;YYYYYYYY
397 | 		punpcklbw mm1,mm2      ;VUVUVUVU
398 | 		punpcklbw mm0,mm1      ;VYUYVYUY
399 | 		punpckhbw mm3,mm1      ;VYUYVYUY
400 | 		movq [rdi+rax*4],mm0   ;store
401 | 		movq [rdi+rax*4+8],mm3 ;store
402 | 		add rax,r12
403 | 		cmp rax,rcx
404 | 		jl short xloop_3
405 | 		add rbx,r9             ;next Y row
406 | 		add rdx,r10            ;next U row
407 | 		add rsi,r10            ;next V row
408 | 		add rdi,r11            ;next dst row
409 | 		dec r8
410 | 		jnz short yloop_3
411 | fin_422toYUY2_MMX:
412 | 		emms
413 | 	pop r12
414 | 	pop rdi
415 | 	pop rsi
416 | 	pop rbx
417 | 	pop rbp		
418 | 		
419 | 		ret
420 | 		
421 | conv422toYUY2_MMX endp
422 | 
423 | 
424 | ;conv422toYUY2_SSE2 - x64 (Microsoft ABI), SSE2 : separate Y, U and V planes -> packed YUY2
425 | ; py = rcx, pu = rdx, pv = r8, dst = r9
426 | ; pitch1Y, pitch1UV, pitch2, width_, height = stack arguments (equ list below)
427 | ; width_ = number of 8-sample groups per row : width_/2 passes of 16 samples, plus one 8-sample pass when width_ is odd
428 | ; height = number of rows ; nothing is converted when it is 0. dst is advanced while a row is written, pitch2 is added on top of that
429 | 
430 | conv422toYUY2_SSE2 proc public frame
431 | 	
432 | pitch1Y equ dword ptr[rbp+48]
433 | pitch1UV equ dword ptr[rbp+56]
434 | pitch2 equ dword ptr[rbp+64]
435 | width_ equ dword ptr[rbp+72]
436 | height equ dword ptr[rbp+80]
437 | 
438 | 	push rbp
439 | 	.pushreg rbp
440 | 	mov rbp,rsp
441 | 	push rbx
442 | 	.pushreg rbx
443 | 	push rsi
444 | 	.pushreg rsi
445 | 	push rdi
446 | 	.pushreg rdi
447 | 	push r12
448 | 	.pushreg r12
449 | 	push r13
450 | 	.pushreg r13
451 | 	push r14
452 | 	.pushreg r14
453 | 	push r15
454 | 	.pushreg r15
455 | 	.endprolog
456 | 			
457 | 		mov rbx,rcx            ;rbx = py (rdx keeps pu)
458 | 		mov rsi,r8             ;rsi = pv
459 | 		mov rdi,r9             ;rdi = dst
460 | 		mov r15d,width_        ;r15 = width_
461 | 		
462 | 		mov r8d,height         ;r8 = rows left (32-bit load clears the upper half)
463 | 		test r8d,r8d
464 | 		jz fin_422toYUY2_SSE2  ;height = 0 : the dec/jnz row loop would wrap around
465 | 		
466 | 		movsxd r9,pitch1Y      ;byte step between Y rows
467 | 		movsxd r10,pitch1UV    ;byte step between U rows and between V rows
468 | 		movsxd r11,pitch2      ;added to rdi after the row bytes already written
469 | 		mov r12,16             ;bytes written by the 8-sample pass / offset of the 2nd store
470 | 		mov r13,32             ;bytes written by one 16-sample pass
471 | 		mov r14,2              ;rax step per 16-sample pass (rax*8 = Y offset, rax*4 = U/V offset)
472 | 		
473 | yloop_4:
474 | 		xor rax,rax
475 | 		mov ecx,r15d
476 | 		shr ecx,1              ;rcx = 16-sample passes for this row
477 | 		jz short suite1
478 | 		
479 | xloop_4:
480 | 		movq xmm1,qword ptr[rdx+4*rax]     ;00000000UUUUUUUU
481 | 		movq xmm0,qword ptr[rsi+4*rax]     ;00000000VVVVVVVV
482 | 		movdqa xmm2,XMMWORD ptr[rbx+8*rax] ;YYYYYYYYYYYYYYYY
483 | 		punpcklbw xmm1,xmm0					;VUVUVUVUVUVUVUVU
484 | 		movdqa xmm3,xmm2
485 | 		add rax,r14
486 | 		punpcklbw xmm2,xmm1     			;VYUYVYUYVYUYVYUY
487 | 		punpckhbw xmm3,xmm1     			;VYUYVYUYVYUYVYUY
488 | 		
489 | 		movdqa XMMWORD ptr[rdi],xmm2 ;store
490 | 		movdqa XMMWORD ptr[rdi+r12],xmm3 ;store
491 | 		add rdi,r13
492 | 		dec ecx                      ;dec/jnz replaces the slow "loop" instruction
493 | 		jnz short xloop_4
494 | suite1:		
495 | 		mov ecx,r15d
496 | 		and ecx,1              ;odd width_ : one 8-sample group is left
497 | 		jz short suite2
498 | 
499 | 		movd xmm1,dword ptr[rdx+4*rax]     ;000000000000UUUU
500 | 		movd xmm0,dword ptr[rsi+4*rax]     ;000000000000VVVV
501 | 		movq xmm2,qword ptr[rbx+8*rax] ;00000000YYYYYYYY
502 | 		punpcklbw xmm1,xmm0					;00000000VUVUVUVU
503 | 		punpcklbw xmm2,xmm1     			;VYUYVYUYVYUYVYUY
504 | 		
505 | 		movdqa XMMWORD ptr[rdi],xmm2 ;store
506 | 		add rdi,r12			
507 | 		
508 | suite2:		
509 | 		add rbx,r9             ;next Y row
510 | 		add rdx,r10            ;next U row
511 | 		add rsi,r10            ;next V row
512 | 		add rdi,r11            ;skip from the end of the written data to the next dst row
513 | 		dec r8
514 | 		jnz yloop_4
515 | fin_422toYUY2_SSE2:
516 | 	pop r15
517 | 	pop r14
518 | 	pop r13
519 | 	pop r12
520 | 	pop rdi
521 | 	pop rsi
522 | 	pop rbx
523 | 	pop rbp		
524 | 				
525 | 		ret
526 | 		
527 | conv422toYUY2_SSE2 endp
528 | 
529 | 
530 | ;conv422toYUY2_AVX - x64 (Microsoft ABI), AVX (128-bit VEX) : separate Y, U and V planes -> packed YUY2
531 | ; py = rcx, pu = rdx, pv = r8, dst = r9
532 | ; pitch1Y, pitch1UV, pitch2, width_, height = stack arguments (equ list below)
533 | ; width_ = number of 8-sample groups per row : width_/2 passes of 16 samples, plus one 8-sample pass when width_ is odd
534 | ; height = number of rows ; nothing is converted when it is 0. dst is advanced while a row is written, pitch2 is added on top of that
535 | 
536 | conv422toYUY2_AVX proc public frame
537 | 	
538 | pitch1Y equ dword ptr[rbp+48]
539 | pitch1UV equ dword ptr[rbp+56]
540 | pitch2 equ dword ptr[rbp+64]
541 | width_ equ dword ptr[rbp+72]
542 | height equ dword ptr[rbp+80]
543 | 
544 | 	push rbp
545 | 	.pushreg rbp
546 | 	mov rbp,rsp
547 | 	push rbx
548 | 	.pushreg rbx
549 | 	push rsi
550 | 	.pushreg rsi
551 | 	push rdi
552 | 	.pushreg rdi
553 | 	push r12
554 | 	.pushreg r12
555 | 	push r13
556 | 	.pushreg r13
557 | 	push r14
558 | 	.pushreg r14
559 | 	push r15
560 | 	.pushreg r15
561 | 	.endprolog
562 | 			
563 | 		mov rbx,rcx            ;rbx = py (rdx keeps pu)
564 | 		mov rsi,r8             ;rsi = pv
565 | 		mov rdi,r9             ;rdi = dst
566 | 		mov r15d,width_        ;r15 = width_
567 | 		
568 | 		mov r8d,height         ;r8 = rows left (32-bit load clears the upper half)
569 | 		test r8d,r8d
570 | 		jz fin_422toYUY2_AVX   ;height = 0 : the dec/jnz row loop would wrap around
571 | 		
572 | 		movsxd r9,pitch1Y      ;byte step between Y rows
573 | 		movsxd r10,pitch1UV    ;byte step between U rows and between V rows
574 | 		movsxd r11,pitch2      ;added to rdi after the row bytes already written
575 | 		mov r12,16             ;bytes written by the 8-sample pass / offset of the 2nd store
576 | 		mov r13,32             ;bytes written by one 16-sample pass
577 | 		mov r14,2              ;rax step per 16-sample pass (rax*8 = Y offset, rax*4 = U/V offset)
578 | 		
579 | yloop_4_AVX:
580 | 		xor rax,rax
581 | 		mov ecx,r15d
582 | 		shr ecx,1              ;rcx = 16-sample passes for this row
583 | 		jz short suite1_AVX
584 | 		
585 | xloop_4_AVX:
586 | 		vmovq xmm1,qword ptr[rdx+4*rax]     ;00000000UUUUUUUU
587 | 		vmovq xmm0,qword ptr[rsi+4*rax]     ;00000000VVVVVVVV
588 | 		vmovdqa xmm2,XMMWORD ptr[rbx+8*rax] ;YYYYYYYYYYYYYYYY
589 | 		vpunpcklbw xmm1,xmm1,xmm0				;VUVUVUVUVUVUVUVU
590 | 		add rax,r14
591 | 		vpunpckhbw xmm3,xmm2,xmm1     			;VYUYVYUYVYUYVYUY
592 | 		vpunpcklbw xmm2,xmm2,xmm1     			;VYUYVYUYVYUYVYUY
593 | 		
594 | 		vmovdqa XMMWORD ptr[rdi],xmm2 ;store
595 | 		vmovdqa XMMWORD ptr[rdi+r12],xmm3 ;store
596 | 		add rdi,r13
597 | 		dec ecx                       ;dec/jnz replaces the slow "loop" instruction
598 | 		jnz short xloop_4_AVX
599 | suite1_AVX:		
600 | 		mov ecx,r15d
601 | 		and ecx,1              ;odd width_ : one 8-sample group is left
602 | 		jz short suite2_AVX
603 | 
604 | 		vmovd xmm1,dword ptr[rdx+4*rax]     ;000000000000UUUU
605 | 		vmovd xmm0,dword ptr[rsi+4*rax]     ;000000000000VVVV
606 | 		vmovq xmm2,qword ptr[rbx+8*rax] ;00000000YYYYYYYY
607 | 		vpunpcklbw xmm1,xmm1,xmm0				;00000000VUVUVUVU
608 | 		vpunpcklbw xmm2,xmm2,xmm1     			;VYUYVYUYVYUYVYUY
609 | 		
610 | 		vmovdqa XMMWORD ptr[rdi],xmm2 ;store
611 | 		add rdi,r12			
612 | 		
613 | suite2_AVX:		
614 | 		add rbx,r9             ;next Y row
615 | 		add rdx,r10            ;next U row
616 | 		add rsi,r10            ;next V row
617 | 		add rdi,r11            ;skip from the end of the written data to the next dst row
618 | 		dec r8
619 | 		jnz yloop_4_AVX
620 | fin_422toYUY2_AVX:
621 | 	pop r15
622 | 	pop r14
623 | 	pop r13
624 | 	pop r12
625 | 	pop rdi
626 | 	pop rsi
627 | 	pop rbx
628 | 	pop rbp		
629 | 				
630 | 		ret
631 | 		
632 | conv422toYUY2_AVX endp
633 | 
634 | 
635 | end
636 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/Plugins_JPSDR.vcxproj.filters:
--------------------------------------------------------------------------------
  1 | ﻿<?xml version="1.0" encoding="utf-8"?>
  2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup>
  4 |     <Filter Include="Fichiers sources">
  5 |       <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
  6 |       <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
  7 |     </Filter>
  8 |     <Filter Include="Fichiers d%27en-tête">
  9 |       <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
 10 |       <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
 11 |     </Filter>
 12 |     <Filter Include="Fichiers de ressources">
 13 |       <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
 14 |       <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
 15 |     </Filter>
 16 |   </ItemGroup>
 17 |   <ItemGroup>
 18 |     <ClCompile Include="nnedi3.cpp">
 19 |       <Filter>Fichiers sources</Filter>
 20 |     </ClCompile>
 21 |     <ClCompile Include="PlanarFrame.cpp">
 22 |       <Filter>Fichiers sources</Filter>
 23 |     </ClCompile>
 24 |     <ClCompile Include="ThreadPool.cpp">
 25 |       <Filter>Fichiers sources</Filter>
 26 |     </ClCompile>
 27 |     <ClCompile Include="ThreadPoolInterface.cpp">
 28 |       <Filter>Fichiers sources</Filter>
 29 |     </ClCompile>
 30 |     <ClCompile Include="AutoYUY2.cpp">
 31 |       <Filter>Fichiers sources</Filter>
 32 |     </ClCompile>
 33 |     <ClCompile Include="resample.cpp">
 34 |       <Filter>Fichiers sources</Filter>
 35 |     </ClCompile>
 36 |     <ClCompile Include="resample_functions.cpp">
 37 |       <Filter>Fichiers sources</Filter>
 38 |     </ClCompile>
 39 |     <ClCompile Include="plugins_JPSDR.cpp">
 40 |       <Filter>Fichiers sources</Filter>
 41 |     </ClCompile>
 42 |     <ClCompile Include="MatrixClass.cpp">
 43 |       <Filter>Fichiers sources</Filter>
 44 |     </ClCompile>
 45 |     <ClCompile Include="aWarpSharp.cpp">
 46 |       <Filter>Fichiers sources</Filter>
 47 |     </ClCompile>
 48 |     <ClCompile Include="HDRTools.cpp">
 49 |       <Filter>Fichiers sources</Filter>
 50 |     </ClCompile>
 51 |     <ClCompile Include="TransferFunctions.cpp">
 52 |       <Filter>Fichiers sources</Filter>
 53 |     </ClCompile>
 54 |     <ClCompile Include="resample_avx2.cpp">
 55 |       <Filter>Fichiers sources</Filter>
 56 |     </ClCompile>
 57 |     <ClCompile Include="resample_sse.cpp">
 58 |       <Filter>Fichiers sources</Filter>
 59 |     </ClCompile>
 60 |   </ItemGroup>
 61 |   <ItemGroup>
 62 |     <ClInclude Include="avisynth.h">
 63 |       <Filter>Fichiers d%27en-tête</Filter>
 64 |     </ClInclude>
 65 |     <ClInclude Include="nnedi3.h">
 66 |       <Filter>Fichiers d%27en-tête</Filter>
 67 |     </ClInclude>
 68 |     <ClInclude Include="PlanarFrame.h">
 69 |       <Filter>Fichiers d%27en-tête</Filter>
 70 |     </ClInclude>
 71 |     <ClInclude Include="resource.h">
 72 |       <Filter>Fichiers d%27en-tête</Filter>
 73 |     </ClInclude>
 74 |     <ClInclude Include="ThreadPool.h">
 75 |       <Filter>Fichiers d%27en-tête</Filter>
 76 |     </ClInclude>
 77 |     <ClInclude Include="ThreadPoolDef.h">
 78 |       <Filter>Fichiers d%27en-tête</Filter>
 79 |     </ClInclude>
 80 |     <ClInclude Include="ThreadPoolInterface.h">
 81 |       <Filter>Fichiers d%27en-tête</Filter>
 82 |     </ClInclude>
 83 |     <ClInclude Include="resample.h">
 84 |       <Filter>Fichiers d%27en-tête</Filter>
 85 |     </ClInclude>
 86 |     <ClInclude Include="resample_functions.h">
 87 |       <Filter>Fichiers d%27en-tête</Filter>
 88 |     </ClInclude>
 89 |     <ClInclude Include="MatrixClass.h">
 90 |       <Filter>Fichiers d%27en-tête</Filter>
 91 |     </ClInclude>
 92 |     <ClInclude Include="aWarpSharp.h">
 93 |       <Filter>Fichiers d%27en-tête</Filter>
 94 |     </ClInclude>
 95 |     <ClInclude Include="AutoYUY2.h">
 96 |       <Filter>Fichiers d%27en-tête</Filter>
 97 |     </ClInclude>
 98 |     <ClInclude Include="HDRTools.h">
 99 |       <Filter>Fichiers d%27en-tête</Filter>
100 |     </ClInclude>
101 |     <ClInclude Include="TransferFunctions.h">
102 |       <Filter>Fichiers d%27en-tête</Filter>
103 |     </ClInclude>
104 |     <ClInclude Include="resample_avx2.h">
105 |       <Filter>Fichiers d%27en-tête</Filter>
106 |     </ClInclude>
107 |     <ClInclude Include="resample_sse.h">
108 |       <Filter>Fichiers d%27en-tête</Filter>
109 |     </ClInclude>
110 |   </ItemGroup>
111 |   <ItemGroup>
112 |     <MASM Include="AutoYUY2_asm.asm">
113 |       <Filter>Fichiers sources</Filter>
114 |     </MASM>
115 |     <MASM Include="nnedi3_asm.asm">
116 |       <Filter>Fichiers sources</Filter>
117 |     </MASM>
118 |     <MASM Include="nnedi3_asm_FMA.asm">
119 |       <Filter>Fichiers sources</Filter>
120 |     </MASM>
121 |     <MASM Include="PlanarFrame_asm.asm">
122 |       <Filter>Fichiers sources</Filter>
123 |     </MASM>
124 |     <MASM Include="MatrixClass_x64.asm">
125 |       <Filter>Fichiers sources</Filter>
126 |     </MASM>
127 |     <MASM Include="MatrixClass_x86.asm">
128 |       <Filter>Fichiers sources</Filter>
129 |     </MASM>
130 |     <MASM Include="aWarpSharp_asm.asm">
131 |       <Filter>Fichiers sources</Filter>
132 |     </MASM>
133 |     <MASM Include="aWarpSharp_asm_x64.asm">
134 |       <Filter>Fichiers sources</Filter>
135 |     </MASM>
136 |     <MASM Include="AutoYUY2_AVX2_asm.asm">
137 |       <Filter>Fichiers sources</Filter>
138 |     </MASM>
139 |     <MASM Include="AutoYUY2_AVX2_asm_x64.asm">
140 |       <Filter>Fichiers sources</Filter>
141 |     </MASM>
142 |     <MASM Include="HDRTools_asm.asm">
143 |       <Filter>Fichiers sources</Filter>
144 |     </MASM>
145 |     <MASM Include="HDRTools_asm_x64.asm">
146 |       <Filter>Fichiers sources</Filter>
147 |     </MASM>
148 |     <MASM Include="HDRTools_AVX2_asm.asm">
149 |       <Filter>Fichiers sources</Filter>
150 |     </MASM>
151 |     <MASM Include="HDRTools_AVX2_asm_x64.asm">
152 |       <Filter>Fichiers sources</Filter>
153 |     </MASM>
154 |   </ItemGroup>
155 |   <ItemGroup>
156 |     <CustomBuild Include="AutoYUY2_asm_x64.asm">
157 |       <Filter>Fichiers sources</Filter>
158 |     </CustomBuild>
159 |     <CustomBuild Include="nnedi3_asm_FMA_x64.asm">
160 |       <Filter>Fichiers sources</Filter>
161 |     </CustomBuild>
162 |     <CustomBuild Include="nnedi3_asm_x64.asm">
163 |       <Filter>Fichiers sources</Filter>
164 |     </CustomBuild>
165 |     <CustomBuild Include="PlanarFrame_asm_x64.asm">
166 |       <Filter>Fichiers sources</Filter>
167 |     </CustomBuild>
168 |   </ItemGroup>
169 |   <ItemGroup>
170 |     <ResourceCompile Include="plugins_JPSDR.rc">
171 |       <Filter>Fichiers de ressources</Filter>
172 |     </ResourceCompile>
173 |   </ItemGroup>
174 |   <ItemGroup>
175 |     <None Include="binary1.bin">
176 |       <Filter>Fichiers de ressources</Filter>
177 |     </None>
178 |   </ItemGroup>
179 | </Project>


--------------------------------------------------------------------------------
/Plugins_JPSDR/Plugins_JPSDR.vcxproj.user:
--------------------------------------------------------------------------------
1 | ﻿<?xml version="1.0" encoding="utf-8"?>
2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3 | </Project>


--------------------------------------------------------------------------------
/Plugins_JPSDR/ThreadPool.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpsdr/plugins_JPSDR/7826b045cfc28a0738088ec03a5e6f2edd44c9f0/Plugins_JPSDR/ThreadPool.cpp


--------------------------------------------------------------------------------
/Plugins_JPSDR/ThreadPool.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Threadpool
  3 |  *
  4 |  *  Create and manage a threadpool.
  5 |  *  Copyright (C) 2016 JPSDR
  6 |  *	
  7 |  *  Threadpool is free software; you can redistribute it and/or modify
  8 |  *  it under the terms of the GNU General Public License as published by
  9 |  *  the Free Software Foundation; either version 2, or (at your option)
 10 |  *  any later version.
 11 |  *   
 12 |  *  Threadpool is distributed in the hope that it will be useful,
 13 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 15 |  *  GNU General Public License for more details.
 16 |  *   
 17 |  *  You should have received a copy of the GNU General Public License
 18 |  *  along with GNU Make; see the file COPYING.  If not, write to
 19 |  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 20 |  *
 21 |  */
 22 | 
 23 | #ifndef __ThreadPool_H__
 24 | #define __ThreadPool_H__
 25 | 
 26 | #include <windows.h>
 27 | 
 28 | #include "./ThreadPoolDef.h"
 29 | 
 30 | #define THREADPOOL_VERSION "ThreadPool 1.4.4"
 31 | 
 32 | #define MAX_PHYSICAL_CORES 64
 33 | 
 34 | typedef struct _MT_Data_Thread	// Per-thread record; ThreadPool holds MAX_MT_THREADS of these in MT_Thread[]
 35 | {
 36 | 	Public_MT_Data_Thread *MTData;	// Points to a caller-visible Public_MT_Data_Thread (type from ThreadPoolDef.h)
 37 | 	uint8_t f_process,thread_Id;	// NOTE(review): presumably job selector and thread index - confirm in ThreadPool.cpp
 38 | 	HANDLE nextJob,jobFinished;	// Win32 handles; presumably "job available" / "job done" signals - confirm
 39 | } MT_Data_Thread;
 40 | 
 41 | 
 42 | typedef struct _Arch_CPU	// CPU description; ThreadPool stores one instance in its CPU member
 43 | {
 44 | 	uint8_t NbPhysCore,NbLogicCPU;	// Values returned by ThreadPool::GetPhysicalCoreNumber() / GetLogicalCPUNumber()
 45 | 	uint8_t NbHT[MAX_PHYSICAL_CORES];	// Sized for MAX_PHYSICAL_CORES entries; presumably logical CPUs per core - confirm
 46 | 	ULONG_PTR ProcMask[MAX_PHYSICAL_CORES];	// Sized for MAX_PHYSICAL_CORES entries; presumably per-core affinity masks - confirm
 47 | 	ULONG_PTR FullMask;	// NOTE(review): presumably the combined mask of all cores - confirm
 48 | } Arch_CPU;
 49 | 
 50 | 
 51 | class ThreadPool	// Declaration only: apart from the inline forwarders/getters below, members are defined outside this header
 52 | {
 53 | 	public :
 54 | 	ThreadPool(void);
 55 | 	virtual ~ThreadPool();
 56 | 
 57 | 	protected :
 58 | 
 59 | 	Arch_CPU CPU;	// Source of the values returned by GetLogicalCPUNumber() / GetPhysicalCoreNumber()
 60 | 
 61 | 	public :
 62 | 
 63 | 	uint8_t GetThreadNumber(uint8_t thread_number,bool logical);
 64 | 	bool AllocateThreads(uint8_t thread_number,uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,
 65 | 		bool SetAffinity,bool sleep,ThreadLevelName priority);
 66 | 	bool AllocateThreads(uint8_t thread_number,uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,
 67 | 		bool SetAffinity,bool sleep)	// Convenience overload: forwards with priority = NormalThreadLevel
 68 | 		{return(AllocateThreads(thread_number,offset_core,offset_ht,UseMaxPhysCore,SetAffinity,sleep,NormalThreadLevel));}
 69 | 	bool DeAllocateThreads(void);
 70 | 	bool ChangeThreadsAffinity(uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,bool SetAffinity);
 71 | 	bool ChangeThreadsLevel(ThreadLevelName priority);
 72 | 	bool RequestThreadPool(uint8_t thread_number,Public_MT_Data_Thread *Data,ThreadLevelName priority);
 73 | 	bool RequestThreadPool(uint8_t thread_number,Public_MT_Data_Thread *Data)	// Convenience overload: forwards with priority = NoneThreadLevel
 74 | 		{return(RequestThreadPool(thread_number,Data,NoneThreadLevel));}
 75 | 	bool ReleaseThreadPool(bool sleep);
 76 | 	bool StartThreads(void);
 77 | 	bool WaitThreadsEnd(void);
 78 | 	bool GetThreadPoolStatus(void) {return(Status_Ok);}	// Plain accessor for Status_Ok
 79 | 	uint8_t GetCurrentThreadAllocated(void) {return(CurrentThreadsAllocated);}	// Plain accessor
 80 | 	uint8_t GetCurrentThreadUsed(void) {return(CurrentThreadsUsed);}	// Plain accessor
 81 | 	uint8_t GetLogicalCPUNumber(void) {return(CPU.NbLogicCPU);}
 82 | 	uint8_t GetPhysicalCoreNumber(void) {return(CPU.NbPhysCore);}
 83 | 
 84 | 	protected :
 85 | 	// Per-thread state, every array sized for MAX_MT_THREADS entries
 86 | 	MT_Data_Thread MT_Thread[MAX_MT_THREADS];
 87 | 	HANDLE nextJob[MAX_MT_THREADS],jobFinished[MAX_MT_THREADS];
 88 | 	HANDLE thds[MAX_MT_THREADS];	// NOTE(review): presumably the thread handles - confirm in ThreadPool.cpp
 89 | 	DWORD tids[MAX_MT_THREADS];	// NOTE(review): presumably the thread ids - confirm in ThreadPool.cpp
 90 | 	ULONG_PTR ThreadMask[MAX_MT_THREADS];
 91 | 	bool ThreadSleep[MAX_MT_THREADS];
 92 | 	ThreadLevelName nPriority;
 93 | 
 94 | 	bool Status_Ok;	// Returned by GetThreadPoolStatus()
 95 | 	uint8_t TotalThreadsRequested,CurrentThreadsAllocated,CurrentThreadsUsed;
 96 | 	
 97 | 	void FreeThreadPool(void);
 98 | 	void DestroyThreadPool(void);
 99 | 	void CreateThreadPool(uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,bool SetAffinity,
100 | 		bool sleep,ThreadLevelName priority);
101 | 
102 | 	private :
103 | 
104 | 	static DWORD WINAPI StaticThreadpool(LPVOID lpParam);	// Signature matches a Win32 thread start routine
105 | 	// Copy construction, assignment and comparison are declared private, so code outside the class cannot use them
106 | 	ThreadPool (const ThreadPool &other);
107 | 	ThreadPool& operator = (const ThreadPool &other);
108 | 	bool operator == (const ThreadPool &other) const;
109 | 	bool operator != (const ThreadPool &other) const;
110 | };
111 | 
112 | #endif // __ThreadPool_H__
113 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/ThreadPoolDef.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Threadpool
 3 |  *
 4 |  *  Create and manage a threadpool.
 5 |  *  Copyright (C) 2016 JPSDR
 6 |  *	
 7 |  *  Threadpool is free software; you can redistribute it and/or modify
 8 |  *  it under the terms of the GNU General Public License as published by
 9 |  *  the Free Software Foundation; either version 2, or (at your option)
10 |  *  any later version.
11 |  *   
12 |  *  Threadpool is distributed in the hope that it will be useful,
13 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 |  *  GNU General Public License for more details.
16 |  *   
17 |  *  You should have received a copy of the GNU General Public License
18 |  *  along with GNU Make; see the file COPYING.  If not, write to
19 |  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
20 |  *
21 |  */
22 | 
23 | #ifndef __ThreadPoolDef_H__
24 | #define __ThreadPoolDef_H__
25 | 
26 | #include <stdint.h>
27 | 
28 | #define MAX_MT_THREADS 128  // Maximum possible 255
29 | #define MAX_THREAD_POOL 64  // Maximum possible 127
30 | 
31 | typedef void (*ThreadPoolFunction)(void *ptr);	// Callback type stored in Public_MT_Data_Thread::pFunc: takes one untyped pointer, returns nothing
32 | 
33 | enum ThreadLevelName {NoneThreadLevel,IdleThreadLevel,LowestThreadLevel,BelowThreadLevel,	// Thread level selector; values run 0..7 in declaration order
34 | 	NormalThreadLevel,AboveThreadLevel,HighestThreadLevel,CriticalThreadLevel};	// NOTE(review): presumably mapped to Win32 thread priorities - confirm in ThreadPool.cpp
35 | 
36 | typedef struct _Public_MT_Data_Thread	// Caller-visible per-thread job record
37 | {
38 | 	ThreadPoolFunction pFunc;	// Callback of type void(void *ptr)
39 | 	void *pClass;	// NOTE(review): presumably the object the callback works on - confirm against callers
40 | 	uint8_t f_process,thread_Id;	// NOTE(review): presumably job selector and thread index - confirm against callers
41 | 	void *pData;	// NOTE(review): presumably user payload for the callback - confirm against callers
42 | } Public_MT_Data_Thread;
43 | 
44 | 
45 | #endif // __ThreadPoolDef_H__
46 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/ThreadPoolInterface.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  ThreadpoolInterface
  3 |  *
  4 |  *  Allow to use the threadpool, kind of API.
  5 |  *  Copyright (C) 2017 JPSDR
  6 |  *	
  7 |  *  ThreadpoolInterface is free software; you can redistribute it and/or modify
  8 |  *  it under the terms of the GNU General Public License as published by
  9 |  *  the Free Software Foundation; either version 2, or (at your option)
 10 |  *  any later version.
 11 |  *   
 12 |  *  ThreadpoolInterface is distributed in the hope that it will be useful,
 13 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 15 |  *  GNU General Public License for more details.
 16 |  *   
 17 |  *  You should have received a copy of the GNU General Public License
 18 |  *  along with GNU Make; see the file COPYING.  If not, write to
 19 |  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 20 |  *
 21 |  */
 22 | 
 23 | #ifndef __ThreadPoolInterface_H__
 24 | #define __ThreadPoolInterface_H__
 25 | 
 26 | #include <windows.h>
 27 | #include <vector>
 28 | 
 29 | #include "./ThreadPoolDef.h"
 30 | 
 31 | #define THREADPOOLINTERFACE_VERSION "ThreadPoolInterface 1.12.0"
 32 | 
 33 | class ThreadPoolInterface;
 34 |  
 35 | class UserData	// Per-user settings record; all fields are protected and reached by ThreadPoolInterface through friendship
 36 | {
 37 | 	friend ThreadPoolInterface;
 38 | 		
 39 | 	public :
 40 | 
 41 | 	UserData(void);
 42 | 	virtual ~UserData(void);
 43 | 
 44 | 	protected :
 45 | 
 46 | 	uint32_t UserId;
 47 | 	bool AllowSeveral;	// NOTE(review): presumably toggled by Enable/DisableAllowSeveral() - confirm in ThreadPoolInterface.cpp
 48 | 	bool AllowWaiting;	// NOTE(review): presumably toggled by Enable/DisableWaitonRequest() - confirm
 49 | 	bool AllowTimeOut;	// NOTE(review): presumably toggled by Enable/DisableTimeOutonRequest() - confirm
 50 | 	bool AllowRetryMax;	// NOTE(review): presumably toggled by Enable/DisableRetryMaxonRequest() - confirm
 51 | 	DWORD TimeOut;	// NOTE(review): presumably milliseconds, set by ConfigureTimeOutValue() - confirm
 52 | 	uint8_t RetryMax;	// NOTE(review): presumably set by ConfigureRetryMaxValue() - confirm
 53 | 	int8_t NbrePool;	// NOTE(review): presumably the number of valid entries in UsedPool - confirm
 54 | 	int8_t UsedPool[MAX_THREAD_POOL];	// Sized for MAX_THREAD_POOL entries
 55 | };
 56 | 
 57 | 
// Front-end class giving access to a set of thread pools.
// The constructor is protected and the copy operations are declared private, so the
// only way visible in this header to obtain an instance is the static Init().
// NOTE(review): only declarations are visible here. The notes below are limited to
// what the inline bodies show; everything else must be checked against the .cpp.
class ThreadPoolInterface
{
	public :

	virtual ~ThreadPoolInterface(void);
	// Static entry point returning a ThreadPoolInterface pointer.
	// NOTE(review): ownership/lifetime of the returned pointer is not visible here -- confirm.
	static ThreadPoolInterface* Init(uint8_t num);

	uint8_t GetThreadNumber(uint8_t thread_number,bool logical);
	// Pool management. Note that AddPool() returns a signed 16-bit value while the
	// other three return bool. NOTE(review): meaning of the int16_t result to be confirmed.
	int16_t AddPool(uint8_t num);
	bool CreatePool(uint8_t num);
	bool DeletePool(uint8_t num);
	bool RemovePool(uint8_t num);
	bool AllocateThreads(uint8_t thread_number,uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,
		bool SetAffinity,bool sleep,ThreadLevelName priority,int8_t nPool);
	// Convenience overload: forwards to the full version with priority=NormalThreadLevel.
	bool AllocateThreads(uint8_t thread_number,uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,
		bool SetAffinity,bool sleep,int8_t nPool)
		{return(AllocateThreads(thread_number,offset_core,offset_ht,UseMaxPhysCore,SetAffinity,sleep,
			NormalThreadLevel,nPool));}
	// UserId is an output parameter (passed by non-const reference).
	bool GetUserId(uint32_t &UserId);
	bool RemoveUserId(uint32_t UserId);
	bool ChangeThreadsAffinity(uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,bool SetAffinity,int8_t nPool);
	bool ChangeThreadsLevel(ThreadLevelName priority,int8_t nPool);
	bool DeAllocateUserThreads(uint32_t UserId,bool check);
	bool DeAllocatePoolThreads(uint8_t nPool,bool check);
	bool DeAllocateAllThreads(bool check);
	// RequestThreadPool() overload family.
	// The versions taking "int8_t &idxPool" (and "int8_t &nPool") receive those arguments by
	// reference, so they can be written by the call.
	bool RequestThreadPool(uint32_t UserId,uint8_t thread_number,Public_MT_Data_Thread *Data,
		ThreadLevelName priority,int8_t nPool,bool Exclusive);
	bool RequestThreadPool(uint32_t UserId,int8_t &idxPool,uint8_t thread_number,Public_MT_Data_Thread *Data,
		ThreadLevelName priority,int8_t &nPool,bool Exclusive);
	bool RequestThreadPool(uint32_t UserId,int8_t &idxPool,uint8_t thread_number,Public_MT_Data_Thread *Data);
	bool RequestThreadPool(uint32_t UserId,int8_t &idxPool,uint8_t thread_number,Public_MT_Data_Thread *Data,
		ThreadLevelName priority);
	// Inline shortcut: same as the 6-argument version with priority=NoneThreadLevel.
	bool RequestThreadPool(uint32_t UserId,uint8_t thread_number,Public_MT_Data_Thread *Data,
		int8_t nPool,bool Exclusive)
		{return(RequestThreadPool(UserId,thread_number,Data,NoneThreadLevel,nPool,Exclusive));}
	// Inline shortcut: priority=NoneThreadLevel, nPool=-1, Exclusive=false.
	bool RequestThreadPool(uint32_t UserId,uint8_t thread_number,Public_MT_Data_Thread *Data)
		{return(RequestThreadPool(UserId,thread_number,Data,NoneThreadLevel,-1,false));}
	// Inline shortcut: caller-chosen priority, nPool=-1, Exclusive=false.
	bool RequestThreadPool(uint32_t UserId,uint8_t thread_number,Public_MT_Data_Thread *Data,
		ThreadLevelName priority)
		{return(RequestThreadPool(UserId,thread_number,Data,priority,-1,false));}
	// Release / start / wait: each exists with and without an explicit idxPool.
	bool ReleaseThreadPool(uint32_t UserId,bool sleep);
	bool ReleaseThreadPool(uint32_t UserId,bool sleep,int8_t idxPool);
	bool StartThreads(uint32_t UserId);
	bool StartThreads(uint32_t UserId,int8_t idxPool);
	bool WaitThreadsEnd(uint32_t UserId);
	bool WaitThreadsEnd(uint32_t UserId,int8_t idxPool);
	// Status queries for one user / pool.
	bool GetThreadPoolStatus(uint32_t UserId,int8_t idxPool,int8_t nPool);
	uint8_t GetCurrentThreadAllocated(uint32_t UserId,int8_t idxPool,int8_t nPool);
	uint8_t GetCurrentThreadUsed(uint32_t UserId,int8_t idxPool,int8_t nPool);
	// Per-user option switches and their configuration values.
	// NOTE(review): the exact effect of each option is not visible in this header.
	bool EnableAllowSeveral(uint32_t UserId);
	bool DisableAllowSeveral(uint32_t UserId);
	bool IsAllowedSeveral(uint32_t UserId);
	bool EnableWaitonRequest(uint32_t UserId);
	bool DisableWaitonRequest(uint32_t UserId);
	bool EnableTimeOutonRequest(uint32_t UserId);
	bool DisableTimeOutonRequest(uint32_t UserId);
	bool EnableRetryMaxonRequest(uint32_t UserId);
	bool DisableRetryMaxonRequest(uint32_t UserId);
	bool ConfigureTimeOutValue(uint32_t UserId, DWORD dwMilliseconds);
	bool ConfigureRetryMaxValue(uint32_t UserId, uint8_t NbreMax);
	int8_t GetPoolAllocated(uint32_t UserId);
	int8_t GetPoolNumber(uint32_t UserId,int8_t idxPool);
	int8_t GetPoolIndex(uint32_t UserId,int8_t nPool);
	uint8_t GetLogicalCPUNumber(void);
	uint8_t GetPhysicalCoreNumber(void);
	
	protected :
	
	// Global state flag and pool count, exposed through the two inline getters below.
	bool Status_Ok;
	uint8_t NbrePool;
	
	public :

	// Returns the Status_Ok flag as is.
	bool GetThreadPoolInterfaceStatus(void) {return(Status_Ok);}
	// Returns NbrePool when Status_Ok is true, -1 otherwise.
	// NbrePool is uint8_t and the result int8_t, so values above 127 would not survive the conversion.
	int8_t GetCurrentPoolCreated(void) {return((Status_Ok) ? NbrePool:-1);}

	protected :

	// Not publicly constructible; see Init().
	ThreadPoolInterface(void);

	// Win32 synchronisation objects and bookkeeping data.
	// NOTE(review): which object protects which data is not visible here -- confirm in the .cpp.
	CRITICAL_SECTION CriticalSection;
	HANDLE ghMutexResources;
	BOOL CSectionOk;
	HANDLE JobsEnded[MAX_THREAD_POOL],ThreadPoolFree[MAX_THREAD_POOL];
	std::vector<UserData> TabId;
	HANDLE EndExclusive;
	bool Error_Occured;

	// Per-pool state arrays, all sized MAX_THREAD_POOL.
	bool ThreadPoolRequested[MAX_THREAD_POOL],JobsRunning[MAX_THREAD_POOL];
	bool ThreadPoolReleased[MAX_THREAD_POOL],ThreadWaitEnd[MAX_THREAD_POOL];
	bool ThreadPoolWaitFree[MAX_THREAD_POOL];
	uint32_t ThreadPoolUserId[MAX_THREAD_POOL];
	bool ExclusiveMode;
	uint8_t NbrePoolEvent;

	// Internal helpers.
	bool CreatePoolEvent(uint8_t num);
	void FreeData(void);
	void FreePool(void);
	void FreePool(int8_t nPool);
	bool EnterCS(void);
	void LeaveCS(void);
	bool GetMutex(void);
	void FreeMutex(void);
	int32_t GetUserIdIndex(uint32_t UserId);
	bool ReleaseThreadPoolCore(uint32_t UserId,int32_t index,bool sleep,int8_t nPool,int8_t idxPool);
	bool StartThreadsCore(int8_t nPool);
	bool WaitThreadsEndCore(uint32_t UserId,int8_t nPool,int8_t idxPool);
	
	private :

	// Copy construction, assignment and comparison are declared private.
	// No definition is visible here; presumably this forbids copying/comparing instances.
	ThreadPoolInterface (const ThreadPoolInterface &other);
	ThreadPoolInterface& operator = (const ThreadPoolInterface &other);
	bool operator == (const ThreadPoolInterface &other) const;
	bool operator != (const ThreadPoolInterface &other) const;
};
173 | 
174 | #endif // __ThreadPoolInterface_H__
175 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/TransferFunctions.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  TransferFunctions
  3 |  *
  4 |  *  OOTF,EOTF,OETF, etc... HDR and SDR core functions.
  5 |  *  Copyright (C) 2019 JPSDR
  6 |  *	
  7 |  *  HDRTools is free software; you can redistribute it and/or modify
  8 |  *  it under the terms of the GNU General Public License as published by
  9 |  *  the Free Software Foundation; either version 2, or (at your option)
 10 |  *  any later version.
 11 |  *   
 12 |  *  HDRTools is distributed in the hope that it will be useful,
 13 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 15 |  *  GNU General Public License for more details.
 16 |  *   
 17 |  *  You should have received a copy of the GNU General Public License
 18 |  *  along with GNU Make; see the file COPYING.  If not, write to
 19 |  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 20 |  *
 21 |  */
 22 | 
 23 | #include <math.h>
 24 | 
 25 | static const double m1=0.1593017578125,im1=1.0/m1;
 26 | static const double m2=78.84375,im2=1.0/m2;
 27 | static const double c1=0.8359375;
 28 | static const double c2=18.8515625;
 29 | static const double c3=18.6875;
 30 | 
 31 | static const double alpha=1.09929682680944,alpham1=alpha-1.0,ialpha=1.0/alpha;
 32 | static const double beta=0.018053968510807;
 33 | static const double alpha2=267.84,beta2=0.0003024,ialpha2=1.0/alpha2;
 34 | static const double coeff_i12=1.0/12.0,coeff_i3=1.0/3.0,coeff_i45=1.0/0.45;
 35 | static const double coeff_i24=1.0/2.404,coeff_i59=1.0/59.5208;
 36 | static const double a=0.17883277;
 37 | static const double b=1.0-4.0*a,c=0.5-a*log(4.0*a),ia=1.0/a;
 38 | static double lm1=1.2-1.0,ilm1=(1.0/1.2)-1.0;
 39 | 
 40 | void Set_l_HLG(double Lw)
 41 | {
 42 | 	lm1=(1.2+0.42*log10(Lw*0.001))-1.0;
 43 | 	ilm1=(1.0/(1.2+0.42*log10(Lw*0.001)))-1.0;
 44 | }
 45 | 
 46 | double HLG_OETF(double x)
 47 | {
 48 | 	if (x<=coeff_i12) return(sqrt(3.0*x));
 49 | 	else return(a*log(12.0*x-b)+c);
 50 | }
 51 | 
 52 | double HLG_inv_OETF(double x)
 53 | {
 54 | 	if (x<=0.5) return(x*x*coeff_i3);
 55 | 	else return((exp((x-c)*ia)+b)*coeff_i12);
 56 | }
 57 | 
 58 | double HLG_OOTF(double x)
 59 | {
 60 | 	return(x*pow(x,lm1));
 61 | }
 62 | 
 63 | double HLG_inv_OOTF(double x)
 64 | {
 65 | 	return(x*pow(x,ilm1));
 66 | }
 67 | 
 68 | double inv_OETF(double x)
 69 | {
 70 | 	if (x<(beta*4.5)) return(x*coeff_i45);
 71 | 	else return(pow(((x+alpham1))*ialpha,coeff_i45));
 72 | }
 73 | 
 74 | double OETF(double x)
 75 | {
 76 | 	if (x<beta) return(x*4.5);
 77 | 	else return(alpha*pow(x,0.45)-alpham1);
 78 | }
 79 | 
 80 | double EOTF(double x)
 81 | {
 82 | 	return(pow(x,2.404));
 83 | }
 84 | 
 85 | double PQ_OOTF(double x)
 86 | {
 87 | 	if (x<=beta2) x*=alpha2;
 88 | 	else x=pow(59.5208*x,0.45)*alpha-alpham1;
 89 | 	return(pow(x,2.404)*0.01);
 90 | }
 91 | 
 92 | double PQ_OOTF_Inv(double x)
 93 | {
 94 | 	x=pow(100.0*x,coeff_i24);
 95 | 	if (x<=alpha2*beta2) return(x*ialpha2);
 96 | 	else return(pow(((x+alpham1))*ialpha,coeff_i45)*coeff_i59);
 97 | }
 98 | 
 99 | double PQ_EOTF(double x)
100 | {
101 | 	double x0;
102 | 
103 | 	x0=pow(x,im2);
104 | 	if (x0<=c1) return(0.0);
105 | 	else return(pow((x0-c1)/(c2-c3*x0),im1));
106 | }
107 | 
108 | double PQ_inv_EOTF(double x)
109 | {
110 | 	return(pow((c1+c2*pow(x,m1))/(1+c3*pow(x,m1)),m2));
111 | }
112 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/TransferFunctions.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  TransferFunctions
 3 |  *
 4 |  *  OOTF,EOTF,OETF, etc... HDR and SDR core functions.
 5 |  *  Copyright (C) 2019 JPSDR
 6 |  *	
 7 |  *  HDRTools is free software; you can redistribute it and/or modify
 8 |  *  it under the terms of the GNU General Public License as published by
 9 |  *  the Free Software Foundation; either version 2, or (at your option)
10 |  *  any later version.
11 |  *   
12 |  *  HDRTools is distributed in the hope that it will be useful,
13 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 |  *  GNU General Public License for more details.
16 |  *   
17 |  *  You should have received a copy of the GNU General Public License
18 |  *  along with GNU Make; see the file COPYING.  If not, write to
19 |  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
20 |  *
21 |  */
22 | 
#ifndef TRANSFER_FUNCTIONS_H
#define TRANSFER_FUNCTIONS_H

// Scalar transfer functions (double in, double out) implemented in
// TransferFunctions.cpp.

// Sets the gamma used by HLG_OOTF()/HLG_inv_OOTF() from Lw
// (gamma = 1.2+0.42*log10(Lw/1000)). Modifies file-level state shared by both.
void Set_l_HLG(double Lw);

// HLG OETF and its inverse.
double HLG_OETF(double x);
double HLG_inv_OETF(double x);

// HLG OOTF (x raised to the current gamma) and its inverse (x raised to 1/gamma).
double HLG_OOTF(double x);
double HLG_inv_OOTF(double x);

// Piecewise linear/power-law OETF (exponent 0.45) and its inverse.
double inv_OETF(double x);
double OETF(double x);

// Pure power-law EOTF (exponent 2.404).
double EOTF(double x);

// PQ OOTF and its inverse.
double PQ_OOTF(double x);
double PQ_OOTF_Inv(double x);

// PQ EOTF and its inverse.
double PQ_EOTF(double x);
double PQ_inv_EOTF(double x);

#endif // TRANSFER_FUNCTIONS_H
35 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/aWarpSharp.h:
--------------------------------------------------------------------------------
  1 | // aWarpSharp package 2016.06.23 for Avisynth+ and Avisynth 2.6
  2 | // based on Firesledge's 2015.12.30 for Avisynth 2.5
  3 | // aWarpSharp package 2012.03.28 for Avisynth 2.5
  4 | // Copyright (C) 2003 MarcFD, 2012 Skakov Pavel
  5 | // 2015 Firesledge
  6 | // 2016 pinterf
  7 | // This program is free software; you can redistribute it and/or modify
  8 | // it under the terms of the GNU General Public License as published by
  9 | // the Free Software Foundation; either version 2 of the License, or
 10 | // (at your option) any later version.
 11 | //
 12 | // This program is distributed in the hope that it will be useful,
 13 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 | // GNU General Public License for more details.
 16 | 
 17 | #include "./avisynth.h"
 18 | #include "./ThreadPoolInterface.h"
 19 | 
// Version string of this build of the aWarpSharp filters.
#define AWARPSHARP_VERSION "aWarpSharpMT 2.1.10 JPSDR"

// Free "ptr" with free() / _aligned_free() when it is not NULL, then reset it to NULL.
// "ptr" is expanded several times and must be an assignable lvalue.
// NOTE(review): both macros expand to a bare "if (...) { ... }" statement, so
// "if (c) myfree(p); else ..." does not compile. Use them only as stand-alone statements.
#define myfree(ptr) if (ptr!=NULL) { free(ptr); ptr=NULL;}
#define myAlignedFree(ptr) if (ptr!=NULL) { _aligned_free(ptr); ptr=NULL;}
 24 | 
// Per-thread job description used by the filter classes declared below
// (each of them owns an array MT_Data[MAX_MT_THREADS] of this type).
// Fields come in groups of three (suffix 1/2/3) for each of the Y, U and V planes.
// NOTE(review): the meaning of the 1/2/3 suffixes and of the *_min/*_max fields is not
// visible in this header -- presumably up to three source/destination frames and the
// row range handled by one thread; confirm in aWarpSharp.cpp.
typedef struct _MT_Data_Info_WarpSharp
{
	// Y plane: source/destination pointers and pitches.
	void *src_Y1,*src_Y2,*src_Y3;
	void *dst_Y1,*dst_Y2,*dst_Y3;
	int32_t src_pitch_Y1,src_pitch_Y2,src_pitch_Y3;
	int32_t dst_pitch_Y1,dst_pitch_Y2,dst_pitch_Y3;
	// U plane: source/destination pointers and pitches.
	void *src_U1,*src_U2,*src_U3;
	void *dst_U1,*dst_U2,*dst_U3;
	// NOTE(review): "src_pitch_u3" is spelled with a lower-case 'u', unlike its siblings.
	// Left as is because code elsewhere may refer to it under this name.
	int32_t src_pitch_U1,src_pitch_U2,src_pitch_u3;
	int32_t dst_pitch_U1,dst_pitch_U2,dst_pitch_U3;
	// V plane: source/destination pointers and pitches.
	void *src_V1,*src_V2,*src_V3;
	void *dst_V1,*dst_V2,*dst_V3;
	int32_t src_pitch_V1,src_pitch_V2,src_pitch_V3;
	int32_t dst_pitch_V1,dst_pitch_V2,dst_pitch_V3;

	// Row sizes per plane.
	int32_t row_size_Y1,row_size_Y2,row_size_Y3;
	int32_t row_size_U1,row_size_U2,row_size_U3;
	int32_t row_size_V1,row_size_V2,row_size_V3;

	// Plane heights for source and destination.
	int32_t src_Y_h,src_U_h,src_V_h,dst_Y_h,dst_U_h,dst_V_h;

	// Row ranges and widths for luma (Y) and chroma (UV), source and destination.
	int32_t src_Y_h_min,src_Y_h_max,src_Y_w;
	int32_t src_UV_h_min,src_UV_h_max,src_UV_w;
	int32_t dst_Y_h_min,dst_Y_h_max,dst_Y_w;
	int32_t dst_UV_h_min,dst_UV_h_max,dst_UV_w;
	// NOTE(review): presumably flag the slices touching the top/bottom frame border -- confirm.
	bool top,bottom;

	bool processH,processV,cprocessH,cprocessV;
	// NOTE(review): presumably the chroma (U) sub-sampling factors -- confirm.
	int SubW_U,SubH_U;

} MT_Data_Info_WarpSharp;
 56 | 
 57 | 
// AviSynth filter class "aWarpSharp" (derives from GenericVideoFilter).
// The private members mirror the names of the constructor parameters, plus
// format information and the multi-threading job tables.
// NOTE(review): the processing itself is implemented outside this header.
class aWarpSharp : public GenericVideoFilter
{
public:
  aWarpSharp(PClip _child, int _thresh, int _blur_level, int _blur_type, int _depth, int _chroma, int _depthC, 
	  bool _cplace_mpeg2_flag, int _blur_levelV, int _depthV, int _depthVC, int _blur_levelC, int _blur_levelVC,
	  int _threshC,uint8_t _threads,bool _sleep, bool negativePrefetch, bool _avsp, IScriptEnvironment *env);
  virtual ~aWarpSharp();

  // AviSynth filter entry points.
  PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment *env);
  int __stdcall SetCacheHints(int cachehints, int frame_range);

private:
  // Filter parameters (luma / chroma "C" / vertical "V" variants).
  int thresh,threshC;
  int blur_level,blur_levelV;
  int blur_levelC,blur_levelVC;
  int depth,depthV;
  int depthC,depthVC;
  int chroma;
  int blur_type;
  bool cplace_mpeg2_flag;

  // Clip format / host capability flags.
  bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
  uint8_t pixelsize; // AVS16
  uint8_t bits_per_pixel;

	// Multi-threading data: one entry per worker, up to MAX_MT_THREADS.
	Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
	MT_Data_Info_WarpSharp MT_Data[MAX_MT_THREADS];
	uint8_t threads,threads_number;
	bool sleep;
	// NOTE(review): presumably the id obtained from the thread pool interface -- confirm.
	uint32_t UserId;
	
	ThreadPoolFunction StaticThreadpoolF;

	// Static worker entry point taking an untyped pointer.
	static void StaticThreadpool(void *ptr);
};
 93 | 
 94 | 
// AviSynth filter class "aSobel" (derives from GenericVideoFilter).
// Takes threshold and chroma-mode parameters plus the common threading options.
// NOTE(review): the processing itself is implemented outside this header.
class aSobel : public GenericVideoFilter
{
public:
  aSobel(PClip _child, int _thresh, int _chroma, int _threshC,uint8_t _threads,bool _sleep, bool negativePrefetch,
	  bool _avsp, IScriptEnvironment *env);
  virtual ~aSobel();

  // AviSynth filter entry points.
  PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment *env);
  int __stdcall SetCacheHints(int cachehints, int frame_range);

private:
  // Filter parameters.
  int thresh,threshC;
  int chroma;

  // Clip format / host capability flags.
  bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
  uint8_t pixelsize; // AVS16
  uint8_t bits_per_pixel;

	// Multi-threading data: one entry per worker, up to MAX_MT_THREADS.
	Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
	MT_Data_Info_WarpSharp MT_Data[MAX_MT_THREADS];
	uint8_t threads,threads_number;
	bool sleep;
	uint32_t UserId;
	
	ThreadPoolFunction StaticThreadpoolF;

	// Static worker entry point taking an untyped pointer.
	static void StaticThreadpool(void *ptr);
};
123 | 
124 | 
// AviSynth filter class "aBlur" (derives from GenericVideoFilter).
// Takes blur level/type parameters (with chroma "C" and vertical "V" variants)
// plus the common threading options.
// NOTE(review): the processing itself is implemented outside this header.
class aBlur : public GenericVideoFilter
{
public:
  aBlur(PClip _child, int _blur_level, int _blur_type, int _chroma, int _blur_levelV,
	   int _blur_levelC, int _blur_levelVC,uint8_t _threads,bool _sleep, bool negativePrefetch,
	   bool _avsp,IScriptEnvironment *env);
  virtual ~aBlur();

  // AviSynth filter entry points.
  PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment *env);
  int __stdcall SetCacheHints(int cachehints, int frame_range);

private:
  // Filter parameters.
  int blur_level,blur_levelV;
   int blur_levelC,blur_levelVC;
  int blur_type;
  int chroma;

  // Clip format / host capability flags.
  bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
  uint8_t pixelsize; // AVS16
  uint8_t bits_per_pixel;

	// Multi-threading data: one entry per worker, up to MAX_MT_THREADS.
	Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
	MT_Data_Info_WarpSharp MT_Data[MAX_MT_THREADS];
	uint8_t threads,threads_number;
	bool sleep;
	uint32_t UserId;
	
	ThreadPoolFunction StaticThreadpoolF;

	// Static worker entry point taking an untyped pointer.
	static void StaticThreadpool(void *ptr);
};
156 | 
157 | 
// AviSynth filter class "aWarp" (derives from GenericVideoFilter).
// Unlike aWarpSharp it receives a second clip, "_edges", kept in the "edges" member.
// NOTE(review): the processing itself is implemented outside this header.
class aWarp : public GenericVideoFilter
{
public:
  aWarp(PClip _child, PClip _edges, int _depth, int _chroma, int _depthC, bool _cplace_mpeg2_flag,
	  int _depthV, int _depthVC,uint8_t _threads,bool _sleep, bool negativePrefetch,
	  bool _avsp, IScriptEnvironment *env);
  virtual ~aWarp();

  // AviSynth filter entry points.
  PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment *env);
  int __stdcall SetCacheHints(int cachehints, int frame_range);

private:
  // Second input clip and filter parameters.
  PClip edges;
  int depth,depthV;
  int depthC,depthVC;
  int chroma;
  bool cplace_mpeg2_flag;

  // Clip format / host capability flags.
  bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
  uint8_t pixelsize; // AVS16
  uint8_t bits_per_pixel;

	// Multi-threading data: one entry per worker, up to MAX_MT_THREADS.
	Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
	MT_Data_Info_WarpSharp MT_Data[MAX_MT_THREADS];
	uint8_t threads,threads_number;
	bool sleep;
	uint32_t UserId;
	
	ThreadPoolFunction StaticThreadpoolF;

	// Static worker entry point taking an untyped pointer.
	static void StaticThreadpool(void *ptr);
};
190 | 
191 | 
// AviSynth filter class "aWarp4" (derives from GenericVideoFilter).
// Its declaration has the same parameters and members as aWarp.
// NOTE(review): how it differs from aWarp is not visible in this header -- see the .cpp.
class aWarp4 : public GenericVideoFilter
{
public:
  aWarp4(PClip _child, PClip _edges, int _depth, int _chroma, int _depthC, bool _cplace_mpeg2_flag,
	  int _depthV, int _depthVC,uint8_t _threads,bool _sleep, bool negativePrefetch,
	  bool _avsp, IScriptEnvironment *env);
  virtual ~aWarp4();

  // AviSynth filter entry points.
  PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment *env);
  int __stdcall SetCacheHints(int cachehints, int frame_range);

private:
  // Second input clip and filter parameters.
  PClip edges;
  int depth,depthV;
  int depthC,depthVC;
  int chroma;
  bool cplace_mpeg2_flag;

  // Clip format / host capability flags.
  bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
  uint8_t pixelsize; // AVS16
  uint8_t bits_per_pixel;

	// Multi-threading data: one entry per worker, up to MAX_MT_THREADS.
	Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
	MT_Data_Info_WarpSharp MT_Data[MAX_MT_THREADS];
	uint8_t threads,threads_number;
	bool sleep;
	uint32_t UserId;
	
	ThreadPoolFunction StaticThreadpoolF;

	// Static worker entry point taking an untyped pointer.
	static void StaticThreadpool(void *ptr);
};


--------------------------------------------------------------------------------
/Plugins_JPSDR/avs/alignment.h:
--------------------------------------------------------------------------------
  1 | // Avisynth C Interface Version 0.20
  2 | // Copyright 2003 Kevin Atkinson
  3 | 
  4 | // This program is free software; you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License as published by
  6 | // the Free Software Foundation; either version 2 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // This program is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | // GNU General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU General Public License
 15 | // along with this program; if not, write to the Free Software
 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 17 | // http://www.gnu.org/copyleft/gpl.html .
 18 | //
 19 | // As a special exception, I give you permission to link to the
 20 | // Avisynth C interface with independent modules that communicate with
 21 | // the Avisynth C interface solely through the interfaces defined in
 22 | // avisynth_c.h, regardless of the license terms of these independent
 23 | // modules, and to copy and distribute the resulting combined work
 24 | // under terms of your choice, provided that every copy of the
 25 | // combined work is accompanied by a complete copy of the source code
 26 | // of the Avisynth C interface and Avisynth itself (with the version
 27 | // used to produce the combined work), being distributed under the
 28 | // terms of the GNU General Public License plus this exception.  An
 29 | // independent module is a module which is not derived from or based
 30 | // on Avisynth C Interface, such as 3rd-party filters, import and
 31 | // export plugins, or graphical user interfaces.
 32 | 
 33 | #ifndef AVS_ALIGNMENT_H
 34 | #define AVS_ALIGNMENT_H
 35 | 
 36 | // Functions and macros to help work with alignment requirements.
 37 | 
 38 | // Tells if a number is a power of two.
 39 | #define IS_POWER2(n) ((n) && !((n) & ((n) - 1)))
 40 | 
 41 | // Tells if the pointer "ptr" is aligned to "align" bytes.
 42 | #define IS_PTR_ALIGNED(ptr, align) (((uintptr_t)ptr & ((uintptr_t)(align-1))) == 0)
 43 | 
 44 | // Rounds up the number "n" to the next greater multiple of "align"
 45 | #define ALIGN_NUMBER(n, align) (((n) + (align)-1) & (~((align)-1)))
 46 | 
 47 | // Rounds up the pointer address "ptr" to the next greater multiple of "align"
 48 | #define ALIGN_POINTER(ptr, align) (((uintptr_t)(ptr) + (align)-1) & (~(uintptr_t)((align)-1)))
 49 | 
 50 | #ifdef __cplusplus
 51 | 
 52 | #include <cassert>
 53 | #include <cstdlib>
 54 | #include <cstdint>
 55 | #include "config.h"
 56 | 
 57 | #if defined(MSVC) && _MSC_VER<1400
 58 |     // needed for VS2013, otherwise C++11 'alignas' works
 59 |     #define avs_alignas(x) __declspec(align(x))
 60 | #else
 61 |     // assumes C++11 support
 62 |     #define avs_alignas(x) alignas(x)
 63 | #endif
 64 | 
 65 | template<typename T>
 66 | static bool IsPtrAligned(T* ptr, size_t align)
 67 | {
 68 |   assert(IS_POWER2(align));
 69 |   return (bool)IS_PTR_ALIGNED(ptr, align);
 70 | }
 71 | 
 72 | template<typename T>
 73 | static T AlignNumber(T n, T align)
 74 | {
 75 |   assert(IS_POWER2(align));
 76 |   return ALIGN_NUMBER(n, align);
 77 | }
 78 | 
 79 | template<typename T>
 80 | static T* AlignPointer(T* ptr, size_t align)
 81 | {
 82 |   assert(IS_POWER2(align));
 83 |   return (T*)ALIGN_POINTER(ptr, align);
 84 | }
 85 | 
 86 | extern "C"
 87 | {
 88 | #else
 89 | #include <stdlib.h>
 90 | #endif  // __cplusplus
 91 | 
 92 | // Returns a new buffer that is at least the size "nbytes".
 93 | // The buffer will be aligned to "align" bytes.
 94 | // Returns NULL on error. On successful allocation,
 95 | // the returned buffer must be freed using "avs_free".
 96 | inline void* avs_malloc(size_t nbytes, size_t align)
 97 | {
 98 |   if (!IS_POWER2(align))
 99 |     return NULL;
100 | 
101 |   size_t offset = sizeof(void*) + align - 1;
102 | 
103 |   void *orig = malloc(nbytes + offset);
104 |   if (orig == NULL)
105 |    return NULL;
106 | 
107 |   void **aligned = (void**)(((uintptr_t)orig + (uintptr_t)offset) & (~(uintptr_t)(align-1)));
108 |   aligned[-1] = orig;
109 |   return aligned;
110 | }
111 | 
// Releases a buffer obtained from "avs_malloc".
// Such buffers must never be passed to "free" directly: the address handed
// to the caller is not the one malloc returned. That one is stored in the
// slot just before the buffer.
inline void avs_free(void *ptr)
{
  // Like free(), a NULL argument is accepted and does nothing.
  if (ptr != NULL)
  {
    void **slot = (void**)ptr;
    free(slot[-1]);
  }
}
122 | 
123 | #ifdef __cplusplus
124 | } // extern "C"
125 | 
126 | // The point of these undef's is to force using the template functions
127 | // if we are in C++ mode. For C, the user can rely only on the macros.
128 | #undef IS_PTR_ALIGNED
129 | #undef ALIGN_NUMBER
130 | #undef ALIGN_POINTER
131 | 
132 | #endif  // __cplusplus
133 | 
134 | #endif  //AVS_ALIGNMENT_H
135 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/avs/capi.h:
--------------------------------------------------------------------------------
  1 | // Avisynth C Interface Version 0.20
  2 | // Copyright 2003 Kevin Atkinson
  3 | 
  4 | // This program is free software; you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License as published by
  6 | // the Free Software Foundation; either version 2 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // This program is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | // GNU General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU General Public License
 15 | // along with this program; if not, write to the Free Software
 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 17 | // http://www.gnu.org/copyleft/gpl.html .
 18 | //
 19 | // As a special exception, I give you permission to link to the
 20 | // Avisynth C interface with independent modules that communicate with
 21 | // the Avisynth C interface solely through the interfaces defined in
 22 | // avisynth_c.h, regardless of the license terms of these independent
 23 | // modules, and to copy and distribute the resulting combined work
 24 | // under terms of your choice, provided that every copy of the
 25 | // combined work is accompanied by a complete copy of the source code
 26 | // of the Avisynth C interface and Avisynth itself (with the version
 27 | // used to produce the combined work), being distributed under the
 28 | // terms of the GNU General Public License plus this exception.  An
 29 | // independent module is a module which is not derived from or based
 30 | // on Avisynth C Interface, such as 3rd-party filters, import and
 31 | // export plugins, or graphical user interfaces.
 32 | 
 33 | #ifndef AVS_CAPI_H
 34 | #define AVS_CAPI_H
 35 | 
 36 | #include "config.h"
 37 | 
// On POSIX targets other than Haiku, __declspec(...) is defined away so the
// declarations below that use it still compile.
#ifdef AVS_POSIX
// this is also defined in avs/posix.h
#ifndef AVS_HAIKU
#define __declspec(x)
#endif
#endif

// EXTERN_C: 'extern "C"' when compiled as C++, nothing when compiled as C.
#ifdef __cplusplus
#  define EXTERN_C extern "C"
#else
#  define EXTERN_C
#endif
 50 | 
// AVSC_CC: calling convention attached to the C API functions.
//  - non-Windows targets: empty.
//  - Windows, building the core with 32-bit GCC: empty.
//  - Windows, client code with AVSC_WIN32_GCC32 defined: empty.
//  - every other Windows case: __stdcall when AVSC_USE_STDCALL is defined,
//    __cdecl otherwise.
#ifdef AVS_WINDOWS
#ifdef BUILDING_AVSCORE
#  if defined(GCC) && defined(X86_32)
#    define AVSC_CC
#  else // MSVC builds and 64-bit GCC
#    ifndef AVSC_USE_STDCALL
#      define AVSC_CC __cdecl
#    else
#      define AVSC_CC __stdcall
#    endif
#  endif
#else // needed for programs that talk to AviSynth+
#  ifndef AVSC_WIN32_GCC32 // see comment below
#    ifndef AVSC_USE_STDCALL
#      define AVSC_CC __cdecl
#    else
#      define AVSC_CC __stdcall
#    endif
#  else
#    define AVSC_CC
#  endif
#endif
// not AVS_WINDOWS
#  else
#    define AVSC_CC
#endif
 76 | 
 77 | // On 64-bit Windows, there's only one calling convention,
 78 | // so there is no difference between MSVC and GCC. On 32-bit,
 79 | // this isn't true. The convention that GCC needs to use to
 80 | // even build AviSynth+ as 32-bit makes anything that uses
 81 | // it incompatible with 32-bit MSVC builds of AviSynth+.
 82 | // The AVSC_WIN32_GCC32 define is meant to provide a user
 83 | // switchable way to make builds of FFmpeg to test 32-bit
 84 | // GCC builds of AviSynth+ without having to screw around
 85 | // with alternate headers, while still default to the usual
 86 | // situation of using 32-bit MSVC builds of AviSynth+.
 87 | 
 88 | // Hopefully, this situation will eventually be resolved
 89 | // and a broadly compatible solution will arise so the
 90 | // same 32-bit FFmpeg build can handle either MSVC or GCC
 91 | // builds of AviSynth+.
 92 | 
// Storage/inline specifier for helper functions defined in the C headers.
#define AVSC_INLINE static __inline

// AVSC_EXPORT / AVSC_IMPORT / AVSC_API(ret, name): decorate C API declarations.
//  - Building the core on Windows: exported with dllexport unless AVS_STATIC_LIB
//    is defined.
//  - Building the core elsewhere: plain extern "C" declarations.
//  - Client code: AVSC_API declares an imported function (dllimport unless
//    AVS_STATIC_LIB), or, when AVSC_NO_DECLSPEC is defined, only a
//    function-pointer typedef named <name>_func.
#ifdef BUILDING_AVSCORE
#ifdef AVS_WINDOWS
#  ifndef AVS_STATIC_LIB
#    define AVSC_EXPORT __declspec(dllexport)
#  else
#    define AVSC_EXPORT
#  endif
#  define AVSC_API(ret, name) EXTERN_C AVSC_EXPORT ret AVSC_CC name
#else
#  define AVSC_EXPORT EXTERN_C
#  define AVSC_API(ret, name) EXTERN_C ret AVSC_CC name
#endif
#else
#  define AVSC_EXPORT EXTERN_C __declspec(dllexport)
#  ifndef AVS_STATIC_LIB
#    define AVSC_IMPORT __declspec(dllimport)
#  else
#    define AVSC_IMPORT
#  endif
#  ifndef AVSC_NO_DECLSPEC
#    define AVSC_API(ret, name) EXTERN_C AVSC_IMPORT ret AVSC_CC name
#  else
#    define AVSC_API(ret, name) typedef ret (AVSC_CC *name##_func)
#  endif
#endif
120 | 
121 | #endif //AVS_CAPI_H
122 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/avs/config.h:
--------------------------------------------------------------------------------
  1 | // Avisynth C Interface Version 0.20
  2 | // Copyright 2003 Kevin Atkinson
  3 | 
  4 | // This program is free software; you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License as published by
  6 | // the Free Software Foundation; either version 2 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // This program is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | // GNU General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU General Public License
 15 | // along with this program; if not, write to the Free Software
 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 17 | // http://www.gnu.org/copyleft/gpl.html .
 18 | //
 19 | // As a special exception, I give you permission to link to the
 20 | // Avisynth C interface with independent modules that communicate with
 21 | // the Avisynth C interface solely through the interfaces defined in
 22 | // avisynth_c.h, regardless of the license terms of these independent
 23 | // modules, and to copy and distribute the resulting combined work
 24 | // under terms of your choice, provided that every copy of the
 25 | // combined work is accompanied by a complete copy of the source code
 26 | // of the Avisynth C interface and Avisynth itself (with the version
 27 | // used to produce the combined work), being distributed under the
 28 | // terms of the GNU General Public License plus this exception.  An
 29 | // independent module is a module which is not derived from or based
 30 | // on Avisynth C Interface, such as 3rd-party filters, import and
 31 | // export plugins, or graphical user interfaces.
 32 | 
 33 | #ifndef AVS_CONFIG_H
 34 | #define AVS_CONFIG_H
 35 | 
// Undefine this to get cdecl calling convention
// (only its presence is tested, with #ifndef, where AVSC_CC is selected in avs/capi.h).
#define AVSC_USE_STDCALL 1

// NOTE TO PLUGIN AUTHORS:
// Because FRAME_ALIGN can be substantially higher than the alignment
// a plugin actually needs, plugins should not use FRAME_ALIGN to check for
// alignment. They should always request the exact alignment value they need.
// This is to make sure that plugins work over the widest range of AviSynth
// builds possible.
#define FRAME_ALIGN 64
 46 | 
// CPU architecture detection: defines exactly one of X86_64, X86_32, ARM64,
// ARM32, PPC64, PPC32, RISCV, LOONGARCH, SPARC or MIPS from the compiler's
// predefined macros. The first matching test wins; compilation stops with
// #error when none matches.
#if   defined(_M_AMD64) || defined(__x86_64)
#   define X86_64
#elif defined(_M_IX86) || defined(__i386__)
#   define X86_32
// VS2017 introduced _M_ARM64
#elif defined(_M_ARM64) || defined(__aarch64__)
#   define ARM64
#elif defined(_M_ARM) || defined(__arm__)
#   define ARM32
#elif defined(__PPC64__)
#   define PPC64
#elif defined(_M_PPC) || defined(__PPC__) || defined(__POWERPC__)
#   define PPC32
#elif defined(__riscv)
#   define RISCV
#elif defined(__loongarch__)
#   define LOONGARCH
#elif defined(__sparc_v9__)
#   define SPARC
#elif defined(__mips__)
#   define MIPS
#else
#   error Unsupported CPU architecture.
#endif
 71 | 
// Compiler detection: defines CLANG, MSVC, MSVC_PURE and/or GCC according to
// the table below, and AVS_FORCEINLINE as the compiler's force-inline spelling.
//
//            VC++  LLVM-Clang-cl   MinGW-Gnu
// MSVC        x          x
// MSVC_PURE   x
// CLANG                  x
// GCC                                  x

#if defined(__clang__)
// Check clang first. clang-cl also defines _MSC_VER
// We set MSVC because they are mostly compatible
#   define CLANG
#if defined(_MSC_VER)
#   define MSVC
#   define AVS_FORCEINLINE __attribute__((always_inline))
#else
#   define AVS_FORCEINLINE __attribute__((always_inline)) inline
#endif
#elif   defined(_MSC_VER)
#   define MSVC
#   define MSVC_PURE
#   define AVS_FORCEINLINE __forceinline
#elif defined(__GNUC__)
#   define GCC
#   define AVS_FORCEINLINE __attribute__((always_inline)) inline
#elif defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
// Intel C++ Compilers with MSVC command line interface will not appear here rather at _MSC_VER
// Here __forceinline is remapped to plain 'inline'.
#   define AVS_FORCEINLINE inline
#   undef __forceinline
#   define __forceinline inline
#else
// Unknown compiler: #error is raised first; the definitions after it only
// matter for a preprocessor that keeps going after #error.
#   error Unsupported compiler.
#   define AVS_FORCEINLINE inline
#   undef __forceinline
#   define __forceinline inline
#endif
106 | 
// Operating system detection: defines AVS_WINDOWS, or one of AVS_LINUX,
// AVS_BSD, AVS_MACOS, AVS_HAIKU together with AVS_POSIX. Compilation stops
// with #error on any other system.
#if defined(_WIN32)
#   define AVS_WINDOWS
#elif defined(__linux__)
#   define AVS_LINUX
#   define AVS_POSIX
#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
#   define AVS_BSD
#   define AVS_POSIX
#elif defined(__APPLE__)
#   define AVS_MACOS
#   define AVS_POSIX
#elif defined(__HAIKU__)
#   define AVS_HAIKU
#   define AVS_POSIX
#else
#   error Operating system unsupported.
#endif
124 | 
125 | #if defined(AVS_WINDOWS)
126 | #  if defined(X86_32) || defined(X86_64)
127 | #    define AVS_WINDOWS_X86
128 | #  elif defined(ARM64) || defined(ARM32)
129 | #    define AVS_WINDOWS_ARM
130 | #  endif
131 | #endif
132 | 
133 | #if defined(MSVC) && !defined(AVS_WINDOWS_X86)
134 | #    error Unsupported combination of compiler, operating system, and machine architecture.
135 | #endif
136 | 
137 | // Helper macros for locally silencing compiler warnings on supported compilers. Typical use:
138 | //   DISABLE_WARNING_PUSH  DISABLE_WARNING_<WHAT>  ...code...  DISABLE_WARNING_POP
139 | #if defined(_MSC_VER)
140 | #define DISABLE_WARNING_PUSH           __pragma(warning( push ))
141 | #define DISABLE_WARNING_POP            __pragma(warning( pop ))
142 | #define DISABLE_WARNING(warningNumber) __pragma(warning( disable : warningNumber ))
143 | 
144 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE      DISABLE_WARNING(4101)
145 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION            DISABLE_WARNING(4505)
146 | // other warnings you want to deactivate...
147 | 
148 | #elif defined(__GNUC__) || defined(__clang__)
149 | #define DO_PRAGMA(X) _Pragma(#X)
150 | #define DISABLE_WARNING_PUSH           DO_PRAGMA(GCC diagnostic push)
151 | #define DISABLE_WARNING_POP            DO_PRAGMA(GCC diagnostic pop)
152 | #define DISABLE_WARNING(warningName)   DO_PRAGMA(GCC diagnostic ignored #warningName)
153 | 
154 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE      DISABLE_WARNING(-Wunused-variable)
155 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION            DISABLE_WARNING(-Wunused-function)
156 | // other warnings you want to deactivate...
157 | 
158 | #else
159 | // Any other compiler: every macro, including DISABLE_WARNING itself, expands to nothing.
160 | #define DISABLE_WARNING_PUSH
161 | #define DISABLE_WARNING_POP
162 | #define DISABLE_WARNING(warningName)
163 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE
164 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION
165 | #endif
166 | 
167 | #if defined(AVS_WINDOWS) && defined(_USING_V110_SDK71_)
168 | // Windows XP does not have proper initialization for
169 | // thread local variables.
170 | // XP_TLS asks for a workaround to be used instead of __declspec(thread).
171 | #define XP_TLS
172 | #endif
173 | 
174 | #ifndef MSVC
175 | // AVS_ENDIANNESS is a string literal. GCC and Clang can be used on big endian systems (so ask the compiler), MSVC can't.
176 | #  if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
177 | #    define AVS_ENDIANNESS "little"
178 | #  elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
179 | #    define AVS_ENDIANNESS "big"
180 | #  else
181 | #    define AVS_ENDIANNESS "middle"
182 | #  endif
183 | #else // MSVC (also set for clang-cl): always reported as little endian
184 | #define AVS_ENDIANNESS "little"
185 | #endif
186 | 
187 | #endif //AVS_CONFIG_H
188 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/avs/cpuid.h:
--------------------------------------------------------------------------------
 1 | // This program is free software; you can redistribute it and/or modify
 2 | // it under the terms of the GNU General Public License as published by
 3 | // the Free Software Foundation; either version 2 of the License, or
 4 | // (at your option) any later version.
 5 | //
 6 | // This program is distributed in the hope that it will be useful,
 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception.  An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 | 
32 | #ifndef AVSCORE_CPUID_H
33 | #define AVSCORE_CPUID_H
34 | 
35 | // For GetCPUFlags.  These are backwards-compatible with those in VirtualDub.
36 | // ending with SSE4_2
37 | // For emulation see https://software.intel.com/en-us/articles/intel-software-development-emulator
38 | enum {  // CPU feature bit flags; every value is a single bit except CPUF_X86_64
39 |                     /* oldest CPU to support extension */
40 |   CPUF_FORCE        =  0x01,   //  N/A
41 |   CPUF_FPU          =  0x02,   //  386/486DX
42 |   CPUF_MMX          =  0x04,   //  P55C, K6, PII
43 |   CPUF_INTEGER_SSE  =  0x08,   //  PIII, Athlon
44 |   CPUF_SSE          =  0x10,   //  PIII, Athlon XP/MP
45 |   CPUF_SSE2         =  0x20,   //  PIV, K8
46 |   CPUF_3DNOW        =  0x40,   //  K6-2
47 |   CPUF_3DNOW_EXT    =  0x80,   //  Athlon
48 |   CPUF_X86_64       =  0xA0,   //  Hammer (note: 0xA0 == CPUF_3DNOW_EXT | CPUF_SSE2, which
49 |                                //          only Hammer will have anyway)
50 |   CPUF_SSE3         = 0x100,   //  PIV+, K8 Venice
51 |   CPUF_SSSE3        = 0x200,   //  Core 2
52 |   CPUF_SSE4         = 0x400,   //  same bit as CPUF_SSE4_1
53 |   CPUF_SSE4_1       = 0x400,   //  Penryn, Wolfdale, Yorkfield
54 |   CPUF_AVX          = 0x800,   //  Sandy Bridge, Bulldozer
55 |   CPUF_SSE4_2       = 0x1000,  //  Nehalem
56 |   // AVS+
57 |   CPUF_AVX2         = 0x2000,   //  Haswell
58 |   CPUF_FMA3         = 0x4000,
59 |   CPUF_F16C         = 0x8000,
60 |   CPUF_MOVBE        = 0x10000,  // Big Endian move
61 |   CPUF_POPCNT       = 0x20000,
62 |   CPUF_AES          = 0x40000,
63 |   CPUF_FMA4         = 0x80000,
64 | 
65 |   CPUF_AVX512F      = 0x100000,  // AVX-512 Foundation.
66 |   CPUF_AVX512DQ     = 0x200000,  // AVX-512 DQ (Double/Quad granular) Instructions
67 |   CPUF_AVX512PF     = 0x400000,  // AVX-512 Prefetch
68 |   CPUF_AVX512ER     = 0x800000,  // AVX-512 Exponential and Reciprocal
69 |   CPUF_AVX512CD     = 0x1000000, // AVX-512 Conflict Detection
70 |   CPUF_AVX512BW     = 0x2000000, // AVX-512 BW (Byte/Word granular) Instructions
71 |   CPUF_AVX512VL     = 0x4000000, // AVX-512 VL (128/256 Vector Length) Extensions
72 |   CPUF_AVX512IFMA   = 0x8000000, // AVX-512 IFMA integer 52 bit
73 |   CPUF_AVX512VBMI   = 0x10000000,// AVX-512 VBMI
74 | };
75 | 
76 | #ifdef BUILDING_AVSCORE // presumably set only when compiling the AviSynth core itself - confirm
77 | int GetCPUFlags();
78 | void SetMaxCPU(int new_flags);
79 | #endif
80 | 
81 | #endif // AVSCORE_CPUID_H
82 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/avs/filesystem.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // Snippet copied from filesystem/README.md
 4 | 
 5 | #if defined(__cplusplus) && __cplusplus >= 201703L && defined(__has_include) // C++17 or newer, and __has_include is available
 6 | #if __has_include(<filesystem>) // the standard header exists: use std::filesystem
 7 | #define GHC_USE_STD_FS
 8 | #include <filesystem>
 9 | namespace fs = std::filesystem;
10 | #endif
11 | #endif
12 | #ifndef GHC_USE_STD_FS // otherwise fall back to ghc::filesystem; either way the alias is named fs
13 | #include <ghc/filesystem.hpp>
14 | namespace fs = ghc::filesystem;
15 | #endif
16 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/avs/minmax.h:
--------------------------------------------------------------------------------
 1 | // This program is free software; you can redistribute it and/or modify
 2 | // it under the terms of the GNU General Public License as published by
 3 | // the Free Software Foundation; either version 2 of the License, or
 4 | // (at your option) any later version.
 5 | //
 6 | // This program is distributed in the hope that it will be useful,
 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception.  An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 | 
32 | #ifndef AVSCORE_MINMAX_H
33 | #define AVSCORE_MINMAX_H
34 | 
35 | template<typename T>  // smaller of the two arguments; v2 whenever (v1 < v2) is false
36 | T min(T v1, T v2) {
37 |   if (v1 < v2) return v1;
38 |   return v2;
39 | }
40 | 
41 | template<typename T>  // larger of the two arguments; v2 whenever (v1 > v2) is false
42 | T max(T v1, T v2) {
43 |   if (v1 > v2) return v1;
44 |   return v2;
45 | }
46 | 
47 | template<typename T>  // limits n to [min, max]; the upper bound is applied before the lower one
48 | T clamp(T n, T min, T max) {
49 |     if (n > max) n = max;
50 |     if (n < min) return min;
51 |     return n;
52 | }
53 | 
54 | #endif // AVSCORE_MINMAX_H
55 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/avs/posix.h:
--------------------------------------------------------------------------------
  1 | // This program is free software; you can redistribute it and/or modify
  2 | // it under the terms of the GNU General Public License as published by
  3 | // the Free Software Foundation; either version 2 of the License, or
  4 | // (at your option) any later version.
  5 | //
  6 | // This program is distributed in the hope that it will be useful,
  7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
  8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  9 | // GNU General Public License for more details.
 10 | //
 11 | // You should have received a copy of the GNU General Public License
 12 | // along with this program; if not, write to the Free Software
 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 14 | // http://www.gnu.org/copyleft/gpl.html .
 15 | //
 16 | // Linking Avisynth statically or dynamically with other modules is making a
 17 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
 18 | // General Public License cover the whole combination.
 19 | //
 20 | // As a special exception, the copyright holders of Avisynth give you
 21 | // permission to link Avisynth with independent modules that communicate with
 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
 23 | // terms of these independent modules, and to copy and distribute the
 24 | // resulting combined work under terms of your choice, provided that
 25 | // every copy of the combined work is accompanied by a complete copy of
 26 | // the source code of Avisynth (the version of Avisynth used to produce the
 27 | // combined work), being distributed under the terms of the GNU General
 28 | // Public License plus this exception.  An independent module is a module
 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
 30 | // import and export plugins, or graphical user interfaces.
 31 | 
 32 | #ifdef AVS_POSIX
 33 | #ifndef AVSCORE_POSIX_H
 34 | #define AVSCORE_POSIX_H
 35 | 
 36 | #ifdef __cplusplus
 37 | #include <cstring>
 38 | #endif
 39 | #include <strings.h>
 40 | #include <unistd.h>
 41 | 
 42 | // MSVC-specific keyword used in Avisynth sources; defined away (expands to nothing) here
 43 | #define __single_inheritance
 44 | 
 45 | // Win32/MSVC names that don't exist on POSIX systems, mapped to libc/POSIX functions
 46 | #if defined(AVS_HAIKU) // presumably the Haiku toolchain predefines __declspec - confirm
 47 | #undef __declspec
 48 | #endif
 49 | #define __declspec(x)
 50 | #define lstrlen strlen
 51 | #define lstrcmp strcmp
 52 | #define lstrcmpi strcasecmp
 53 | #define _stricmp strcasecmp
 54 | #define _strnicmp strncasecmp
 55 | #define _strdup strdup
 56 | #define SetCurrentDirectory(x) chdir(x)
 57 | #define SetCurrentDirectoryW(x) chdir(x)
 58 | #define GetCurrentDirectoryW(x) getcwd(x) // NOTE(review): POSIX getcwd() takes (buf, size); this one-argument mapping looks unusable - confirm
 59 | #define _putenv putenv
 60 | #define _alloca alloca
 61 | 
 62 | // Compatibility macros borrowed from AvxSynth, slightly modified: the 32x32 forms truncate both operands to 32 bits and multiply in 64 bits.
 63 | #define UInt32x32To64(a, b) ((uint64_t)(((uint64_t)((uint32_t)(a))) * ((uint32_t)(b))))
 64 | #define Int64ShrlMod32(a, b) ((uint64_t)((uint64_t)(a) >> (b)))
 65 | #define Int32x32To64(a, b)  ((int64_t)(((int64_t)((int32_t)(a))) * ((int32_t)(b)))) // int32_t rather than long: long is 64-bit on LP64 systems
 66 | // Win32 Interlocked* shims on GCC __sync builtins: Increment/Decrement yield the new value, ExchangeAdd yields the value held before the addition.
 67 | #define InterlockedIncrement(x) __sync_add_and_fetch((x), 1)
 68 | #define InterlockedDecrement(x) __sync_sub_and_fetch((x), 1)
 69 | #define InterlockedExchangeAdd(x, v) __sync_fetch_and_add((x), (v))
 70 | // (nNumber*nNumerator + nDenominator/2) / nDenominator in 64-bit arithmetic, cast to int32_t. nDenominator is evaluated twice and is not checked for zero.
 71 | #define MulDiv(nNumber, nNumerator, nDenominator)   (int32_t) (((int64_t) (nNumber) * (int64_t) (nNumerator) + (int64_t) ((nDenominator)/2)) / (int64_t) (nDenominator))
 72 | 
 73 | #ifndef TRUE // Win32-style boolean constants, unless something else already defined them
 74 | #define TRUE  true
 75 | #endif
 76 | 
 77 | #ifndef FALSE
 78 | #define FALSE false
 79 | #endif
 80 | // Minimal HRESULT-style constants and tests: FAILED(hr) is non-zero when bit 31 of hr is set.
 81 | #define S_FALSE       (0x00000001)
 82 | #define E_FAIL        (0x80004005)
 83 | #define FAILED(hr)    ((hr) & 0x80000000)
 84 | #define SUCCEEDED(hr) (!FAILED(hr))
 85 | 
 86 | // Exception status codes, copied from comments in exception.cpp (this header only defines them when AVS_POSIX is set)
 87 | #define STATUS_GUARD_PAGE_VIOLATION 0x80000001
 88 | #define STATUS_DATATYPE_MISALIGNMENT 0x80000002
 89 | #define STATUS_BREAKPOINT 0x80000003
 90 | #define STATUS_SINGLE_STEP 0x80000004
 91 | #define STATUS_ACCESS_VIOLATION 0xc0000005
 92 | #define STATUS_IN_PAGE_ERROR 0xc0000006
 93 | #define STATUS_INVALID_HANDLE 0xc0000008
 94 | #define STATUS_NO_MEMORY 0xc0000017
 95 | #define STATUS_ILLEGAL_INSTRUCTION 0xc000001d
 96 | #define STATUS_NONCONTINUABLE_EXCEPTION 0xc0000025
 97 | #define STATUS_INVALID_DISPOSITION 0xc0000026
 98 | #define STATUS_ARRAY_BOUNDS_EXCEEDED 0xc000008c
 99 | #define STATUS_FLOAT_DENORMAL_OPERAND 0xc000008d
100 | #define STATUS_FLOAT_DIVIDE_BY_ZERO 0xc000008e
101 | #define STATUS_FLOAT_INEXACT_RESULT 0xc000008f
102 | #define STATUS_FLOAT_INVALID_OPERATION 0xc0000090
103 | #define STATUS_FLOAT_OVERFLOW 0xc0000091
104 | #define STATUS_FLOAT_STACK_CHECK 0xc0000092
105 | #define STATUS_FLOAT_UNDERFLOW 0xc0000093
106 | #define STATUS_INTEGER_DIVIDE_BY_ZERO 0xc0000094
107 | #define STATUS_INTEGER_OVERFLOW 0xc0000095
108 | #define STATUS_PRIVILEGED_INSTRUCTION 0xc0000096
109 | #define STATUS_STACK_OVERFLOW 0xc00000fd
110 | 
111 | // Calling convention keywords: defined away (expand to nothing) everywhere except Haiku
112 | #ifndef AVS_HAIKU
113 | #define __stdcall
114 | #define __cdecl
115 | #endif
116 | 
117 | // PowerPC OS X is really niche these days, but simply aliasing the 64-bit
118 | // function/macro names used in posix_get_available_memory() to their
119 | // non-64 counterparts is all it takes to let it work on 32-bit PPC.  The G5 was
120 | // 64-bit, and if 10.5 Leopard can run in native 64-bit, it probably uses the 64-bit names as-is.
121 | #ifdef AVS_MACOS
122 | #ifdef PPC32
123 | #define vm_statistics64_data_t vm_statistics_data_t
124 | #define HOST_VM_INFO64_COUNT HOST_VM_INFO_COUNT
125 | #define HOST_VM_INFO64 HOST_VM_INFO
126 | #define host_statistics64 host_statistics
127 | #endif // PPC32
128 | #endif // AVS_MACOS
129 | 
130 | #endif // AVSCORE_POSIX_H
131 | #endif // AVS_POSIX
132 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/avs/types.h:
--------------------------------------------------------------------------------
 1 | // Avisynth C Interface Version 0.20
 2 | // Copyright 2003 Kevin Atkinson
 3 | 
 4 | // This program is free software; you can redistribute it and/or modify
 5 | // it under the terms of the GNU General Public License as published by
 6 | // the Free Software Foundation; either version 2 of the License, or
 7 | // (at your option) any later version.
 8 | //
 9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program; if not, write to the Free Software
16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
17 | // http://www.gnu.org/copyleft/gpl.html .
18 | //
19 | // As a special exception, I give you permission to link to the
20 | // Avisynth C interface with independent modules that communicate with
21 | // the Avisynth C interface solely through the interfaces defined in
22 | // avisynth_c.h, regardless of the license terms of these independent
23 | // modules, and to copy and distribute the resulting combined work
24 | // under terms of your choice, provided that every copy of the
25 | // combined work is accompanied by a complete copy of the source code
26 | // of the Avisynth C interface and Avisynth itself (with the version
27 | // used to produce the combined work), being distributed under the
28 | // terms of the GNU General Public License plus this exception.  An
29 | // independent module is a module which is not derived from or based
30 | // on Avisynth C Interface, such as 3rd-party filters, import and
31 | // export plugins, or graphical user interfaces.
32 | 
33 | #ifndef AVS_TYPES_H
34 | #define AVS_TYPES_H
35 | 
36 | // Define all types necessary for interfacing with avisynth.dll
37 | #include <stdint.h>
38 | //#include <stdbool.h>
39 | #ifdef __cplusplus
40 |   #include <cstddef>
41 |   #include <cstdarg>
42 | #else
43 |   #include <stddef.h>
44 |   #include <stdarg.h>
45 | #endif
46 | 
47 | // Raster types used by VirtualDub & Avisynth
48 | typedef uint32_t Pixel32;
49 | typedef uint8_t  BYTE;
50 | 
51 | // Audio Sample information
52 | typedef float SFLOAT;
53 | 
54 | #endif //AVS_TYPES_H
55 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/avs/win.h:
--------------------------------------------------------------------------------
 1 | // This program is free software; you can redistribute it and/or modify
 2 | // it under the terms of the GNU General Public License as published by
 3 | // the Free Software Foundation; either version 2 of the License, or
 4 | // (at your option) any later version.
 5 | //
 6 | // This program is distributed in the hope that it will be useful,
 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception.  An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 | 
32 | #ifndef AVSCORE_WIN_H
33 | #define AVSCORE_WIN_H
34 | 
35 | // Whenever you need windows headers, start by including this file, then the rest.
36 | 
37 | // Default the targeted Windows version to 0x0502 unless the build already chose one. NOTE(review): the original comment asked "We require XP now?" - confirm the intended minimum.
38 | #if !defined(NTDDI_VERSION) && !defined(_WIN32_WINNT)
39 |   #define NTDDI_VERSION 0x05020000
40 |   #define _WIN32_WINNT  0x0502
41 | #endif
42 | 
43 | #define WIN32_LEAN_AND_MEAN
44 | #define STRICT
45 | #if !defined(NOMINMAX)
46 |     #define NOMINMAX
47 | #endif
48 | 
49 | #include <windows.h>
50 | 
51 | // Provision for UTF-8 max 4 bytes per code point. Parenthesized so the macro stays correct inside larger expressions (%, /, ...).
52 | #define AVS_MAX_PATH (MAX_PATH*4)
53 | 
54 | #endif // AVSCORE_WIN_H
55 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/binary1.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpsdr/plugins_JPSDR/7826b045cfc28a0738088ec03a5e6f2edd44c9f0/Plugins_JPSDR/binary1.bin


--------------------------------------------------------------------------------
/Plugins_JPSDR/internal.h:
--------------------------------------------------------------------------------
  1 | // Avisynth v2.5.  Copyright 2002 Ben Rudiak-Gould et al.
  2 | // http://www.avisynth.org
  3 | 
  4 | // This program is free software; you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License as published by
  6 | // the Free Software Foundation; either version 2 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // This program is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | // GNU General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU General Public License
 15 | // along with this program; if not, write to the Free Software
 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 17 | // http://www.gnu.org/copyleft/gpl.html .
 18 | //
 19 | // Linking Avisynth statically or dynamically with other modules is making a
 20 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
 21 | // General Public License cover the whole combination.
 22 | //
 23 | // As a special exception, the copyright holders of Avisynth give you
 24 | // permission to link Avisynth with independent modules that communicate with
 25 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
 26 | // terms of these independent modules, and to copy and distribute the
 27 | // resulting combined work under terms of your choice, provided that
 28 | // every copy of the combined work is accompanied by a complete copy of
 29 | // the source code of Avisynth (the version of Avisynth used to produce the
 30 | // combined work), being distributed under the terms of the GNU General
 31 | // Public License plus this exception.  An independent module is a module
 32 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
 33 | // import and export plugins, or graphical user interfaces.
 34 | 
 35 | 
 36 | #ifndef __Internal_H__
 37 | #define __Internal_H__
 38 | // Version number and banner string. The spaces around __DATE__/__TIME__ are required: C++11 lexes "..."__DATE__ as a literal suffix.
 39 | #define AVS_VERSION 2.56
 40 | #define AVS_VERSTR "AviSynth 2.56, build:" __DATE__ " [" __TIME__ "]"
 41 | 
 42 | 
 43 | // env->ManageCache() Non user keys definition
 44 | // Define user accessible keys in avisynth.h
 45 | //
 46 | #define MC_ReturnVideoFrameBuffer 0xFFFF0001
 47 | 
 48 | 
 49 | #include "./avisynth.h"
 50 | 
 51 | 
 52 | 
 53 | int RGB2YUV(int rgb);  // defined elsewhere; presumably packed RGB in, packed YUV out - confirm
 54 | 
 55 | PClip Create_MessageClip(const char* message, int width, int height,
 56 |   int pixel_type, bool shrink, int textcolor, int halocolor, int bgcolor,
 57 |   IScriptEnvironment* env);
 58 | 
 59 | PClip new_Splice(PClip _child1, PClip _child2, bool realign_sound, IScriptEnvironment* env);
 60 | PClip new_SeparateFields(PClip _child, IScriptEnvironment* env);
 61 | PClip new_AssumeFrameBased(PClip _child);
 62 | // Plane copy helpers (pitch / row_size / height parameters); the asm_* variants are presumably ISSE/MMX implementations - definitions are not in this header.
 63 | void BitBlt(BYTE* dstp, int dst_pitch, const BYTE* srcp, 
 64 |             int src_pitch, int row_size, int height);
 65 | 
 66 |   void asm_BitBlt_ISSE(BYTE* dstp, int dst_pitch, const BYTE* srcp, int src_pitch, int row_size, int height);
 67 |   void asm_BitBlt_MMX(BYTE* dstp, int dst_pitch, const BYTE* srcp, int src_pitch, int row_size, int height);
 68 | // NOTE(review): avs/cpuid.h declares GetCPUFlags() as returning int (under BUILDING_AVSCORE); confirm the two declarations never meet in one translation unit.
 69 | long GetCPUFlags();
 70 | 
 71 | // Saturation lookup table: operator()(i) reads clip[i+buffer], which holds 0 for the first
 72 | // `buffer` entries, 0..255 for the next 256 and 255 for the last `buffer` entries. No bounds check is done.
 73 | class _PixelClip {
 74 |   enum { buffer=320 };
 75 |   BYTE clip[256+buffer*2];
 76 | public:
 77 |   _PixelClip() {
 78 |     for (int k=0; k<256+buffer*2; ++k)
 79 |       clip[k] = (k < buffer) ? 0 : ((k < buffer+256) ? k-buffer : 255);
 80 |   }
 81 |   BYTE operator()(int i) { return clip[i+buffer]; }
 82 | };
 83 | 
 84 | extern _PixelClip PixelClip;
 85 | 
 86 | // Unlinks node `me` from its current position in a doubly linked list and links it between `newprev` and `newnext`.
 87 | template<class ListNode>
 88 | static __inline void Relink(ListNode* newprev, ListNode* me, ListNode* newnext) {
 89 |   if (me == newprev || me == newnext) return;  // me is one of the targets: leave the list untouched
 90 |   me->next->prev = me->prev;  // close the gap between me's current neighbours
 91 |   me->prev->next = me->next;
 92 |   me->prev = newprev;  // point me at its new neighbours
 93 |   me->next = newnext;
 94 |   me->prev->next = me->next->prev = me;  // and make both new neighbours point back at me
 95 | }
 96 | 
 97 | 
 98 | 
 99 | /*** Inline helper methods ***/
100 | // Adds 32768 (half of 1<<16) to i, shifts the sum right by 16 bits and looks the result up in PixelClip.
101 | static __inline BYTE ScaledPixelClip(int i) {
102 |   const int rounded = (i+32768) >> 16;
103 |   return PixelClip(rounded);
104 | }
105 | 
106 | // True when a and b differ by at most threshold (done as one unsigned comparison; assumes no integer wrap-around).
107 | static __inline bool IsClose(int a, int b, unsigned threshold) {
108 |   const unsigned biased = unsigned(a-b+threshold);
109 |   return biased <= threshold*2;
110 | }
111 | 
112 | 
113 | #endif  // __Internal_H__
114 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/nnedi3.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | **                    nnedi3 v0.9.4.65 for Avs+/Avisynth 2.6.x
  3 | **
  4 | **   Copyright (C) 2010-2011 Kevin Stone
  5 | **
  6 | **   This program is free software; you can redistribute it and/or modify
  7 | **   it under the terms of the GNU General Public License as published by
  8 | **   the Free Software Foundation; either version 2 of the License, or
  9 | **   (at your option) any later version.
 10 | **
 11 | **   This program is distributed in the hope that it will be useful,
 12 | **   but WITHOUT ANY WARRANTY; without even the implied warranty of
 13 | **   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 14 | **   GNU General Public License for more details.
 15 | **
 16 | **   You should have received a copy of the GNU General Public License
 17 | **   along with this program; if not, write to the Free Software
 18 | **   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 19 | */
 20 | #pragma once  // this header defines PS_INFO and class nnedi3 and has no other include guard
 21 | #include <windows.h>
 22 | #define _USE_MATH_DEFINES
 23 | #include <math.h>
 24 | #include <tchar.h>
 25 | #include <float.h>
 26 | #include <stdio.h>
 27 | #include "./avisynth.h"
 28 | #include "./PlanarFrame.h"
 29 | #include "./ThreadPoolInterface.h"
 30 | // Constant lookup tables; presumably indexed by the filter's nsize (NUM_NSIZE entries) and nns (NUM_NNS entries) settings - confirm in nnedi3.cpp.
 31 | #define NUM_NSIZE 7
 32 | #define NUM_NNS 5
 33 | const int xdiaTable[NUM_NSIZE] = {8,16,32,48,8,16,32};
 34 | const int ydiaTable[NUM_NSIZE] = {6,6,6,6,4,4,4};
 35 | const int nnsTable[NUM_NNS] = {16,32,64,128,256};
 36 | const int nnsTablePow2[NUM_NNS] = {4,5,6,7,8};  // log2 of the nnsTable entry at the same index
 37 | // Clamps n to [vmin,vmax]. Every argument is parenthesized so expressions can be passed safely; arguments may still be evaluated more than once.
 38 | #ifndef clamp
 39 | #define clamp(n,vmin,vmax) (((n)>(vmin))?(((n)<(vmax))?(n):(vmax)):(vmin))
 40 | #endif
 41 | #define CB2(n) clamp(n,0,254)
 42 | 
 43 | #define PLANE_MAX 4
 44 | // Work descriptor handed to the processing code; class nnedi3 keeps one per thread slot (pssInfo[MAX_MT_THREADS]). Per-plane arrays have PLANE_MAX entries.
 45 | struct PS_INFO {
 46 | 	int field[PLANE_MAX],ident;
 47 | 	const uint8_t *srcp[PLANE_MAX];
 48 | 	uint8_t *dstp[PLANE_MAX];
 49 | 	uint8_t *NNPixels[PLANE_MAX];
 50 | 	int NNPixels_pitch[PLANE_MAX];
 51 | 	int src_pitch[PLANE_MAX],dst_pitch[PLANE_MAX];
 52 | 	int height[PLANE_MAX],width[PLANE_MAX];
 53 | 	int sheight[PLANE_MAX],eheight[PLANE_MAX]; // presumably start/end rows of this descriptor's slice - confirm
 54 | 	int sheight2[PLANE_MAX],eheight2[PLANE_MAX];
 55 | 	int *lcount[PLANE_MAX],opt,qual;
 56 | 	float *input,*temp;
 57 | 	float *weights0,**weights1;
 58 | 	int asize,nns,xdia,ydia,fapprox;
 59 | 	bool Y,U,V,A; // presumably which planes are processed - confirm
 60 | 	int pscrn;
 61 | 	uint8_t current_plane;
 62 | 	uint8_t plane_range[PLANE_MAX];
 63 | 	bool int16_prescreener,int16_predictor;
 64 | 	uint8_t bits_per_pixel;
 65 | 	uint16_t *val_min_max;
 66 | 	IScriptEnvironment *env;
 67 | };
 68 | // The nnedi3 filter class (a GenericVideoFilter): holds the filter settings, per-thread PS_INFO work descriptors and thread-pool bookkeeping.
 69 | class nnedi3 : public GenericVideoFilter
 70 | {
 71 | protected:
 72 | 	bool dh,Y,U,V,A;
 73 | 	int pscrn;
 74 | 	int field,opt,nns,etype;
 75 | 	int *lcount[PLANE_MAX],qual,nsize,fapprox;
 76 | 	PlanarFrame *srcPF,*dstPF;
 77 | 	PS_INFO pssInfo[MAX_MT_THREADS]; // one work descriptor per thread slot
 78 | 	float *weights0,*weights1[2];
 79 | 	uint8_t threads,threads_number;
 80 | 	bool sleep;
 81 | 	Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
 82 | 	uint32_t UserId;
 83 | 	uint8_t *NNPixels[PLANE_MAX];
 84 | 	
 85 | 	bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
 86 | 	uint8_t pixelsize; // AVS16
 87 | 	uint8_t bits_per_pixel;
 88 | 
 89 | 	void calcStartEnd2(void);
 90 | 	void copyPad(PVideoFrame &src,int fn,IScriptEnvironment *env);
 91 | 
 92 | 	ThreadPoolFunction StaticThreadpoolF;
 93 | 
 94 | 	static void StaticThreadpool(void *ptr);
 95 | 
 96 | 	void FreeData(void);
 97 | 	// NOTE(review): the constructor parameter below is spelled _asvp while the member is avsp - likely a typo, harmless in a declaration.
 98 | public:
 99 | 	nnedi3(PClip _child,int _field,bool _dh,bool _Y,bool _U,bool _V,bool _A,int _nsize,int _nns,int _qual,int _etype,
100 | 		int _pscrn,uint8_t _threads,int _opt,int _fapprox,bool _sleep,int range_mode, bool negativePrefetch,bool _asvp,IScriptEnvironment *env);
101 | 	virtual ~nnedi3();
102 | 	PVideoFrame __stdcall GetFrame(int n,IScriptEnvironment *env);
103 | 
104 | 	int __stdcall SetCacheHints(int cachehints, int frame_range);
105 | };
106 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/plugins_JPSDR.rc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpsdr/plugins_JPSDR/7826b045cfc28a0738088ec03a5e6f2edd44c9f0/Plugins_JPSDR/plugins_JPSDR.rc


--------------------------------------------------------------------------------
/Plugins_JPSDR/resample.h:
--------------------------------------------------------------------------------
  1 | // Avisynth v2.5.  Copyright 2002 Ben Rudiak-Gould et al.
  2 | // http://www.avisynth.org
  3 | 
  4 | // This program is free software; you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License as published by
  6 | // the Free Software Foundation; either version 2 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // This program is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | // GNU General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU General Public License
 15 | // along with this program; if not, write to the Free Software
 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 17 | // http://www.gnu.org/copyleft/gpl.html .
 18 | //
 19 | // Linking Avisynth statically or dynamically with other modules is making a
 20 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
 21 | // General Public License cover the whole combination.
 22 | //
 23 | // As a special exception, the copyright holders of Avisynth give you
 24 | // permission to link Avisynth with independent modules that communicate with
 25 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
 26 | // terms of these independent modules, and to copy and distribute the
 27 | // resulting combined work under terms of your choice, provided that
 28 | // every copy of the combined work is accompanied by a complete copy of
 29 | // the source code of Avisynth (the version of Avisynth used to produce the
 30 | // combined work), being distributed under the terms of the GNU General
 31 | // Public License plus this exception.  An independent module is a module
 32 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
 33 | // import and export plugins, or graphical user interfaces.
 34 | 
 35 | #ifndef __Resample_H__
 36 | #define __Resample_H__
 37 | 
 38 | //#include <stdint.h>
 39 | #include <windows.h>
 40 | #include "./avisynth.h"
 41 | #include "./resample_functions.h"
 42 | #include "./ThreadPoolInterface.h"
 43 | 
 44 | #define RESAMPLE_MT_VERSION "ResampleMT 2.5.1 JPSDR"
 45 | 
 46 | typedef enum ChromaLocation_e
 47 | {
 48 |   AVS_CHROMA_UNUSED = -1,
 49 |   AVS_CHROMA_LEFT = 0,
 50 |   AVS_CHROMA_CENTER = 1,
 51 |   AVS_CHROMA_TOP_LEFT = 2,
 52 |   AVS_CHROMA_TOP = 3,
 53 |   AVS_CHROMA_BOTTOM_LEFT = 4,
 54 |   AVS_CHROMA_BOTTOM = 5,
 55 |   AVS_CHROMA_DV = 6 // Special to Avisynth
 56 | } ChromaLocation_e;
 57 | 
 58 | // Resizer function pointer
 59 | typedef void (*ResamplerV)(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2);
 60 | typedef void (*ResamplerH)(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2);
 61 | 
 62 | 
 63 | typedef struct _MT_Data_Info_ResampleMT
 64 | {
 65 | 	const BYTE*src1,*src2,*src3,*src4;
 66 | 	BYTE *dst1,*dst2,*dst3,*dst4;
 67 | 	int src_pitch1,src_pitch2,src_pitch3,src_pitch4;
 68 | 	int dst_pitch1,dst_pitch2,dst_pitch3,dst_pitch4;
 69 | 	int32_t src_Y_h_min,src_Y_h_max,src_Y_w;
 70 | 	int32_t src_UV_h_min,src_UV_h_max,src_UV_w;
 71 | 	int32_t dst_Y_h_min,dst_Y_h_max,dst_Y_w;
 72 | 	int32_t dst_UV_h_min,dst_UV_h_max,dst_UV_w;
 73 | 	void *filter_storage_luma,*filter_storage_luma2,*filter_storage_luma3,*filter_storage_luma4;
 74 | 	void *filter_storage_chromaU,*filter_storage_chromaV;
 75 | 	int *src_pitch_table_luma,*src_pitch_table_chromaU,*src_pitch_table_chromaV;
 76 | 	ResamplingProgram *resampling_program_luma,*resampling_program_chroma;
 77 | 	bool top,bottom;
 78 | } MT_Data_Info_ResampleMT;
 79 | 
 80 | 
 81 | 
 82 | /**
 83 |   * Class to resize in the horizontal direction using a specified sampling filter
 84 |   * Helper for resample functions
 85 |  **/
 86 | class FilteredResizeH : public GenericVideoFilter
 87 | {
 88 | public:
 89 |   FilteredResizeH( PClip _child, double subrange_left, double subrange_width, int target_width, uint8_t _threads,
 90 | 	  bool _sleep,int range_mode,bool desample,int accuracy, bool negativePrefetch,
 91 | 	  bool _avsp,bool preserve_center,ChromaLocation_e chroma_placement,
 92 | 	  ResamplingFunction* func,IScriptEnvironment* env );
 93 |   virtual ~FilteredResizeH(void);
 94 |   PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env);
 95 | 
 96 |   int __stdcall SetCacheHints(int cachehints, int frame_range);
 97 | 
 98 |   //static ResamplerH GetResampler(int CPU, bool aligned, int pixelsize, int bits_per_pixel, ResamplingProgram* program, IScriptEnvironment* env);
 99 |   ResamplerH GetResampler(bool aligned, ResamplingProgram* program, IScriptEnvironment* env);
100 | 
101 | private:
102 | 	Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
103 | 	MT_Data_Info_ResampleMT MT_Data[MAX_MT_THREADS];
104 | 	uint8_t threads,threads_number;
105 | 	bool sleep;
106 | 	uint32_t UserId;
107 | 	
108 | 	ThreadPoolFunction ResampleH_MT;
109 | 
110 | 	static void StaticThreadpoolH(void *ptr);
111 | 
112 | 	uint8_t CreateMTData(uint8_t max_threads,int32_t src_size_x,int32_t src_size_y,int32_t dst_size_x,int32_t dst_size_y, int UV_w, int UV_h);
113 | 
114 | 	void FreeData(void);
115 | 
116 | 	void ResamplerLumaMT(MT_Data_Info_ResampleMT *MT_DataGF);
117 | 	void ResamplerLumaMT2(MT_Data_Info_ResampleMT *MT_DataGF);
118 | 	void ResamplerLumaMT3(MT_Data_Info_ResampleMT *MT_DataGF);
119 | 	void ResamplerLumaMT4(MT_Data_Info_ResampleMT *MT_DataGF);
120 | 	void ResamplerUChromaMT(MT_Data_Info_ResampleMT *MT_DataGF);
121 | 	void ResamplerVChromaMT(MT_Data_Info_ResampleMT *MT_DataGF);
122 | 	
123 | 
124 |   // Resampling
125 |   ResamplingProgram *resampling_program_luma;
126 |   ResamplingProgram *resampling_program_chroma;
127 | 
128 |   // Note: these pointer are currently not used; they are used to pass data into run-time resampler.
129 |   // They are kept because this may be needed later (like when we implemented actual horizontal resizer.)
130 |   void* filter_storage_luma;
131 |   void* filter_storage_chroma;
132 | 
133 |   int src_width, src_height, dst_width, dst_height;
134 |   bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
135 |   uint8_t pixelsize; // AVS16
136 |   uint8_t bits_per_pixel;
137 |   uint8_t plane_range[4];
138 |   bool mode_YUY2;
139 |   bool Enable_MMX,Enable_SSE2,Enable_SSE3,Enable_SSSE3,Enable_SSE4_1,Enable_AVX2;
140 | 
141 |   ResamplerH resampler_h_luma;
142 |   ResamplerH resampler_h_chroma;
143 | };
144 | 
145 | 
146 | /**
147 |   * Class to resize in the vertical direction using a specified sampling filter
148 |   * Helper for resample functions
149 |  **/
150 | class FilteredResizeV : public GenericVideoFilter
151 | {
152 | public:
153 |   FilteredResizeV( PClip _child, double subrange_top, double subrange_height, int target_height, uint8_t _threads,
154 | 	  bool _sleep,int range_mode,bool desample,int accuracy,int ChromaS,uint8_t ShiftC,bool negativePrefetch,
155 | 	  bool _avsp,bool preserve_center,ChromaLocation_e chroma_placement,
156 | 	  bool ResizeH,ResamplingFunction* func,IScriptEnvironment* env);
157 |   virtual ~FilteredResizeV(void);
158 |   PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env);
159 | 
160 | 	int __stdcall SetCacheHints(int cachehints, int frame_range);
161 | 
162 |   //static ResamplerV GetResampler(int CPU, bool aligned,int pixelsize, int bits_per_pixel, void*& storage, ResamplingProgram* program);
163 |   //ResamplerV GetResampler(bool aligned,void*& storage, ResamplingProgram* program);
164 |   ResamplerV GetResampler(bool aligned, ResamplingProgram* program, IScriptEnvironment* env);
165 | 
166 | private:
167 | 	Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
168 | 	MT_Data_Info_ResampleMT MT_Data[MAX_MT_THREADS];
169 | 	uint8_t threads,threads_number;
170 | 	bool sleep;
171 | 	uint32_t UserId;
172 | 
173 | 	ThreadPoolFunction ResampleV_MT;
174 | 
175 | 	static void StaticThreadpoolV(void *ptr);
176 | 
177 | 	uint8_t CreateMTData(uint8_t max_threads,int32_t src_size_x,int32_t src_size_y,int32_t dst_size_x,int32_t dst_size_y, int UV_w, int UV_h);
178 | 
179 | 	void FreeData(void);
180 | 
181 | 	void ResamplerLumaAlignedMT(MT_Data_Info_ResampleMT *MT_DataGF);
182 | 	void ResamplerLumaUnalignedMT(MT_Data_Info_ResampleMT *MT_DataGF);
183 | 	void ResamplerLumaAlignedMT2(MT_Data_Info_ResampleMT *MT_DataGF);
184 | 	void ResamplerLumaUnalignedMT2(MT_Data_Info_ResampleMT *MT_DataGF);
185 | 	void ResamplerLumaAlignedMT3(MT_Data_Info_ResampleMT *MT_DataGF);
186 | 	void ResamplerLumaUnalignedMT3(MT_Data_Info_ResampleMT *MT_DataGF);
187 | 	void ResamplerLumaAlignedMT4(MT_Data_Info_ResampleMT *MT_DataGF);
188 | 	void ResamplerLumaUnalignedMT4(MT_Data_Info_ResampleMT *MT_DataGF);
189 | 	void ResamplerUChromaAlignedMT(MT_Data_Info_ResampleMT *MT_DataGF);
190 | 	void ResamplerUChromaUnalignedMT(MT_Data_Info_ResampleMT *MT_DataGF);
191 | 	void ResamplerVChromaAlignedMT(MT_Data_Info_ResampleMT *MT_DataGF);
192 | 	void ResamplerVChromaUnalignedMT(MT_Data_Info_ResampleMT *MT_DataGF);
193 | 
194 |   bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8;
195 |   uint8_t pixelsize; // AVS16
196 |   uint8_t bits_per_pixel;
197 |   uint8_t plane_range[4];
198 |   bool mode_YUY2;
199 |   bool Enable_MMX,Enable_SSE2,Enable_SSE3,Enable_SSSE3,Enable_SSE4_1,Enable_AVX2;
200 | 	
201 |   ResamplingProgram *resampling_program_luma;
202 |   ResamplingProgram *resampling_program_chroma;
203 |   int *src_pitch_table_luma;
204 |   int *src_pitch_table_chromaU;
205 |   int *src_pitch_table_chromaV;
206 |   int src_pitch_luma;
207 |   int src_pitch_chromaU;
208 |   int src_pitch_chromaV;
209 | 
210 |   // Note: these pointer are currently not used; they are used to pass data into run-time resampler.
211 |   // They are kept because this may be needed later (like when we implemented actual horizontal resizer.)
212 |   void* filter_storage_luma_aligned;
213 |   void* filter_storage_luma_unaligned;
214 |   void* filter_storage_chroma_aligned;
215 |   void* filter_storage_chroma_unaligned;
216 | 
217 |   ResamplerV resampler_luma_aligned;
218 |   ResamplerV resampler_luma_unaligned;
219 |   ResamplerV resampler_chroma_aligned;
220 |   ResamplerV resampler_chroma_unaligned;
221 | };
222 | 
223 | 
224 | 
225 | class FilteredResizeMT
226 | {
227 | public:
228 | static PClip CreateResizeH( PClip clip, double subrange_left, double subrange_width, int target_width, uint8_t _threads,
229 | 	                         bool _sleep,int range_mode,bool desample,int accuracy, bool negativePrefetch,
230 | 							 bool _avsp, bool preserve_center,ChromaLocation_e chroma_placement,
231 | 							 ResamplingFunction* func,IScriptEnvironment* env );
232 | static PClip CreateResizeV( PClip clip, double subrange_top, double subrange_height, int target_height, uint8_t _threads,
233 | 	                         bool _sleep,int range_mode,bool desample,int accuracy,int ChromaS,uint8_t ShiftC, bool negativePrefetch,
234 | 							 bool _avsp,bool preserve_center,ChromaLocation_e chroma_placement,
235 | 							 bool ResizeH,ResamplingFunction* func,IScriptEnvironment* env );
236 | 
237 | static PClip CreateResize( PClip clip, int target_width, int target_height, int force, int _threads,
238 | 	bool _LogicalCores,bool _MaxPhysCores, bool _SetAffinity,bool _sleep,int prefetch,int range_mode,
239 | 	bool desample,int accuracy,int order,int thread_level,
240 | 	const AVSValue* args,ResamplingFunction* f,
241 | 	bool preserve_center,const char *placement_name,ChromaLocation_e forced_chroma_placement,
242 | 	IScriptEnvironment* env );
243 | 
244 | static AVSValue __cdecl Create_PointResize(AVSValue args, void*, IScriptEnvironment* env);
245 | 
246 | static AVSValue __cdecl Create_BilinearResize(AVSValue args, void*, IScriptEnvironment* env);
247 | 
248 | static AVSValue __cdecl Create_BicubicResize(AVSValue args, void*, IScriptEnvironment* env);
249 | 
250 | // 09-14-2002 - Vlad59 - Lanczos3Resize - 
251 | static AVSValue __cdecl Create_LanczosResize(AVSValue args, void*, IScriptEnvironment* env);
252 | 
253 | static AVSValue __cdecl Create_Lanczos4Resize(AVSValue args, void*, IScriptEnvironment* env);
254 | 
255 | static AVSValue __cdecl Create_BlackmanResize(AVSValue args, void*, IScriptEnvironment* env);
256 | 
257 | static AVSValue __cdecl Create_Spline16Resize(AVSValue args, void*, IScriptEnvironment* env);
258 | 
259 | static AVSValue __cdecl Create_Spline36Resize(AVSValue args, void*, IScriptEnvironment* env);
260 | 
261 | static AVSValue __cdecl Create_Spline64Resize(AVSValue args, void*, IScriptEnvironment* env);
262 | 
263 | static AVSValue __cdecl Create_GaussianResize(AVSValue args, void*, IScriptEnvironment* env);
264 | 
265 | static AVSValue __cdecl Create_SincResize(AVSValue args, void*, IScriptEnvironment* env);
266 | 
267 | static AVSValue __cdecl Create_SinPowerResize(AVSValue args, void*, IScriptEnvironment* env);
268 | 
269 | static AVSValue __cdecl Create_SincLin2Resize(AVSValue args, void*, IScriptEnvironment* env);
270 | 
271 | static AVSValue __cdecl Create_UserDefined2Resize(AVSValue args, void*, IScriptEnvironment* env);
272 | 
273 | // Desample functions
274 | 
275 | static AVSValue __cdecl Create_DeBilinearResize(AVSValue args, void*, IScriptEnvironment* env);
276 | 
277 | static AVSValue __cdecl Create_DeBicubicResize(AVSValue args, void*, IScriptEnvironment* env);
278 | 
279 | // 09-14-2002 - Vlad59 - Lanczos3Resize - 
280 | static AVSValue __cdecl Create_DeLanczosResize(AVSValue args, void*, IScriptEnvironment* env);
281 | 
282 | static AVSValue __cdecl Create_DeLanczos4Resize(AVSValue args, void*, IScriptEnvironment* env);
283 | 
284 | static AVSValue __cdecl Create_DeBlackmanResize(AVSValue args, void*, IScriptEnvironment* env);
285 | 
286 | static AVSValue __cdecl Create_DeSpline16Resize(AVSValue args, void*, IScriptEnvironment* env);
287 | 
288 | static AVSValue __cdecl Create_DeSpline36Resize(AVSValue args, void*, IScriptEnvironment* env);
289 | 
290 | static AVSValue __cdecl Create_DeSpline64Resize(AVSValue args, void*, IScriptEnvironment* env);
291 | 
292 | static AVSValue __cdecl Create_DeGaussianResize(AVSValue args, void*, IScriptEnvironment* env);
293 | 
294 | static AVSValue __cdecl Create_DeSincResize(AVSValue args, void*, IScriptEnvironment* env);
295 | 
296 | static AVSValue __cdecl Create_DeSinPowerResize(AVSValue args, void*, IScriptEnvironment* env);
297 | 
298 | static AVSValue __cdecl Create_DeSincLin2Resize(AVSValue args, void*, IScriptEnvironment* env);
299 | 
300 | static AVSValue __cdecl Create_DeUserDefined2Resize(AVSValue args, void*, IScriptEnvironment* env);
301 | };
302 | 
303 | 
304 | #endif // __Resample_H__
305 | 
306 | 
307 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/resample_avx2.h:
--------------------------------------------------------------------------------
 1 | // Avisynth v2.5.  Copyright 2002 Ben Rudiak-Gould et al.
 2 | // http://www.avisynth.org
 3 | 
 4 | // This program is free software; you can redistribute it and/or modify
 5 | // it under the terms of the GNU General Public License as published by
 6 | // the Free Software Foundation; either version 2 of the License, or
 7 | // (at your option) any later version.
 8 | //
 9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program; if not, write to the Free Software
16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
17 | // http://www.gnu.org/copyleft/gpl.html .
18 | //
19 | // Linking Avisynth statically or dynamically with other modules is making a
20 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
21 | // General Public License cover the whole combination.
22 | //
23 | // As a special exception, the copyright holders of Avisynth give you
24 | // permission to link Avisynth with independent modules that communicate with
25 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
26 | // terms of these independent modules, and to copy and distribute the
27 | // resulting combined work under terms of your choice, provided that
28 | // every copy of the combined work is accompanied by a complete copy of
29 | // the source code of Avisynth (the version of Avisynth used to produce the
30 | // combined work), being distributed under the terms of the GNU General
31 | // Public License plus this exception.  An independent module is a module
32 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
33 | // import and export plugins, or graphical user interfaces.
34 | 
35 | #ifndef __Resample_AVX2_H__
36 | #define __Resample_AVX2_H__
37 | 
38 | #include "./resample_functions.h"
39 | 
40 | template<int filtersizealigned8, int filtersizemod8>
41 | void resizer_h_avx2_generic_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2);
42 | 
43 | template<bool lessthan16bit>
44 | void resizer_h_avx2_generic_uint16_t(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2);
45 | 
46 | void resizer_h_avx2_generic_uint8_t(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2);
47 | 
48 | template<bool lessthan16bit>
49 | void resize_v_avx2_planar_uint16_t(BYTE* dst0, const BYTE* src0, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2);
50 | 
51 | void resize_v_avx2_planar_float(BYTE* dst0, const BYTE* src0, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2);
52 | 
53 | void resize_v_avx2_planar_uint8_t(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2);
54 | 
55 | #endif // __Resample_AVX2_H__
56 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/resample_functions.h:
--------------------------------------------------------------------------------
  1 | // Avisynth v2.5.  Copyright 2002 Ben Rudiak-Gould et al.
  2 | // http://www.avisynth.org
  3 | 
  4 | // This program is free software; you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License as published by
  6 | // the Free Software Foundation; either version 2 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // This program is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | // GNU General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU General Public License
 15 | // along with this program; if not, write to the Free Software
 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 17 | // http://www.gnu.org/copyleft/gpl.html .
 18 | //
 19 | // Linking Avisynth statically or dynamically with other modules is making a
 20 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
 21 | // General Public License cover the whole combination.
 22 | //
 23 | // As a special exception, the copyright holders of Avisynth give you
 24 | // permission to link Avisynth with independent modules that communicate with
 25 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
 26 | // terms of these independent modules, and to copy and distribute the
 27 | // resulting combined work under terms of your choice, provided that
 28 | // every copy of the combined work is accompanied by a complete copy of
 29 | // the source code of Avisynth (the version of Avisynth used to produce the
 30 | // combined work), being distributed under the terms of the GNU General
 31 | // Public License plus this exception.  An independent module is a module
 32 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
 33 | // import and export plugins, or graphical user interfaces.
 34 | 
 35 | #ifndef __Resample_Functions_H__
 36 | #define __Resample_Functions_H__
 37 | 
 38 | #include <malloc.h>
 39 | #include <math.h>
 40 | #include <vector>
 41 | #include <string.h>
 42 | 
 43 | #include "./avisynth.h"
 44 | #include "./MatrixClass.h"
 45 | #include "./avs/alignment.h"
 46 | 
 47 | #define myalignedfree(ptr) if (ptr!=NULL) { _aligned_free(ptr); ptr=NULL;}
 48 | 
 49 | // Original value: 65536
 50 | // 2 bits sacrificed because of 16 bit signed MMX multiplication
 51 | // NOTE: Don't change this value. It's hard-coded in SIMD code.
 52 | const int FPScale8bits = 14; // fixed point scaler 14 bit
 53 | const int FPScale = 1 << FPScale8bits; // fixed point scaler (1<<14)
 54 | // for 16 bits: one bit less
 55 | const int FPScale16bits = 13;
 56 | const int FPScale16 = 1 << FPScale16bits; // fixed point scaler for 10-16 bit SIMD signed operation
 57 | const int ALIGN_RESIZER_TARGET_SIZE = 8;
 58 | const int ALIGN_FLOAT_RESIZER_COEFF_SIZE = 8; // simd friendly
 59 | 
 60 | // 09-14-2002 - Vlad59 - Lanczos3Resize - Constant added
 61 | #define M_PI 3.14159265358979323846
 62 | 
 63 | struct ResamplingProgram
 64 | {
 65 |   IScriptEnvironment *Env;
 66 |   int source_size, target_size;
 67 |   double crop_start, crop_size;
 68 |   int filter_size;
 69 |   int filter_size_real; // maybe less than filter_size if dimensions are small
 70 |   int filter_size_alignment; // for info, 1 (C), 8 (sse or avx2) or 16 (avx2)
 71 | 
 72 |   // Array of Integer indicate starting point of sampling
 73 |   std::vector<int> pixel_offset;
 74 | 
 75 |   int bits_per_pixel;
 76 | 
 77 |   // Array of array of coefficient for each pixel
 78 |   // {{pixel[0]_coeff}, {pixel[1]_coeff}, ...}
 79 |   short *pixel_coefficient;
 80 |   float *pixel_coefficient_float;
 81 |   // Array of real kernel size, handles edge cases! <= filter_size
 82 |   // for SIMD, coefficients are copied over a padded aligned storage
 83 |   std::vector<short> kernel_sizes; 
 84 |   // 3.7.4- can be different for each line but then they get equalized and aligned.
 85 | 
 86 |   // anti-overread helpers for float resizer simd code reading 8 pixels from a given offset
 87 |   bool overread_possible,StatusOk;
 88 |   int source_overread_offset; // offset from where reading 8 bytes requires masking garbage on the right side
 89 |   int source_overread_beyond_targetx;
 90 |   // in H resizers danger zone starts from here.
 91 |   // When reading aligned_filter_size elements from (src+offset) no longer fits image scanline dimensions
 92 | 
 93 |   ResamplingProgram(int filter_size, int source_size, int target_size, double crop_start, double crop_size, int bits_per_pixel, IScriptEnvironment* env)
 94 |     : Env(env), source_size(source_size), target_size(target_size), crop_start(crop_start), crop_size(crop_size), filter_size(filter_size), filter_size_real(filter_size),
 95 |     bits_per_pixel(bits_per_pixel), pixel_coefficient(NULL), pixel_coefficient_float(NULL)
 96 |   {
 97 | 	StatusOk = true;
 98 |     overread_possible = false;
 99 |     source_overread_offset = -1;
100 |     source_overread_beyond_targetx = -1;
101 | 
102 |     pixel_offset.resize(target_size);
103 |     kernel_sizes.resize(target_size);
104 | 
105 |     // align target_size to 8 units to allow safe 8 pixels/cycle in H resizers
106 |     // pixel_offset is in unrolled loop, 128/256bit simd size does not affect.
107 | 	filter_size_alignment = 1; // just info. nothing special, for C. resize_h_prepare_coeff_8or16 can override and realign the coefficients for SIMD processing
108 | 	if (bits_per_pixel<32)
109 | 		pixel_coefficient = (short*) _aligned_malloc(sizeof(short)*target_size*filter_size, 64);
110 | 	else
111 | 		pixel_coefficient_float = (float*) _aligned_malloc(sizeof(float)*target_size*filter_size, 64);
112 | 
113 |     if (((bits_per_pixel<32) && (pixel_coefficient==NULL)) || 
114 | 		((bits_per_pixel==32) && (pixel_coefficient_float==NULL)))
115 | 	{
116 | 	  myalignedfree(pixel_coefficient_float);
117 | 	  myalignedfree(pixel_coefficient);
118 | 	  StatusOk = false;
119 |       //env->ThrowError("ResamplingProgram: Could not reserve memory.");
120 |     }
121 | 	
122 | 	// Set all values to 0
123 | 	if (bits_per_pixel<32) memset(pixel_coefficient,0,sizeof(short)*target_size*filter_size);
124 | 	else std::fill_n(pixel_coefficient_float, target_size*filter_size, 0.0f);
125 |   };
126 | 
127 |   ~ResamplingProgram()
128 |   {
129 | 	myalignedfree(pixel_coefficient_float);
130 | 	myalignedfree(pixel_coefficient);
131 |   };
132 | };
133 | 
134 | typedef struct ResamplingProgram ResamplingProgram;
135 | 
136 | void resize_prepare_coeffs(ResamplingProgram* p, IScriptEnvironment* env, int filter_size_alignment);
137 | 
138 | /*******************************************
139 |    ***************************************
140 |    **  Helper classes for resample.cpp  **
141 |    ***************************************
142 |  *******************************************/
143 | 
144 | 
145 | class ResamplingFunction 
146 | /**
147 |   * Pure virtual base class for resampling functions
148 |   */
149 | {
150 | public:
151 |   virtual double f(double x) = 0;
152 |   virtual double support() = 0;
153 | 
154 |   virtual ResamplingProgram* GetResamplingProgram(int source_size, double crop_start, double crop_size, int target_size, int bits_per_pixel,
155 | 	double center_pos_src, double center_pos_dst, IScriptEnvironment* env);
156 |   virtual ResamplingProgram* GetDesamplingProgram(int source_size, double crop_start, double crop_size, int target_size, int bits_per_pixel,
157 | 	double center_pos_src, double center_pos_dst, uint8_t accuracy, int SizeY, uint8_t ShiftC, int &SizeOut,IScriptEnvironment* env);
158 |   virtual int GetDesamplingData(int source_size, double crop_start, double crop_size, int target_size, int bits_per_pixel,
159 |   double center_pos_src, double center_pos_dst, uint8_t ShiftC, IScriptEnvironment* env);
160 | };
161 | 
162 | class PointFilter : public ResamplingFunction 
163 | /**
164 |   * Nearest neighbour (point sampler), used in PointResize
165 |  **/
166 | {
167 | public:
168 |   double f(double x);  
169 |   double support() { return 0.0; }  // 0.0 crashes it.
170 |   // Pre 3.7.4 : 0.0001. Comment: 0.0 crashes it. 
171 |   // 3.7.4- this 0 is specially handled in GetResamplingProgram
172 | };
173 | 
174 | 
175 | class TriangleFilter : public ResamplingFunction 
176 | /**
177 |   * Simple triangle filter, used in BilinearResize
178 |  **/
179 | {
180 | public:
181 |   double f(double x);  
182 |   double support() { return 1.0; }
183 | };
184 | 
185 | 
186 | class MitchellNetravaliFilter : public ResamplingFunction 
187 | /**
188 |   * Mitchell-Netraveli filter, used in BicubicResize
189 |  **/
190 | {
191 | public:
192 |   MitchellNetravaliFilter(double b=1.0/3.0, double c=1.0/3.0);
193 |   double f(double x);
194 |   double support() { return 2.0; }
195 | 
196 | private:
197 |   double p0,p2,p3,q0,q1,q2,q3;
198 | };
199 | 
200 | class LanczosFilter : public ResamplingFunction
201 | /**
202 |   * Lanczos filter, used in LanczosResize
203 |  **/
204 | {
205 | public:
206 |   LanczosFilter(int _taps=3);
207 | 	double f(double x);
208 | 	double support() { return taps; };
209 | 
210 | private:
211 | 	double sinc(double value);
212 |   double taps;
213 | };
214 | 
215 | class BlackmanFilter : public ResamplingFunction
216 | /**
217 |   * Blackman filter, used in BlackmanResize
218 |  **/
219 | {
220 | public:
221 |   BlackmanFilter(int _taps=4);
222 | 	double f(double x);
223 | 	double support() { return taps; };
224 | 
225 | private:
226 |   double taps, rtaps;
227 | };
228 | 
229 | // Spline16
230 | class Spline16Filter : public ResamplingFunction
231 | /**
232 |   * Spline16 of Panorama Tools is a cubic-spline, with derivative set to 0 at the edges (4x4 pixels).
233 |  **/
234 | {
235 | public:
236 | 	double f(double x);
237 | 	double support() { return 2.0; };
238 | 
239 | private:
240 | };
241 | 
242 | // Spline36
243 | class Spline36Filter : public ResamplingFunction
244 | /**
245 |   * Spline36 is like Spline16,  except that it uses 6x6=36 pixels.
246 |  **/
247 | {
248 | public:
249 | 	double f(double x);
250 | 	double support() { return 3.0; };
251 | 
252 | private:
253 | };
254 | 
255 | // Spline64
256 | class Spline64Filter : public ResamplingFunction
257 | /**
258 |   * Spline64 is like Spline36,  except that it uses 8x8=64 pixels.
259 |  **/
260 | {
261 | public:
262 | 	double f(double x);
263 | 	double support() { return 4.0; };
264 | 
265 | private:
266 | };
267 | 
268 | 
269 | class GaussianFilter : public ResamplingFunction
270 | /**
271 |   * GaussianFilter, from swscale.
272 |  **/
273 | {
274 | public:
275 |   GaussianFilter(double p=30.0, double _b=2.0, double _s=4.0);
276 | 	double f(double x);
277 | 	double support() { return s; }; // <3.7.4 was fixed at 4.0
278 | 
279 | private:
280 |  double param;
281 |  double b; // base value since 3.7.4
282 |  double s; // variable support since 3.7.4
283 | };
284 | 
285 | class SincFilter : public ResamplingFunction
286 | /**
287 |   * Sinc filter, used in SincResize
288 |  **/
289 | {
290 | public:
291 |   SincFilter(int _taps=4);
292 | 	double f(double x);
293 | 	double support() { return taps; };
294 | 
295 | private:
296 |   double taps;
297 | };
298 | 
299 | class SinPowerFilter : public ResamplingFunction
300 | // SinPow kernel, used in SinPowResize
301 | {
302 | public:
303 | SinPowerFilter(double p = 2.5);
304 | double f(double x);
305 | double support() { return 2.0; }; // 2 very important, 4 cause bugs
306 | 
307 | private:
308 | double param;
309 | };
310 | 
311 | class SincLin2Filter : public ResamplingFunction
312 | 	/**
313 | 	* SincLin2 filter, used in SincLin2Resize
314 | 	**/
315 | {
316 | public:
317 | 	SincLin2Filter(int _taps = 15);
318 | 	double f(double x);
319 | 	double support() { return taps; };
320 | 
321 | private:
322 | 	double sinc(double value);
323 | 	double taps;
324 | };
325 | 
326 | class UserDefined2Filter : public ResamplingFunction
327 | 	/**
328 | 	  * User-defined by 2 samples filter, used in UDef2Resize
329 | 	 **/
330 | {
331 | public:
332 | 	UserDefined2Filter(double _b = 121.0, double _c = 19.0, double _s = 2.3);
333 | 	double f(double x);
334 | 	double support() { return s; }
335 | 
336 | private:
337 | 	double sinc(double value);
338 | 	double a, b, c;
339 | 	double s; // variable support
340 | };
341 | 
#endif  // __Resample_Functions_H__
343 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/resample_sse.h:
--------------------------------------------------------------------------------
  1 | // Avisynth v2.5.  Copyright 2002 Ben Rudiak-Gould et al.
  2 | // http://avisynth.nl
  3 | 
  4 | // This program is free software; you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License as published by
  6 | // the Free Software Foundation; either version 2 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // This program is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | // GNU General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU General Public License
 15 | // along with this program; if not, write to the Free Software
 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 17 | // http://www.gnu.org/copyleft/gpl.html .
 18 | //
 19 | // Linking Avisynth statically or dynamically with other modules is making a
 20 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
 21 | // General Public License cover the whole combination.
 22 | //
 23 | // As a special exception, the copyright holders of Avisynth give you
 24 | // permission to link Avisynth with independent modules that communicate with
 25 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
 26 | // terms of these independent modules, and to copy and distribute the
 27 | // resulting combined work under terms of your choice, provided that
 28 | // every copy of the combined work is accompanied by a complete copy of
 29 | // the source code of Avisynth (the version of Avisynth used to produce the
 30 | // combined work), being distributed under the terms of the GNU General
 31 | // Public License plus this exception.  An independent module is a module
 32 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
 33 | // import and export plugins, or graphical user interfaces.
 34 | 
 35 | #ifndef __Resample_SSE_H__
 36 | #define __Resample_SSE_H__
 37 | 
 38 | // Intrinsics for SSE4.1, SSSE3, SSE3, SSE2, ISSE and MMX
 39 | #include <emmintrin.h>
 40 | #include <smmintrin.h>
 41 | 
 42 | #include "avisynth.h"
 43 | #include "./avs/config.h"
 44 | #include "./resample.h"
 45 | 
 46 | /***************************************
 47 |  ********* Templated SSE Loader ********
 48 |  ***************************************/
 49 | 
 50 | typedef __m128i (SSELoader)(const __m128i*); // function type of the integer loaders below; used as a template parameter (template<SSELoader load>)
 51 | typedef __m128 (SSELoader_ps)(const float*); // function type matching the float loaders below (const float* -> __m128)
 52 | 
 53 | __forceinline __m128i simd_load_aligned(const __m128i* adr) // SSELoader-compatible wrapper around _mm_load_si128
 54 | {
 55 |   return _mm_load_si128(adr); // aligned 128-bit load: adr must be 16-byte aligned
 56 | }
 57 | 
 58 | __forceinline __m128i simd_load_unaligned(const __m128i* adr) // SSELoader-compatible wrapper around _mm_loadu_si128
 59 | {
 60 |   return _mm_loadu_si128(adr); // unaligned 128-bit load: no alignment requirement on adr
 61 | }
 62 | 
 63 | #if defined(CLANG) // when CLANG is defined, tag the function with the sse3 target attribute
 64 | __attribute__((__target__("sse3")))
 65 | #endif
 66 | __forceinline __m128i simd_load_unaligned_sse3(const __m128i* adr) // SSELoader-compatible wrapper around _mm_lddqu_si128
 67 | {
 68 |   return _mm_lddqu_si128(adr); // SSE3 unaligned 128-bit load (LDDQU)
 69 | }
 70 | 
 71 | #if defined(CLANG) // when CLANG is defined, tag the function with the sse4.1 target attribute
 72 | __attribute__((__target__("sse4.1")))
 73 | #endif
 74 | __forceinline __m128i simd_load_streaming(const __m128i* adr) // SSELoader-compatible wrapper around _mm_stream_load_si128
 75 | {
 76 |   return _mm_stream_load_si128(const_cast<__m128i*>(adr)); // SSE4.1 non-temporal load; const is cast away before the pointer is handed to the intrinsic
 77 | }
 78 | 
 79 | // float loaders
 80 | __forceinline __m128 simd_loadps_aligned(const float * adr) // SSELoader_ps-compatible wrapper around _mm_load_ps
 81 | {
 82 |   return _mm_load_ps(adr); // aligned load of 4 floats: adr must be 16-byte aligned
 83 | }
 84 | 
 85 | __forceinline __m128 simd_loadps_unaligned(const float* adr) // SSELoader_ps-compatible wrapper around _mm_loadu_ps
 86 | {
 87 |   return _mm_loadu_ps(adr); // unaligned load of 4 floats: no alignment requirement on adr
 88 | }
 89 | 
 90 | 
 91 | void resize_h_prepare_coeff_8or16(ResamplingProgram* p,IScriptEnvironment* env,int alignFilterSize8or16); // declaration only; NOTE(review): alignFilterSize8or16 presumably selects an 8- or 16-element filter-size alignment - confirm at the definition
 92 | 
 93 | #ifdef X86_32 // the MMX variant is declared only when X86_32 is defined
 94 | void resize_v_mmx_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2);
 95 | #endif
 96 | void resize_v_sse2_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2);
 97 | template<SSELoader load> // load: a function of type SSELoader fixed at compile time (e.g. one of the simd_load_* wrappers)
 98 | void resize_v_sse2_planarT(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2);
 99 | template<SSELoader load> // load: a function of type SSELoader fixed at compile time
100 | #if defined(CLANG) // NOTE(review): guarded by CLANG only, unlike the GCC || CLANG guards used further down - confirm this is intentional
101 | __attribute__((__target__("ssse3")))
102 | #endif
103 | void resize_v_ssse3_planarT(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2);
104 | // Non-template variants below take the same parameter list as the declarations above; all are declarations only.
105 | #if defined(GCC) || defined(CLANG) // when GCC or CLANG is defined, tag the function with the sse4.1 target attribute
106 | __attribute__((__target__("sse4.1")))
107 | #endif
108 | void resize_v_sse41_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2);
109 | 
110 | #if defined(GCC) || defined(CLANG) // when GCC or CLANG is defined, tag the function with the ssse3 target attribute
111 | __attribute__((__target__("ssse3")))
112 | #endif
113 | void resize_v_ssse3_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2);
114 | 
115 | template<bool lessthan16bit> // NOTE(review): lessthan16bit presumably separates sub-16-bit depths from full 16-bit samples - confirm at the definition
116 | #if defined(GCC) || defined(CLANG) // when GCC or CLANG is defined, tag the function with the ssse3 target attribute
117 | __attribute__((__target__("ssse3")))
118 | #endif
119 | void resizer_h_ssse3_generic_uint16_t(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2);
120 | // Same parameter list as above, declared with the sse4.1 target attribute instead of ssse3.
121 | template<bool lessthan16bit>
122 | #if defined(GCC) || defined(CLANG)
123 | __attribute__((__target__("sse4.1")))
124 | #endif
125 | void resizer_h_sse41_generic_uint16_t(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2);
126 | // Float variant: same parameter list, specialised on two compile-time integers.
127 | template<int filtersizealigned8, int filtersizemod8> // NOTE(review): presumably the filter size in units of 8 and its remainder modulo 8 - confirm at the definition
128 | #if defined(GCC) || defined(CLANG)
129 | __attribute__((__target__("ssse3")))
130 | #endif
131 | void resizer_h_ssse3_generic_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2);
132 | 
133 | template<bool lessthan16bit> // NOTE(review): lessthan16bit presumably separates sub-16-bit depths from full 16-bit samples - confirm at the definition
134 | void resize_v_sse2_planar_uint16_t(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2);
135 | // Same parameter list as above, declared with the sse4.1 target attribute when GCC or CLANG is defined.
136 | template<bool lessthan16bit>
137 | #if defined(GCC) || defined(CLANG)
138 | __attribute__((__target__("sse4.1")))
139 | #endif
140 | void resize_v_sse41_planar_uint16_t(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2);
141 | // Float variant: non-template, same parameter list, no target attribute.
142 | void resize_v_sse2_planar_float(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2);
143 | 
144 | #if defined(GCC) || defined(CLANG) // when GCC or CLANG is defined, tag the function with the ssse3 target attribute
145 | __attribute__((__target__("ssse3")))
146 | #endif
147 | void resizer_h_ssse3_generic(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2);
148 | // Same parameter list as resizer_h_ssse3_generic; NOTE(review): the _8 suffix presumably denotes a fixed filter size of 8 - confirm at the definition.
149 | #if defined(GCC) || defined(CLANG)
150 | __attribute__((__target__("ssse3")))
151 | #endif
152 | void resizer_h_ssse3_8(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2);
153 | 
154 | 
155 | #endif // __Resample_SSE_H__
156 | 


--------------------------------------------------------------------------------
/Plugins_JPSDR/resource.h:
--------------------------------------------------------------------------------
 1 | //{{NO_DEPENDENCIES}}
 2 | // Microsoft Visual C++ generated include file.
 3 | // Used by plugins_JPSDR.rc
 4 | //
 5 | #define IDR_BINARY1                     101
 6 | // IDR_BINARY1 (above) is resource identifier 101; presumably it names the embedded binary1.bin - confirm in the .rc file.
 7 | // Next default values for new objects
 8 | // (defined only when APSTUDIO_INVOKED is defined and APSTUDIO_READONLY_SYMBOLS is not)
 9 | #ifdef APSTUDIO_INVOKED
10 | #ifndef APSTUDIO_READONLY_SYMBOLS
11 | #define _APS_NEXT_RESOURCE_VALUE        101
12 | #define _APS_NEXT_COMMAND_VALUE         40001
13 | #define _APS_NEXT_CONTROL_VALUE         1000
14 | #define _APS_NEXT_SYMED_VALUE           101
15 | #endif
16 | #endif
17 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # plugins_JPSDR
2 | Merge of all avisynth plugins
3 | 


--------------------------------------------------------------------------------
/plugins_JPSDR - Readme.txt:
--------------------------------------------------------------------------------
1 | Version 3.5.0
2 | 
3 | Merge of :
4 | AutoYUY2 4.1.10
5 | NNEDI3 0.9.4.65
6 | ResampleMT 2.5.1
7 | aWarpSharpMT 2.1.10
8 | HDRTools : 1.0.6
9 | 


--------------------------------------------------------------------------------