├── Plugins_JPSDR.sln ├── Plugins_JPSDR ├── AutoYUY2.cpp ├── AutoYUY2.h ├── AutoYUY2_AVX2_asm.asm ├── AutoYUY2_AVX2_asm_x64.asm ├── AutoYUY2_asm.asm ├── AutoYUY2_asm_x64.asm ├── HDRTools.cpp ├── HDRTools.h ├── HDRTools_AVX2_asm.asm ├── HDRTools_AVX2_asm_x64.asm ├── HDRTools_asm.asm ├── HDRTools_asm_x64.asm ├── MatrixClass.cpp ├── MatrixClass.h ├── MatrixClass_x64.asm ├── MatrixClass_x86.asm ├── PlanarFrame.cpp ├── PlanarFrame.h ├── PlanarFrame_asm.asm ├── PlanarFrame_asm_x64.asm ├── Plugins_JPSDR.vcxproj ├── Plugins_JPSDR.vcxproj.filters ├── Plugins_JPSDR.vcxproj.user ├── ThreadPool.cpp ├── ThreadPool.h ├── ThreadPoolDef.h ├── ThreadPoolInterface.cpp ├── ThreadPoolInterface.h ├── TransferFunctions.cpp ├── TransferFunctions.h ├── aWarpSharp.cpp ├── aWarpSharp.h ├── aWarpSharp_asm.asm ├── aWarpSharp_asm_x64.asm ├── avisynth.h ├── avs │ ├── alignment.h │ ├── capi.h │ ├── config.h │ ├── cpuid.h │ ├── filesystem.h │ ├── minmax.h │ ├── posix.h │ ├── types.h │ └── win.h ├── binary1.bin ├── internal.h ├── nnedi3.cpp ├── nnedi3.h ├── nnedi3_asm.asm ├── nnedi3_asm_FMA.asm ├── nnedi3_asm_FMA_x64.asm ├── nnedi3_asm_x64.asm ├── plugins_JPSDR.cpp ├── plugins_JPSDR.rc ├── resample.cpp ├── resample.h ├── resample_avx2.cpp ├── resample_avx2.h ├── resample_functions.cpp ├── resample_functions.h ├── resample_sse.cpp ├── resample_sse.h └── resource.h ├── README.md └── plugins_JPSDR - Readme.txt /Plugins_JPSDR.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 11.00 3 | # Visual Studio 2010 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Plugins_JPSDR", "Plugins_JPSDR\Plugins_JPSDR.vcxproj", "{0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}" 5 | EndProject 6 | Global 7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 8 | Debug|Win32 = Debug|Win32 9 | Debug|x64 = Debug|x64 10 | Release|Win32 = Release|Win32 11 | Release|x64 = Release|x64 12 | EndGlobalSection 13 | 
GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}.Debug|Win32.ActiveCfg = Debug|Win32 15 | {0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}.Debug|Win32.Build.0 = Debug|Win32 16 | {0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}.Debug|x64.ActiveCfg = Debug|x64 17 | {0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}.Debug|x64.Build.0 = Debug|x64 18 | {0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}.Release|Win32.ActiveCfg = Release|Win32 19 | {0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}.Release|Win32.Build.0 = Release|Win32 20 | {0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}.Release|x64.ActiveCfg = Release|x64 21 | {0CCCD3A3-DDA5-49F1-8B7F-C41376DA1FA1}.Release|x64.Build.0 = Release|x64 22 | EndGlobalSection 23 | GlobalSection(SolutionProperties) = preSolution 24 | HideSolutionNode = FALSE 25 | EndGlobalSection 26 | EndGlobal 27 | -------------------------------------------------------------------------------- /Plugins_JPSDR/AutoYUY2.h: -------------------------------------------------------------------------------- 1 | /* 2 | * AutoYUY2() 3 | * 4 | * Adaptive YV12 upsampling. Progressive picture areas are upsampled 5 | * progressively and interlaced areas are upsampled interlaced. 6 | * Copyright (C) 2005 Donald A. Graft 7 | * Modified by JPSDR 8 | * 9 | * AutoYUY2 is free software; you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation; either version 2, or (at your option) 12 | * any later version. 13 | * 14 | * AutoYUY2 is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * GNU General Public License for more details. 18 | * 19 | * You should have received a copy of the GNU General Public License 20 | * along with GNU Make; see the file COPYING. 
If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

#include "./avisynth.h"
#include "./ThreadPoolInterface.h"

// Version string of this filter.
#define AUTOYUY2_VERSION "AutoYUY2 4.1.10 JPSDR"
// Inspired from Neuron2 filter

// Innermost dimension of the interlaced_tab_U / interlaced_tab_V arrays below.
#define Interlaced_Tab_Size 3

// Frees ptr when it is not NULL, then resets it to NULL.
// NOTE(review): this expands to a bare `if` statement (no do/while wrapper), so
// using it as the unbraced body of an if/else would capture the `else`.
#define myfree(ptr) if (ptr!=NULL) { free(ptr); ptr=NULL;}


// Work description handed to one thread: up to three source and three
// destination plane pointers with their pitches, plus row ranges and widths.
// NOTE(review): the Y/UV and h_min/h_max meanings below are inferred from the
// field names only - confirm against AutoYUY2.cpp.
typedef struct _MT_Data_Info_AutoYUY2
{
    void *src1,*src2,*src3;                     // source plane pointers
    void *dst1,*dst2,*dst3;                     // destination plane pointers
    ptrdiff_t src_pitch1,src_pitch2,src_pitch3; // source pitches
    ptrdiff_t dst_pitch1,dst_pitch2,dst_pitch3; // destination pitches
    int32_t src_Y_h_min,src_Y_h_max,src_Y_w;    // presumably source luma row range and width
    int32_t src_UV_h_min,src_UV_h_max,src_UV_w; // presumably source chroma row range and width
    int32_t dst_Y_h_min,dst_Y_h_max,dst_Y_w;    // presumably destination luma row range and width
    int32_t dst_UV_h_min,dst_UV_h_max,dst_UV_w; // presumably destination chroma row range and width
    bool top,bottom;                            // presumably: slice touches the top / bottom of the frame
} MT_Data_Info_AutoYUY2;


// AviSynth filter class (derives from GenericVideoFilter). According to the
// banner at the top of this header it performs adaptive YV12 upsampling:
// progressive areas are upsampled progressively, interlaced areas interlaced.
class AutoYUY2 : public GenericVideoFilter
{
public:
    // NOTE(review): the meaning of each argument is defined in AutoYUY2.cpp,
    // which is not visible here; the names match the private members below.
    AutoYUY2(PClip _child, int _threshold, int _mode, int _output,uint8_t _threads, bool _sleep, bool negativePrefetch, IScriptEnvironment* env);
    virtual ~AutoYUY2();
    PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env);

    int __stdcall SetCacheHints(int cachehints, int frame_range);

private:
    int threshold;
    int mode;
    int output;
    bool sleep;
    uint16_t *lookup_Upscale8;      // heap table; presumably used for 8-bit input
    uint32_t *lookup_Upscale16;     // heap table; presumably used for 16-bit input
    // One set of Interlaced_Tab_Size bool buffers per possible thread, for U and for V.
    bool *interlaced_tab_U[MAX_MT_THREADS][Interlaced_Tab_Size],*interlaced_tab_V[MAX_MT_THREADS][Interlaced_Tab_Size];
    bool SSE2_Enable,AVX_Enable,AVX2_Enable,has_at_least_v8;

    bool grey,avsp,isRGBPfamily,isAlphaChannel;
    uint8_t pixelsize; // AVS16
    uint8_t bits_per_pixel;

    Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS];
    MT_Data_Info_AutoYUY2 MT_Data[MAX_MT_THREADS];
    uint8_t threads,threads_number;
    uint32_t UserId;

    ThreadPoolFunction StaticThreadpoolF;

    // Static entry point taking an untyped pointer; presumably the callback
    // stored in StaticThreadpoolF - confirm in AutoYUY2.cpp.
    static void StaticThreadpool(void *ptr);

    void FreeData(void);
};
84 | 85 | 86 | -------------------------------------------------------------------------------- /Plugins_JPSDR/AutoYUY2_AVX2_asm.asm: -------------------------------------------------------------------------------- 1 | ; 2 | ; AutoYUY2() 3 | ; 4 | ; Adaptive YV12 upsampling. Progressive picture areas are upsampled 5 | ; progressively and interlaced areas are upsampled interlaced. 6 | ; Copyright (C) 2005 Donald A. Graft 7 | ; ASM part made by JPSDR 8 | ; 9 | ; AutoYUY2 is free software; you can redistribute it and/or modify 10 | ; it under the terms of the GNU General Public License as published by 11 | ; the Free Software Foundation; either version 2, or (at your option) 12 | ; any later version. 13 | ; 14 | ; AutoYUY2 is distributed in the hope that it will be useful, 15 | ; but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | ; GNU General Public License for more details. 18 | ; 19 | ; You should have received a copy of the GNU General Public License 20 | ; along with GNU Make; see the file COPYING. If not, write to 21 | ; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
;

.586
.xmm
.model flat,c

.code

; Common structure of the six routines below (32-bit, C calling convention):
;   src1, src2 : source rows
;   dst        : destination row
;   w32 / w16  : number of 32-byte blocks to process (loaded into ecx for `loop`)
; All three pointers are accessed with vmovdqa and therefore must be 32-byte
; aligned. eax is the running byte offset, ebx holds the 32-byte step.
;
; ymm3 is set to all ones so that `vpxor x,x,ymm3` is a bitwise NOT.
; vpavgb/vpavgw compute (a+b+1)>>1, i.e. they round up; averaging the
; complemented operands and complementing the result yields the round-down
; average. Cascading these averages builds the weighted blends named in the
; procedure names.
;
; NOTE(review): `loop` decrements ecx before testing it, so a block count of 0
; would not skip the loop. Callers are assumed to pass a count >= 1.


; dst ~ (3*src1 + src2) / 4 on unsigned bytes.
JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_8_AVX2 proc src1:dword,src2:dword,dst:dword,w32:dword

    public JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_8_AVX2

    push esi
    push edi
    push ebx

    vpcmpeqb ymm3,ymm3,ymm3             ; ymm3 = all ones (NOT mask)

    mov edi,dst
    mov esi,src1
    mov edx,src2
    xor eax,eax
    mov ecx,w32
    mov ebx,32

Convert_Planar420_to_Planar422_x3x1_8_AVX2_1:
    vmovdqa ymm0,YMMWORD ptr[esi+eax]
    vmovdqa ymm1,YMMWORD ptr[edx+eax]
    vpxor ymm2,ymm0,ymm3                ; ~src1
    vpxor ymm1,ymm1,ymm3                ; ~src2
    vpavgb ymm2,ymm2,ymm1
    vpxor ymm2,ymm2,ymm3                ; round-down average of src1 and src2
    vpavgb ymm2,ymm2,ymm0               ; average again with src1

    vmovdqa YMMWORD ptr[edi+eax],ymm2
    add eax,ebx
    loop Convert_Planar420_to_Planar422_x3x1_8_AVX2_1

    vzeroupper

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_8_AVX2 endp


; dst ~ (3*src1 + src2) / 4 on unsigned 16-bit words.
JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_16_AVX2 proc src1:dword,src2:dword,dst:dword,w16:dword

    public JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_16_AVX2

    push esi
    push edi
    push ebx

    vpcmpeqb ymm3,ymm3,ymm3             ; ymm3 = all ones (NOT mask)

    mov edi,dst
    mov esi,src1
    mov edx,src2
    xor eax,eax
    mov ecx,w16
    mov ebx,32

Convert_Planar420_to_Planar422_x3x1_16_AVX2_1:
    vmovdqa ymm0,YMMWORD ptr[esi+eax]
    vmovdqa ymm1,YMMWORD ptr[edx+eax]
    vpxor ymm2,ymm0,ymm3
    vpxor ymm1,ymm1,ymm3
    vpavgw ymm2,ymm2,ymm1
    vpxor ymm2,ymm2,ymm3
    vpavgw ymm2,ymm2,ymm0

    vmovdqa YMMWORD ptr[edi+eax],ymm2
    add eax,ebx
    loop Convert_Planar420_to_Planar422_x3x1_16_AVX2_1

    vzeroupper

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_16_AVX2 endp


; dst ~ (5*src1 + 3*src2) / 8 on unsigned bytes.
JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_8_AVX2 proc src1:dword,src2:dword,dst:dword,w32:dword

    public JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_8_AVX2

    push esi
    push edi
    push ebx

    vpcmpeqb ymm3,ymm3,ymm3             ; ymm3 = all ones (NOT mask)

    mov edi,dst
    mov esi,src1
    mov edx,src2
    xor eax,eax
    mov ecx,w32
    mov ebx,32

Convert_Planar420_to_Planar422_x3x5_8_AVX2_1:
    vmovdqa ymm0,YMMWORD ptr[esi+eax]
    vmovdqa ymm1,YMMWORD ptr[edx+eax]
    vpxor ymm2,ymm0,ymm3                ; ~src1
    vpxor ymm1,ymm1,ymm3                ; ~src2
    vpavgb ymm2,ymm2,ymm1
    vpavgb ymm2,ymm2,ymm1               ; second average with ~src2
    vpxor ymm2,ymm2,ymm3                ; ~ (src1 + 3*src2) / 4
    vpavgb ymm2,ymm2,ymm0               ; average with src1

    vmovdqa YMMWORD ptr[edi+eax],ymm2
    add eax,ebx
    loop Convert_Planar420_to_Planar422_x3x5_8_AVX2_1

    vzeroupper

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_8_AVX2 endp


; dst ~ (5*src1 + 3*src2) / 8 on unsigned 16-bit words.
JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_16_AVX2 proc src1:dword,src2:dword,dst:dword,w16:dword

    public JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_16_AVX2

    push esi
    push edi
    push ebx

    vpcmpeqb ymm3,ymm3,ymm3             ; ymm3 = all ones (NOT mask)

    mov edi,dst
    mov esi,src1
    mov edx,src2
    xor eax,eax
    mov ecx,w16
    mov ebx,32

Convert_Planar420_to_Planar422_x3x5_16_AVX2_1:
    vmovdqa ymm0,YMMWORD ptr[esi+eax]
    vmovdqa ymm1,YMMWORD ptr[edx+eax]
    vpxor ymm2,ymm0,ymm3
    vpxor ymm1,ymm1,ymm3
    vpavgw ymm2,ymm2,ymm1
    vpavgw ymm2,ymm2,ymm1
    vpxor ymm2,ymm2,ymm3
    vpavgw ymm2,ymm2,ymm0

    vmovdqa YMMWORD ptr[edi+eax],ymm2
    add eax,ebx
    loop Convert_Planar420_to_Planar422_x3x5_16_AVX2_1

    vzeroupper

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_16_AVX2 endp


; dst ~ (7*src1 + src2) / 8 on unsigned bytes.
JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_8_AVX2 proc src1:dword,src2:dword,dst:dword,w32:dword

    public JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_8_AVX2

    push esi
    push edi
    push ebx

    vpcmpeqb ymm3,ymm3,ymm3             ; ymm3 = all ones (NOT mask)

    mov edi,dst
    mov esi,src1
    mov edx,src2
    xor eax,eax
    mov ecx,w32
    mov ebx,32

Convert_Planar420_to_Planar422_x7x1_8_AVX2_1:
    vmovdqa ymm0,YMMWORD ptr[esi+eax]
    vmovdqa ymm1,YMMWORD ptr[edx+eax]
    vpxor ymm2,ymm0,ymm3                ; ~src1
    vpxor ymm1,ymm1,ymm3                ; ~src2
    vpavgb ymm1,ymm1,ymm2
    vpavgb ymm1,ymm1,ymm2               ; second average with ~src1
    vpxor ymm1,ymm1,ymm3                ; ~ (3*src1 + src2) / 4
    vpavgb ymm1,ymm1,ymm0               ; average with src1
    vmovdqa YMMWORD ptr[edi+eax],ymm1
    add eax,ebx
    loop Convert_Planar420_to_Planar422_x7x1_8_AVX2_1

    vzeroupper

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_8_AVX2 endp


; dst ~ (7*src1 + src2) / 8 on unsigned 16-bit words.
JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_16_AVX2 proc src1:dword,src2:dword,dst:dword,w16:dword

    public JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_16_AVX2

    push esi
    push edi
    push ebx

    vpcmpeqb ymm3,ymm3,ymm3             ; ymm3 = all ones (NOT mask)

    mov edi,dst
    mov esi,src1
    mov edx,src2
    xor eax,eax
    mov ecx,w16
    mov ebx,32

Convert_Planar420_to_Planar422_x7x1_16_AVX2_1:
    vmovdqa ymm0,YMMWORD ptr[esi+eax]
    vmovdqa ymm1,YMMWORD ptr[edx+eax]
    vpxor ymm2,ymm0,ymm3
    vpxor ymm1,ymm1,ymm3
    vpavgw ymm1,ymm1,ymm2
    vpavgw ymm1,ymm1,ymm2
    vpxor ymm1,ymm1,ymm3
    vpavgw ymm1,ymm1,ymm0
    vmovdqa YMMWORD ptr[edi+eax],ymm1
    add eax,ebx
    loop Convert_Planar420_to_Planar422_x7x1_16_AVX2_1

    vzeroupper

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_16_AVX2 endp


end

-------------------------------------------------------------------------------- /Plugins_JPSDR/AutoYUY2_AVX2_asm_x64.asm: -------------------------------------------------------------------------------- 1 | ; 2 | ; AutoYUY2() 3 | ; 4 | ; Adaptive YV12 upsampling. Progressive picture areas are upsampled 5 | ; progressively and interlaced areas are upsampled interlaced. 6 | ; Copyright (C) 2005 Donald A. Graft 7 | ; ASM part made by JPSDR 8 | ; 9 | ; AutoYUY2 is free software; you can redistribute it and/or modify 10 | ; it under the terms of the GNU General Public License as published by 11 | ; the Free Software Foundation; either version 2, or (at your option) 12 | ; any later version. 13 | ; 14 | ; AutoYUY2 is distributed in the hope that it will be useful, 15 | ; but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | ; GNU General Public License for more details. 18 | ; 19 | ; You should have received a copy of the GNU General Public License 20 | ; along with GNU Make; see the file COPYING. If not, write to 21 | ; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
;

.data

.code

; Common structure of the six routines below (x64, arguments in registers as
; listed in front of each procedure):
;   rcx = src1, rdx = src2, r8 = dst, r9d = number of 32-byte blocks.
; src1 is moved to r10 because rcx is needed as the `loop` counter; writing
; ecx zero-extends into rcx. rax is the running byte offset, r11 the 32-byte
; step. All pointers are accessed with vmovdqa and must be 32-byte aligned.
;
; ymm3 is set to all ones so that `vpxor x,x,ymm3` is a bitwise NOT.
; vpavgb/vpavgw round up ((a+b+1)>>1); averaging the complemented operands
; and complementing the result yields the round-down average.
;
; NOTE(review): the commented prototypes below still say `:dword` for the
; pointer arguments; in this 64-bit build they arrive as 64-bit registers.
; NOTE(review): `loop` decrements rcx before testing it, so a block count of 0
; would not skip the loop. Callers are assumed to pass a count >= 1.


;JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_8_AVX2 proc src1:dword,src2:dword,dst:dword,w32:dword
; src1 = rcx
; src2 = rdx
; dst = r8
; w32 = r9d
;
; dst ~ (3*src1 + src2) / 4 on unsigned bytes.

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_8_AVX2 proc public frame

    .endprolog

    vpcmpeqb ymm3,ymm3,ymm3             ; ymm3 = all ones (NOT mask)

    mov r10,rcx ; r10=src1
    xor rcx,rcx
    xor rax,rax
    mov ecx,r9d
    mov r11,32

Convert_Planar420_to_Planar422_x3x1_8_AVX2_1:
    vmovdqa ymm0,YMMWORD ptr[r10+rax]
    vmovdqa ymm1,YMMWORD ptr[rdx+rax]
    vpxor ymm2,ymm0,ymm3                ; ~src1
    vpxor ymm1,ymm1,ymm3                ; ~src2
    vpavgb ymm2,ymm2,ymm1
    vpxor ymm2,ymm2,ymm3                ; round-down average of src1 and src2
    vpavgb ymm2,ymm2,ymm0               ; average again with src1

    vmovdqa YMMWORD ptr[r8+rax],ymm2
    add rax,r11
    loop Convert_Planar420_to_Planar422_x3x1_8_AVX2_1

    vzeroupper

    ret

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_8_AVX2 endp


;JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_16_AVX2 proc src1:dword,src2:dword,dst:dword,w16:dword
; src1 = rcx
; src2 = rdx
; dst = r8
; w16 = r9d
;
; dst ~ (3*src1 + src2) / 4 on unsigned 16-bit words.

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_16_AVX2 proc public frame

    .endprolog

    vpcmpeqb ymm3,ymm3,ymm3             ; ymm3 = all ones (NOT mask)

    mov r10,rcx ; r10=src1
    xor rcx,rcx
    xor rax,rax
    mov ecx,r9d
    mov r11,32

Convert_Planar420_to_Planar422_x3x1_16_AVX2_1:
    vmovdqa ymm0,YMMWORD ptr[r10+rax]
    vmovdqa ymm1,YMMWORD ptr[rdx+rax]
    vpxor ymm2,ymm0,ymm3
    vpxor ymm1,ymm1,ymm3
    vpavgw ymm2,ymm2,ymm1
    vpxor ymm2,ymm2,ymm3
    vpavgw ymm2,ymm2,ymm0

    vmovdqa YMMWORD ptr[r8+rax],ymm2
    add rax,r11
    loop Convert_Planar420_to_Planar422_x3x1_16_AVX2_1

    vzeroupper

    ret

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x1_16_AVX2 endp


;JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_8_AVX2 proc src1:dword,src2:dword,dst:dword,w32:dword
; src1 = rcx
; src2 = rdx
; dst = r8
; w32 = r9d
;
; dst ~ (5*src1 + 3*src2) / 8 on unsigned bytes.

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_8_AVX2 proc public frame

    .endprolog

    vpcmpeqb ymm3,ymm3,ymm3             ; ymm3 = all ones (NOT mask)

    mov r10,rcx ; r10=src1
    xor rcx,rcx
    xor rax,rax
    mov ecx,r9d
    mov r11,32

Convert_Planar420_to_Planar422_x3x5_8_AVX2_1:
    vmovdqa ymm0,YMMWORD ptr[r10+rax]
    vmovdqa ymm1,YMMWORD ptr[rdx+rax]
    vpxor ymm2,ymm0,ymm3                ; ~src1
    vpxor ymm1,ymm1,ymm3                ; ~src2
    vpavgb ymm2,ymm2,ymm1
    vpavgb ymm2,ymm2,ymm1               ; second average with ~src2
    vpxor ymm2,ymm2,ymm3                ; ~ (src1 + 3*src2) / 4
    vpavgb ymm2,ymm2,ymm0               ; average with src1

    vmovdqa YMMWORD ptr[r8+rax],ymm2
    add rax,r11
    loop Convert_Planar420_to_Planar422_x3x5_8_AVX2_1

    vzeroupper

    ret

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_8_AVX2 endp


;JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_16_AVX2 proc src1:dword,src2:dword,dst:dword,w16:dword
; src1 = rcx
; src2 = rdx
; dst = r8
; w16 = r9d
;
; dst ~ (5*src1 + 3*src2) / 8 on unsigned 16-bit words.

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_16_AVX2 proc public frame

    .endprolog

    vpcmpeqb ymm3,ymm3,ymm3             ; ymm3 = all ones (NOT mask)

    mov r10,rcx ; r10=src1
    xor rcx,rcx
    xor rax,rax
    mov ecx,r9d
    mov r11,32

Convert_Planar420_to_Planar422_x3x5_16_AVX2_1:
    vmovdqa ymm0,YMMWORD ptr[r10+rax]
    vmovdqa ymm1,YMMWORD ptr[rdx+rax]
    vpxor ymm2,ymm0,ymm3
    vpxor ymm1,ymm1,ymm3
    vpavgw ymm2,ymm2,ymm1
    vpavgw ymm2,ymm2,ymm1
    vpxor ymm2,ymm2,ymm3
    vpavgw ymm2,ymm2,ymm0

    vmovdqa YMMWORD ptr[r8+rax],ymm2
    add rax,r11
    loop Convert_Planar420_to_Planar422_x3x5_16_AVX2_1

    vzeroupper

    ret

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x3x5_16_AVX2 endp


;JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_8_AVX2 proc src1:dword,src2:dword,dst:dword,w32:dword
; src1 = rcx
; src2 = rdx
; dst = r8
; w32 = r9d
;
; dst ~ (7*src1 + src2) / 8 on unsigned bytes.

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_8_AVX2 proc public frame

    .endprolog

    vpcmpeqb ymm3,ymm3,ymm3             ; ymm3 = all ones (NOT mask)

    mov r10,rcx ; r10=src1
    xor rcx,rcx
    xor rax,rax
    mov ecx,r9d
    mov r11,32

Convert_Planar420_to_Planar422_x7x1_8_AVX2_1:
    vmovdqa ymm0,YMMWORD ptr[r10+rax]
    vmovdqa ymm1,YMMWORD ptr[rdx+rax]
    vpxor ymm2,ymm0,ymm3                ; ~src1
    vpxor ymm1,ymm1,ymm3                ; ~src2
    vpavgb ymm1,ymm1,ymm2
    vpavgb ymm1,ymm1,ymm2               ; second average with ~src1
    vpxor ymm1,ymm1,ymm3                ; ~ (3*src1 + src2) / 4
    vpavgb ymm1,ymm1,ymm0               ; average with src1
    vmovdqa YMMWORD ptr[r8+rax],ymm1
    add rax,r11
    loop Convert_Planar420_to_Planar422_x7x1_8_AVX2_1

    vzeroupper

    ret

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_8_AVX2 endp


;JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_16_AVX2 proc src1:dword,src2:dword,dst:dword,w16:dword
; src1 = rcx
; src2 = rdx
; dst = r8
; w16 = r9d
;
; dst ~ (7*src1 + src2) / 8 on unsigned 16-bit words.

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_16_AVX2 proc public frame

    .endprolog

    vpcmpeqb ymm3,ymm3,ymm3             ; ymm3 = all ones (NOT mask)

    mov r10,rcx ; r10=src1
    xor rcx,rcx
    xor rax,rax
    mov ecx,r9d
    mov r11,32

Convert_Planar420_to_Planar422_x7x1_16_AVX2_1:
    vmovdqa ymm0,YMMWORD ptr[r10+rax]
    vmovdqa ymm1,YMMWORD ptr[rdx+rax]
    vpxor ymm2,ymm0,ymm3
    vpxor ymm1,ymm1,ymm3
    vpavgw ymm1,ymm1,ymm2
    vpavgw ymm1,ymm1,ymm2
    vpxor ymm1,ymm1,ymm3
    vpavgw ymm1,ymm1,ymm0
    vmovdqa YMMWORD ptr[r8+rax],ymm1
    add rax,r11
    loop Convert_Planar420_to_Planar422_x7x1_16_AVX2_1

    vzeroupper

    ret

JPSDR_AutoYUY2_Convert_Planar420_to_Planar422_x7x1_16_AVX2 endp


end
--------------------------------------------------------------------------------
/Plugins_JPSDR/HDRTools_AVX2_asm.asm: -------------------------------------------------------------------------------- 1 | ; 2 | ; HDRTools() 3 | ; 4 | ; Several functions for working on HDR data, and linear to non-linear conversions. 5 | ; Copyright (C) 2018 JPSDR 6 | ; 7 | ; HDRTools is free software; you can redistribute it and/or modify 8 | ; it under the terms of the GNU General Public License as published by 9 | ; the Free Software Foundation; either version 2, or (at your option) 10 | ; any later version. 11 | ; 12 | ; HDRTools is distributed in the hope that it will be useful, 13 | ; but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | ; GNU General Public License for more details. 16 | ; 17 | ; You should have received a copy of the GNU General Public License 18 | ; along with GNU Make; see the file COPYING. If not, write to 19 | ; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
;
;

.586
.xmm
.model flat,c

.data

align 16

data segment align(32)

; 32-byte constants, loaded below with aligned 256-bit moves.
data_f_1048575 real4 8 dup(1048575.0)       ; 8 x float  2^20-1
data_f_65535 real4 8 dup(65535.0)           ; 8 x float  2^16-1
data_dw_1048575 dword 8 dup(1048575)        ; 8 x int32  2^20-1 (upper clamp)
data_dw_65535 dword 8 dup(65535)            ; 8 x int32  2^16-1 (upper clamp)
data_dw_0 dword 8 dup(0)                    ; 8 x int32  0      (lower clamp)

data_w_128 word 16 dup(128)                 ; rounding term for >> 8
data_w_32 word 16 dup(32)                   ; rounding term for >> 6
data_w_8 word 16 dup(8)                     ; rounding term for >> 4

.code

; Conventions shared by every routine in this file (32-bit, C calling
; convention):
;   * Pointers accessed with vmovdqa/vmovaps must be aligned to the access
;     size (32 bytes for ymm, 16 bytes for xmm).
;   * esi/edi/ebx are saved and restored; eax is the running byte offset
;     inside a row; ecx is the `loop` counter.
;   * Every routine ends with vzeroupper so that callers running legacy SSE
;     code do not inherit dirty upper YMM halves.
;   * NOTE(review): `loop` decrements ecx before testing it. Routines that do
;     not test the count first assume the caller passes a block count >= 1,
;     and all row loops assume h >= 1.


; dst ~ (3*src1 + src2) / 4 on unsigned bytes.
;   w : number of 32-byte blocks.
; ymm3 = all ones is used as a NOT mask: averaging the complemented inputs
; with vpavgb (which rounds up) and complementing the result gives the
; round-down average, which is then averaged once more with src1.
JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 proc src1:dword,src2:dword,dst:dword,w:dword

    public JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2

    push esi
    push edi
    push ebx

    vpcmpeqb ymm3,ymm3,ymm3             ; ymm3 = all ones (NOT mask)

    mov edi,dst
    mov esi,src1
    mov edx,src2
    xor eax,eax
    mov ecx,w
    mov ebx,32

Convert_Planar420_to_Planar422_8_AVX2_1:
    vmovdqa ymm0,YMMWORD ptr[esi+eax]
    vmovdqa ymm1,YMMWORD ptr[edx+eax]
    vpxor ymm2,ymm0,ymm3                ; ~src1
    vpxor ymm1,ymm1,ymm3                ; ~src2
    vpavgb ymm2,ymm2,ymm1
    vpxor ymm2,ymm2,ymm3                ; round-down average of src1 and src2
    vpavgb ymm2,ymm2,ymm0               ; average again with src1

    vmovdqa YMMWORD ptr[edi+eax],ymm2
    add eax,ebx
    loop Convert_Planar420_to_Planar422_8_AVX2_1

    vzeroupper

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 endp


; dst ~ (3*src1 + src2) / 4 on unsigned 16-bit words.
;   w : number of 32-byte blocks.
JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 proc src1:dword,src2:dword,dst:dword,w:dword

    public JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2

    push esi
    push edi
    push ebx

    vpcmpeqb ymm3,ymm3,ymm3             ; ymm3 = all ones (NOT mask)

    mov edi,dst
    mov esi,src1
    mov edx,src2
    xor eax,eax
    mov ecx,w
    mov ebx,32

Convert_Planar420_to_Planar422_16_AVX2_1:
    vmovdqa ymm0,YMMWORD ptr[esi+eax]
    vmovdqa ymm1,YMMWORD ptr[edx+eax]
    vpxor ymm2,ymm0,ymm3
    vpxor ymm1,ymm1,ymm3
    vpavgw ymm2,ymm2,ymm1
    vpxor ymm2,ymm2,ymm3
    vpavgw ymm2,ymm2,ymm0

    vmovdqa YMMWORD ptr[edi+eax],ymm2
    add eax,ebx
    loop Convert_Planar420_to_Planar422_16_AVX2_1

    vzeroupper

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 endp


; dst = rounded-up average of src1 and src2 on unsigned bytes, for h rows.
;   w32        : number of 32-byte blocks per row
;   src_pitch2 : byte step applied to BOTH source pointers after each row
;   dst_pitch  : byte step applied to dst after each row
JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 proc src1:dword,src2:dword,dst:dword,w32:dword,h:dword,src_pitch2:dword,dst_pitch:dword

    public JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2

    push esi
    push edi
    push ebx

    mov edi,dst
    mov esi,src1
    mov edx,src2
    mov ebx,32

Convert_Planar422_to_Planar420_8_AVX2_1:
    xor eax,eax
    mov ecx,w32

Convert_Planar422_to_Planar420_8_AVX2_2:
    vmovdqa ymm0,YMMWORD ptr[esi+eax]
    vpavgb ymm0,ymm0,YMMWORD ptr[edx+eax]

    vmovdqa YMMWORD ptr[edi+eax],ymm0
    add eax,ebx
    loop Convert_Planar422_to_Planar420_8_AVX2_2

    add esi,src_pitch2
    add edx,src_pitch2
    add edi,dst_pitch
    dec h
    jnz short Convert_Planar422_to_Planar420_8_AVX2_1

    vzeroupper

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 endp


; dst = rounded-up average of src1 and src2 on unsigned 16-bit words, for h
; rows. Parameters as for the 8-bit variant; w16 counts 32-byte blocks.
JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 proc src1:dword,src2:dword,dst:dword,w16:dword,h:dword,src_pitch2:dword,dst_pitch:dword

    public JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2

    push esi
    push edi
    push ebx

    mov edi,dst
    mov esi,src1
    mov edx,src2
    mov ebx,32

Convert_Planar422_to_Planar420_16_AVX2_1:
    xor eax,eax
    mov ecx,w16

Convert_Planar422_to_Planar420_16_AVX2_2:
    vmovdqa ymm0,YMMWORD ptr[esi+eax]
    vpavgw ymm0,ymm0,YMMWORD ptr[edx+eax]

    vmovdqa YMMWORD ptr[edi+eax],ymm0
    add eax,ebx
    loop Convert_Planar422_to_Planar420_16_AVX2_2

    add esi,src_pitch2
    add edx,src_pitch2
    add edi,dst_pitch
    dec h
    jnz short Convert_Planar422_to_Planar420_16_AVX2_1

    vzeroupper

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 endp


; Float -> 20-bit integer scaling with offset and gain:
;   dst[i] = clamp( cvt( (src[i] + *ValMin) * (*Coeff) * 1048575.0 ), 0, 1048575 )
; src holds floats, dst receives 32-bit integers; cvt is vcvtps2dq (rounds
; according to MXCSR).
;   w8            : number of 8-float (32-byte) blocks per row
;   ValMin, Coeff : POINTERS to a single float each
JPSDR_HDRTools_Scale_20_XYZ_AVX2 proc src:dword,dst:dword,w8:dword,h:dword,src_pitch:dword,dst_pitch:dword,
    ValMin:dword,Coeff:dword

    public JPSDR_HDRTools_Scale_20_XYZ_AVX2

    push esi
    push edi
    push ebx

    mov esi,ValMin
    vmovss xmm1,dword ptr[esi]
    vshufps xmm1,xmm1,xmm1,0
    vinsertf128 ymm1,ymm1,xmm1,1        ; ymm1 = 8 x *ValMin
    mov esi,Coeff
    vmovss xmm2,dword ptr[esi]
    vshufps xmm2,xmm2,xmm2,0
    vinsertf128 ymm2,ymm2,xmm2,1        ; ymm2 = 8 x *Coeff

    vmovdqa ymm3,YMMWORD ptr data_dw_1048575
    vmovdqa ymm4,YMMWORD ptr data_dw_0
    vmulps ymm2,ymm2,YMMWORD ptr data_f_1048575     ; fold the 2^20-1 scale into the gain

    mov esi,src
    mov edi,dst
    mov ebx,w8
    mov edx,32

Scale_20_XYZ_AVX2_1:
    xor eax,eax
    mov ecx,ebx
Scale_20_XYZ_AVX2_2:
    vaddps ymm0,ymm1,YMMWORD ptr[esi+eax]
    vmulps ymm0,ymm0,ymm2
    vcvtps2dq ymm0,ymm0
    vpminsd ymm0,ymm0,ymm3              ; clamp to <= 1048575
    vpmaxsd ymm0,ymm0,ymm4              ; clamp to >= 0
    vmovdqa YMMWORD ptr[edi+eax],ymm0

    add eax,edx
    loop Scale_20_XYZ_AVX2_2

    add esi,src_pitch
    add edi,dst_pitch
    dec h
    jnz short Scale_20_XYZ_AVX2_1

    vzeroupper

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_HDRTools_Scale_20_XYZ_AVX2 endp


; Float -> 20-bit integer scaling without offset:
;   dst[i] = clamp( cvt( src[i] * 1048575.0 ), 0, 1048575 )
;   w8 : number of 8-float (32-byte) blocks per row
JPSDR_HDRTools_Scale_20_RGB_AVX2 proc src:dword,dst:dword,w8:dword,h:dword,src_pitch:dword,dst_pitch:dword

    public JPSDR_HDRTools_Scale_20_RGB_AVX2

    push esi
    push edi
    push ebx

    vmovaps ymm1,YMMWORD ptr data_f_1048575
    vmovdqa ymm2,YMMWORD ptr data_dw_1048575
    vmovdqa ymm3,YMMWORD ptr data_dw_0

    mov esi,src
    mov edi,dst
    mov ebx,w8
    mov edx,32

Scale_20_RGB_AVX2_1:
    xor eax,eax
    mov ecx,ebx
Scale_20_RGB_AVX2_2:
    vmulps ymm0,ymm1,YMMWORD ptr[esi+eax]
    vcvtps2dq ymm0,ymm0
    vpminsd ymm0,ymm0,ymm2              ; clamp to <= 1048575
    vpmaxsd ymm0,ymm0,ymm3              ; clamp to >= 0
    vmovdqa YMMWORD ptr[edi+eax],ymm0

    add eax,edx
    loop Scale_20_RGB_AVX2_2

    add esi,src_pitch
    add edi,dst_pitch
    dec h
    jnz short Scale_20_RGB_AVX2_1

    vzeroupper

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_HDRTools_Scale_20_RGB_AVX2 endp


; Reduces every 16-bit component to 8 significant bits with rounding:
;   dst = (src + 128, saturating) >> 8      (result is still stored as 16-bit)
;   w : number of 64-bit pixels (4 x 16-bit) per row. The main loop handles
;       w/4 blocks of 4 pixels (32 bytes); bit 1 of w adds a 2-pixel (16-byte)
;       step and bit 0 a final 1-pixel (8-byte) step.
JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 proc src:dword,dst:dword,w:dword,h:dword,
    src_pitch:dword,dst_pitch:dword

    public JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2

    push esi
    push edi
    push ebx

    vmovdqa ymm1,YMMWORD ptr data_w_128

    mov esi,src
    mov edi,dst
    mov ebx,w
    shr ebx,2                           ; ebx = number of 4-pixel blocks
    mov edx,32

Convert_RGB64_16toRGB64_8_AVX2_1:
    mov ecx,ebx
    xor eax,eax
    or ecx,ecx
    jz short Convert_RGB64_16toRGB64_8_AVX2_3   ; fewer than 4 pixels: tails only

Convert_RGB64_16toRGB64_8_AVX2_2:
    vmovdqa ymm0,YMMWORD ptr[esi+eax]
    vpaddusw ymm0,ymm0,ymm1
    vpsrlw ymm0,ymm0,8
    vmovdqa YMMWORD ptr[edi+eax],ymm0
    add eax,edx
    loop Convert_RGB64_16toRGB64_8_AVX2_2

Convert_RGB64_16toRGB64_8_AVX2_3:
    test w,2
    jz short Convert_RGB64_16toRGB64_8_AVX2_4

    vmovdqa xmm0,XMMWORD ptr[esi+eax]
    vpaddusw xmm0,xmm0,xmm1
    vpsrlw xmm0,xmm0,8
    vmovdqa XMMWORD ptr[edi+eax],xmm0

    add eax,16

Convert_RGB64_16toRGB64_8_AVX2_4:
    test w,1
    jz short Convert_RGB64_16toRGB64_8_AVX2_5

    vmovq xmm0,qword ptr[esi+eax]
    vpaddusw xmm0,xmm0,xmm1
    vpsrlw xmm0,xmm0,8
    vmovq qword ptr[edi+eax],xmm0

Convert_RGB64_16toRGB64_8_AVX2_5:
    add esi,src_pitch
    add edi,dst_pitch
    dec h
    jnz short Convert_RGB64_16toRGB64_8_AVX2_1

    vzeroupper                          ; ymm1 was loaded at full width

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 endp


; Reduces every 16-bit component to 10 significant bits with rounding:
;   dst = (src + 32, saturating) >> 6
; Row layout and tail handling as in the 8-bit variant above.
JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 proc src:dword,dst:dword,w:dword,h:dword,
    src_pitch:dword,dst_pitch:dword

    public JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2

    push esi
    push edi
    push ebx

    vmovdqa ymm1,YMMWORD ptr data_w_32

    mov esi,src
    mov edi,dst
    mov ebx,w
    shr ebx,2                           ; ebx = number of 4-pixel blocks
    mov edx,32

Convert_RGB64_16toRGB64_10_AVX2_1:
    mov ecx,ebx
    xor eax,eax
    or ecx,ecx
    jz short Convert_RGB64_16toRGB64_10_AVX2_3

Convert_RGB64_16toRGB64_10_AVX2_2:
    vmovdqa ymm0,YMMWORD ptr[esi+eax]
    vpaddusw ymm0,ymm0,ymm1
    vpsrlw ymm0,ymm0,6
    vmovdqa YMMWORD ptr[edi+eax],ymm0
    add eax,edx
    loop Convert_RGB64_16toRGB64_10_AVX2_2

Convert_RGB64_16toRGB64_10_AVX2_3:
    test w,2
    jz short Convert_RGB64_16toRGB64_10_AVX2_4

    vmovdqa xmm0,XMMWORD ptr[esi+eax]
    vpaddusw xmm0,xmm0,xmm1
    vpsrlw xmm0,xmm0,6
    vmovdqa XMMWORD ptr[edi+eax],xmm0

    add eax,16

Convert_RGB64_16toRGB64_10_AVX2_4:
    test w,1
    jz short Convert_RGB64_16toRGB64_10_AVX2_5

    vmovq xmm0,qword ptr[esi+eax]
    vpaddusw xmm0,xmm0,xmm1
    vpsrlw xmm0,xmm0,6
    vmovq qword ptr[edi+eax],xmm0

Convert_RGB64_16toRGB64_10_AVX2_5:
    add esi,src_pitch
    add edi,dst_pitch
    dec h
    jnz short Convert_RGB64_16toRGB64_10_AVX2_1

    vzeroupper                          ; ymm1 was loaded at full width

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 endp


; Reduces every 16-bit component to 12 significant bits with rounding:
;   dst = (src + 8, saturating) >> 4
; Row layout and tail handling as in the 8-bit variant above.
JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 proc src:dword,dst:dword,w:dword,h:dword,
    src_pitch:dword,dst_pitch:dword

    public JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2

    push esi
    push edi
    push ebx

    vmovdqa ymm1,YMMWORD ptr data_w_8

    mov esi,src
    mov edi,dst
    mov ebx,w
    shr ebx,2                           ; ebx = number of 4-pixel blocks
    mov edx,32

Convert_RGB64_16toRGB64_12_AVX2_1:
    mov ecx,ebx
    xor eax,eax
    or ecx,ecx
    jz short Convert_RGB64_16toRGB64_12_AVX2_3

Convert_RGB64_16toRGB64_12_AVX2_2:
    vmovdqa ymm0,YMMWORD ptr[esi+eax]
    vpaddusw ymm0,ymm0,ymm1
    vpsrlw ymm0,ymm0,4
    vmovdqa YMMWORD ptr[edi+eax],ymm0
    add eax,edx
    loop Convert_RGB64_16toRGB64_12_AVX2_2

Convert_RGB64_16toRGB64_12_AVX2_3:
    test w,2
    jz short Convert_RGB64_16toRGB64_12_AVX2_4

    vmovdqa xmm0,XMMWORD ptr[esi+eax]
    vpaddusw xmm0,xmm0,xmm1
    vpsrlw xmm0,xmm0,4
    vmovdqa XMMWORD ptr[edi+eax],xmm0

    add eax,16

Convert_RGB64_16toRGB64_12_AVX2_4:
    test w,1
    jz short Convert_RGB64_16toRGB64_12_AVX2_5

    vmovq xmm0,qword ptr[esi+eax]
    vpaddusw xmm0,xmm0,xmm1
    vpsrlw xmm0,xmm0,4
    vmovq qword ptr[edi+eax],xmm0

Convert_RGB64_16toRGB64_12_AVX2_5:
    add esi,src_pitch
    add edi,dst_pitch
    dec h
    jnz short Convert_RGB64_16toRGB64_12_AVX2_1

    vzeroupper                          ; ymm1 was loaded at full width

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 endp


; Multiplies, in place, every 4 x 16-bit pixel of dst by one float per pixel
; read from srcY, converting back to 16 bit with unsigned saturation.
;   w : number of pixels per row. The main loop handles 2 pixels per
;       iteration (8 bytes of srcY, 16 bytes of dst); an odd w adds one
;       single-pixel step.
; eax indexes srcY in bytes; dst is addressed with 2*eax because a pixel is
; 8 bytes in dst but 4 bytes in srcY.
JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 proc dst:dword,srcY:dword,w:dword,h:dword,dst_pitch:dword,src_pitchY:dword

    public JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2

    push esi
    push edi
    push ebx

    mov ebx,w
    shr ebx,1                           ; ebx = number of pixel pairs
    mov esi,srcY
    mov edi,dst
    mov edx,8
    vpxor xmm4,xmm4,xmm4                ; zero for word->dword unpacking (VEX form, like the rest of the routine)

Convert_16_RGB64_HLG_OOTF_AVX2_1:
    mov ecx,ebx
    xor eax,eax
    or ecx,ecx
    jz short Convert_16_RGB64_HLG_OOTF_AVX2_3

Convert_16_RGB64_HLG_OOTF_AVX2_2:
    vmovss xmm0,dword ptr[esi+eax]
    vmovss xmm1,dword ptr[esi+eax+4]
    vshufps xmm0,xmm0,xmm0,0            ; factor of pixel 0 in all 4 lanes
    vshufps xmm1,xmm1,xmm1,0            ; factor of pixel 1 in all 4 lanes
    vmovdqa xmm2,XMMWORD ptr[edi+2*eax]
    vinsertf128 ymm0,ymm0,xmm1,1        ; ymm0 = [factor0 x4 | factor1 x4]
    vpunpckhwd xmm3,xmm2,xmm4           ; pixel 1 widened to 32 bit
    vpunpcklwd xmm2,xmm2,xmm4           ; pixel 0 widened to 32 bit
    vinserti128 ymm2,ymm2,xmm3,1
    vcvtdq2ps ymm2,ymm2
    vmulps ymm2,ymm2,ymm0
    vcvtps2dq ymm2,ymm2
    vextracti128 xmm3,ymm2,1
    vpackusdw xmm2,xmm2,xmm3            ; back to 16 bit, unsigned saturation
    vmovdqa XMMWORD ptr[edi+2*eax],xmm2

    add eax,edx
    loop Convert_16_RGB64_HLG_OOTF_AVX2_2

Convert_16_RGB64_HLG_OOTF_AVX2_3:
    test w,1
    jz short Convert_16_RGB64_HLG_OOTF_AVX2_4

    vmovss xmm0,dword ptr[esi+eax]
    vshufps xmm0,xmm0,xmm0,0
    vmovq xmm2,qword ptr[edi+2*eax]
    vpunpcklwd xmm2,xmm2,xmm4
    vcvtdq2ps xmm2,xmm2
    vmulps xmm2,xmm2,xmm0
    vcvtps2dq xmm2,xmm2
    vpackusdw xmm2,xmm2,xmm2
    vmovq qword ptr[edi+2*eax],xmm2

Convert_16_RGB64_HLG_OOTF_AVX2_4:
    add edi,dst_pitch
    add esi,src_pitchY
    dec h
    jnz Convert_16_RGB64_HLG_OOTF_AVX2_1

    vzeroupper

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 endp


; In-place scaling of two float planes by a common float plane, followed by
; offset, gain and conversion to clamped 16-bit-range integers:
;   dst1[i] = clamp( cvt( (dst1[i]*src[i] + *ValMinX) * (*CoeffX) * 65535.0 ), 0, 65535 )
;   dst2[i] = clamp( cvt( (dst2[i]*src[i] + *ValMinZ) * (*CoeffZ) * 65535.0 ), 0, 65535 )
; dst1/dst2 are read as floats and overwritten with 32-bit integers.
;   w8 : number of 8-float (32-byte) blocks per row
;   ValMinX, CoeffX, ValMinZ, CoeffZ : POINTERS to a single float each
JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 proc src:dword,dst1:dword,dst2:dword,w8:dword,h:dword,src_pitch:dword,
    dst_pitch1:dword,dst_pitch2:dword,ValMinX:dword,CoeffX:dword,ValMinZ:dword,CoeffZ:dword

    public JPSDR_HDRTools_BT2446C_16_XYZ_AVX2

    push esi
    push edi
    push ebx

    mov esi,ValMinX
    vmovss xmm2,dword ptr[esi]
    vshufps xmm2,xmm2,xmm2,0
    ; The VEX.128 ops above zero the upper lane of ymm2, so the broadcast must
    ; be completed on ymm2 itself; otherwise pixels 4..7 of every block would
    ; get an offset of 0 instead of *ValMinX.
    vinsertf128 ymm2,ymm2,xmm2,1        ; ymm2 = 8 x *ValMinX
    mov esi,CoeffX
    vmovss xmm3,dword ptr[esi]
    vshufps xmm3,xmm3,xmm3,0
    vinsertf128 ymm3,ymm3,xmm3,1        ; ymm3 = 8 x *CoeffX

    mov esi,ValMinZ
    vmovss xmm4,dword ptr[esi]
    vshufps xmm4,xmm4,xmm4,0
    vinsertf128 ymm4,ymm4,xmm4,1        ; ymm4 = 8 x *ValMinZ
    mov esi,CoeffZ
    vmovss xmm5,dword ptr[esi]
    vshufps xmm5,xmm5,xmm5,0
    vinsertf128 ymm5,ymm5,xmm5,1        ; ymm5 = 8 x *CoeffZ

    vmovdqa ymm6,YMMWORD ptr data_dw_65535
    vmovdqa ymm7,YMMWORD ptr data_dw_0
    vmulps ymm3,ymm3,YMMWORD ptr data_f_65535       ; fold the 65535 scale into both gains
    vmulps ymm5,ymm5,YMMWORD ptr data_f_65535

    mov esi,src
    mov edi,dst1
    mov edx,dst2
    mov ebx,32

BT2446C_16_XYZ_AVX2_1:
    xor eax,eax
    mov ecx,w8
BT2446C_16_XYZ_AVX2_2:
    vmovaps ymm0,YMMWORD ptr[edi+eax]
    vmovaps ymm1,YMMWORD ptr[edx+eax]
    vmulps ymm0,ymm0,YMMWORD ptr[esi+eax]
    vmulps ymm1,ymm1,YMMWORD ptr[esi+eax]
    vaddps ymm0,ymm0,ymm2
    vaddps ymm1,ymm1,ymm4
    vmulps ymm0,ymm0,ymm3
    vmulps ymm1,ymm1,ymm5
    vcvtps2dq ymm0,ymm0
    vcvtps2dq ymm1,ymm1
    vpminsd ymm0,ymm0,ymm6              ; clamp to <= 65535
    vpminsd ymm1,ymm1,ymm6
    vpmaxsd ymm0,ymm0,ymm7              ; clamp to >= 0
    vpmaxsd ymm1,ymm1,ymm7
    vmovdqa YMMWORD ptr[edi+eax],ymm0
    vmovdqa YMMWORD ptr[edx+eax],ymm1

    add eax,ebx
    loop BT2446C_16_XYZ_AVX2_2

    add esi,src_pitch
    add edi,dst_pitch1
    add edx,dst_pitch2
    dec h
    jnz short BT2446C_16_XYZ_AVX2_1

    vzeroupper

    pop ebx
    pop edi
    pop esi

    ret

JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 endp


end

-------------------------------------------------------------------------------- /Plugins_JPSDR/HDRTools_AVX2_asm_x64.asm: -------------------------------------------------------------------------------- 1 | ; 2 | ; HDRTools() 3 | ; 4 | ; Several functions for working on HDR data, and linear to non-linear conversions. 5 | ; Copyright (C) 2018 JPSDR 6 | ; 7 | ; HDRTools is free software; you can redistribute it and/or modify 8 | ; it under the terms of the GNU General Public License as published by 9 | ; the Free Software Foundation; either version 2, or (at your option) 10 | ; any later version.
;
;  HDRTools is distributed in the hope that it will be useful,
;  but WITHOUT ANY WARRANTY; without even the implied warranty of
;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;  GNU General Public License for more details.
;
;  You should have received a copy of the GNU General Public License
;  along with GNU Make; see the file COPYING.  If not, write to
;  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
;
;

.data

align 16

data segment align(32)

data_f_1048575 real4 8 dup(1048575.0)
data_f_65535 real4 8 dup(65535.0)
data_dw_1048575 dword 8 dup(1048575)
data_dw_65535 dword 8 dup(65535)
data_dw_0 dword 8 dup(0)

data_w_128 word 16 dup(128)
data_w_32 word 16 dup(32)
data_w_8 word 16 dup(8)

.code


; JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2(src1, src2, dst, w)
;   rcx = src1, rdx = src2, r8 = dst : byte rows, aligned 32-byte access
;   r9d = w : number of 32-byte groups (must be non-zero, "loop" is unguarded)
;
; Per byte: t = ~pavg(~src1,~src2), i.e. the average rounded down,
; then dst = pavg(t, src1), which weights src1 more heavily than src2.
JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 proc public frame

	.endprolog

	vpcmpeqb ymm3,ymm3,ymm3			; all bits set, used as NOT mask

	mov r10,rcx				; keep src1, rcx becomes the loop counter
	mov ecx,r9d				; 32-bit move zero-extends into rcx
	xor eax,eax				; rax = byte offset

Planar420_to_Planar422_8_AVX2_next32:
	vmovdqa ymm0,YMMWORD ptr[r10+rax]
	vmovdqa ymm1,YMMWORD ptr[rdx+rax]
	vpxor ymm1,ymm1,ymm3
	vpxor ymm2,ymm0,ymm3
	vpavgb ymm2,ymm2,ymm1
	vpxor ymm2,ymm2,ymm3
	vpavgb ymm2,ymm2,ymm0

	vmovdqa YMMWORD ptr[r8+rax],ymm2
	add rax,32
	loop Planar420_to_Planar422_8_AVX2_next32

	vzeroupper

	ret

JPSDR_HDRTools_Convert_Planar420_to_Planar422_8_AVX2 endp


;JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 proc src1:dword,src2:dword,dst:dword,w:dword
; src1 = rcx
; src2 = rdx
; dst = r8
; w = r9d

JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 proc public frame

	.endprolog

	vpcmpeqb ymm3,ymm3,ymm3			; all bits set, used as NOT mask

	mov r10,rcx				; r10=src1
	xor rcx,rcx
	xor rax,rax
	mov ecx,r9d				; loop counter: number of 32-byte groups (must be non-zero)
	mov r11,32

Convert_Planar420_to_Planar422_16_AVX2_1:
	vmovdqa ymm0,YMMWORD ptr[r10+rax]
	vmovdqa ymm1,YMMWORD ptr[rdx+rax]
	vpxor ymm2,ymm0,ymm3
	vpxor ymm1,ymm1,ymm3
	vpavgw ymm2,ymm2,ymm1
	vpxor ymm2,ymm2,ymm3			; ~pavg(~a,~b) = average rounded down
	vpavgw ymm2,ymm2,ymm0			; second average with src1

	vmovdqa YMMWORD ptr[r8+rax],ymm2
	add rax,r11
	loop Convert_Planar420_to_Planar422_16_AVX2_1

	vzeroupper

	ret

JPSDR_HDRTools_Convert_Planar420_to_Planar422_16_AVX2 endp


; JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2(src1, src2, dst, w32, h, src_pitch2, dst_pitch)
; src1 = rcx
; src2 = rdx
; dst = r8
; w32 = r9d            (32-byte groups per row, must be non-zero)
; h, src_pitch2, dst_pitch on the stack
; dst = pavgb(src1, src2), row by row; both sources advance by src_pitch2.

JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 proc public frame

h equ dword ptr[rbp+48]
src_pitch2 equ qword ptr[rbp+56]
dst_pitch equ qword ptr[rbp+64]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	.endprolog

	mov rsi,rcx
	mov r10d,h
	mov rbx,32
	mov r11,src_pitch2
	mov r12,dst_pitch
	xor rcx,rcx

Convert_Planar422_to_Planar420_8_AVX2_1:
	xor rax,rax
	mov ecx,r9d

Convert_Planar422_to_Planar420_8_AVX2_2:
	vmovdqa ymm0,YMMWORD ptr[rsi+rax]
	vpavgb ymm0,ymm0,YMMWORD ptr[rdx+rax]

	vmovdqa YMMWORD ptr[r8+rax],ymm0
	add rax,rbx
	loop Convert_Planar422_to_Planar420_8_AVX2_2

	add rsi,r11
	add rdx,r11
	add r8,r12
	dec r10d
	jnz short Convert_Planar422_to_Planar420_8_AVX2_1

	vzeroupper

	pop r12
	pop rbx
	pop rsi
	pop rbp

	ret

JPSDR_HDRTools_Convert_Planar422_to_Planar420_8_AVX2 endp


; JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2(src1, src2, dst, w16, h, src_pitch2, dst_pitch)
; src1 = rcx
; src2 = rdx
; dst = r8
; w16 = r9d            (32-byte groups per row, must be non-zero)
; h, src_pitch2, dst_pitch on the stack
; Same as the 8-bit routine but averaging 16-bit words (vpavgw).

JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 proc public frame

h equ dword ptr[rbp+48]
src_pitch2 equ qword ptr[rbp+56]
dst_pitch equ qword ptr[rbp+64]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	.endprolog

	mov rsi,rcx
	mov r10d,h
	mov rbx,32
	mov r11,src_pitch2
	mov r12,dst_pitch
	xor rcx,rcx

Convert_Planar422_to_Planar420_16_AVX2_1:
	xor rax,rax
	mov ecx,r9d

Convert_Planar422_to_Planar420_16_AVX2_2:
	vmovdqa ymm0,YMMWORD ptr[rsi+rax]
	vpavgw ymm0,ymm0,YMMWORD ptr[rdx+rax]

	vmovdqa YMMWORD ptr[r8+rax],ymm0
	add rax,rbx
	loop Convert_Planar422_to_Planar420_16_AVX2_2

	add rsi,r11
	add rdx,r11
	add r8,r12
	dec r10d
	jnz short Convert_Planar422_to_Planar420_16_AVX2_1

	vzeroupper

	pop r12
	pop rbx
	pop rsi
	pop rbp

	ret

JPSDR_HDRTools_Convert_Planar422_to_Planar420_16_AVX2 endp


; JPSDR_HDRTools_Scale_20_XYZ_AVX2(src, dst, w8, h, src_pitch, dst_pitch, ValMin, Coeff)
; src = rcx
; dst = rdx
; w8 = r8d             (8-float groups per row, must be non-zero)
; h = r9d
; src_pitch, dst_pitch, ValMin (float*), Coeff (float*) on the stack
; dst = clamp( int( (src + *ValMin) * *Coeff * 1048575 ), 0, 1048575 ), stored as dwords.

JPSDR_HDRTools_Scale_20_XYZ_AVX2 proc public frame

src_pitch equ qword ptr[rbp+48]
dst_pitch equ qword ptr[rbp+56]
ValMin equ qword ptr[rbp+64]
Coeff equ qword ptr[rbp+72]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	.endprolog

	mov rsi,ValMin
	vmovss xmm1,dword ptr[rsi]
	vshufps xmm1,xmm1,xmm1,0
	vinsertf128 ymm1,ymm1,xmm1,1		; ymm1 = *ValMin in all 8 lanes
	mov rsi,Coeff
	vmovss xmm2,dword ptr[rsi]
	vshufps xmm2,xmm2,xmm2,0
	vinsertf128 ymm2,ymm2,xmm2,1		; ymm2 = *Coeff in all 8 lanes

	vmovdqa ymm3,YMMWORD ptr data_dw_1048575	; upper clamp
	vmovdqa ymm4,YMMWORD ptr data_dw_0		; lower clamp
	vmulps ymm2,ymm2,YMMWORD ptr data_f_1048575	; fold the 20-bit scale into the coefficient

	mov rsi,rcx
	mov r10,src_pitch
	mov r11,dst_pitch
	mov rbx,32
	xor rcx,rcx

Scale_20_XYZ_AVX2_1:
	xor rax,rax
	mov ecx,r8d
Scale_20_XYZ_AVX2_2:
	vaddps ymm0,ymm1,YMMWORD ptr[rsi+rax]
	vmulps ymm0,ymm0,ymm2
	vcvtps2dq ymm0,ymm0
	vpminsd ymm0,ymm0,ymm3
	vpmaxsd ymm0,ymm0,ymm4
	vmovdqa YMMWORD ptr[rdx+rax],ymm0

	add rax,rbx
	loop Scale_20_XYZ_AVX2_2

	add rsi,r10
	add rdx,r11
	dec r9d
	jnz short Scale_20_XYZ_AVX2_1

	vzeroupper

	pop rbx
	pop rsi
	pop rbp

	ret

JPSDR_HDRTools_Scale_20_XYZ_AVX2 endp


; JPSDR_HDRTools_Scale_20_RGB_AVX2(src, dst, w8, h, src_pitch, dst_pitch)
; src = rcx
; dst = rdx
; w8 = r8d             (8-float groups per row, must be non-zero)
; h = r9d
; src_pitch, dst_pitch on the stack (this routine takes no ValMin/Coeff)
; dst = clamp( int( src * 1048575 ), 0, 1048575 ), stored as dwords.

JPSDR_HDRTools_Scale_20_RGB_AVX2 proc public frame

src_pitch equ qword ptr[rbp+48]
dst_pitch equ qword ptr[rbp+56]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	.endprolog

	vmovaps ymm1,YMMWORD ptr data_f_1048575
	vmovdqa ymm2,YMMWORD ptr data_dw_1048575
	vmovdqa ymm3,YMMWORD ptr data_dw_0

	mov rsi,rcx
	mov r10,src_pitch
	mov r11,dst_pitch
	mov rbx,32
	xor rcx,rcx

Scale_20_RGB_AVX2_1:
	xor rax,rax
	mov ecx,r8d
Scale_20_RGB_AVX2_2:
	vmulps ymm0,ymm1,YMMWORD ptr[rsi+rax]
	vcvtps2dq ymm0,ymm0
	vpminsd ymm0,ymm0,ymm2
	vpmaxsd ymm0,ymm0,ymm3
	vmovdqa YMMWORD ptr[rdx+rax],ymm0

	add rax,rbx
	loop Scale_20_RGB_AVX2_2

	add rsi,r10
	add rdx,r11
	dec r9d
	jnz short Scale_20_RGB_AVX2_1

	vzeroupper

	pop rbx
	pop rsi
	pop rbp

	ret

JPSDR_HDRTools_Scale_20_RGB_AVX2 endp


; JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2(src, dst, w, h, src_pitch, dst_pitch)
; src = rcx
; dst = rdx
; w = r8d              (width in 8-byte pixels)
; h = r9d
; src_pitch, dst_pitch on the stack
; Per 16-bit word: (x + 128) >> 8, the add saturating so it cannot wrap.
JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 proc public frame

src_pitch equ qword ptr[rbp+48]
dst_pitch equ qword ptr[rbp+56]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rdi
	.pushreg rdi
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	push r13
	.pushreg r13
	push r14
	.pushreg r14
	.endprolog

	vmovdqa ymm1,YMMWORD ptr data_w_128	; rounding constant

	xor rbx,rbx
	mov rsi,rcx
	mov rdi,rdx
	mov ebx,r8d
	mov r10,src_pitch
	mov r11,dst_pitch
	shr ebx,2				; ebx = number of 4-pixel (32-byte) groups
	mov rdx,32
	mov r12,16
	mov r13,2
	mov r14,1
	xor rcx,rcx

Convert_RGB64_16toRGB64_8_AVX2_1:
	mov ecx,ebx
	xor rax,rax
	or ecx,ecx
	jz Convert_RGB64_16toRGB64_8_AVX2_3	; no full group: "loop" must not run with rcx=0

Convert_RGB64_16toRGB64_8_AVX2_2:
	vmovdqa ymm0,YMMWORD ptr[rsi+rax]
	vpaddusw ymm0,ymm0,ymm1
	vpsrlw ymm0,ymm0,8
	vmovdqa YMMWORD ptr[rdi+rax],ymm0
	add rax,rdx
	loop Convert_RGB64_16toRGB64_8_AVX2_2

Convert_RGB64_16toRGB64_8_AVX2_3:
	test r8d,r13d				; 2 leftover pixels -> one 16-byte step
	jz short Convert_RGB64_16toRGB64_8_AVX2_4

	vmovdqa xmm0,XMMWORD ptr[rsi+rax]
	vpaddusw xmm0,xmm0,xmm1
	vpsrlw xmm0,xmm0,8
	vmovdqa XMMWORD ptr[rdi+rax],xmm0
	add rax,r12

Convert_RGB64_16toRGB64_8_AVX2_4:
	test r8d,r14d				; 1 leftover pixel -> one 8-byte step
	jz short Convert_RGB64_16toRGB64_8_AVX2_5

	vmovq xmm0,qword ptr[rsi+rax]
	vpaddusw xmm0,xmm0,xmm1
	vpsrlw xmm0,xmm0,8
	vmovq qword ptr[rdi+rax],xmm0

Convert_RGB64_16toRGB64_8_AVX2_5:
	add rsi,r10
	add rdi,r11
	dec r9d
	jnz short Convert_RGB64_16toRGB64_8_AVX2_1

	vzeroupper				; ymm0/ymm1 were written: clear upper halves before returning

	pop r14
	pop r13
	pop r12
	pop rbx
	pop rsi
	pop rdi
	pop rbp

	ret

JPSDR_HDRTools_Convert_RGB64_16toRGB64_8_AVX2 endp


; JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2(src, dst, w, h, src_pitch, dst_pitch)
; src = rcx
; dst = rdx
; w = r8d              (width in 8-byte pixels)
; h = r9d
; src_pitch, dst_pitch on the stack
; Per 16-bit word: (x + 32) >> 6, the add saturating so it cannot wrap.
JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 proc public frame

src_pitch equ qword ptr[rbp+48]
dst_pitch equ qword ptr[rbp+56]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rdi
	.pushreg rdi
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	push r13
	.pushreg r13
	push r14
	.pushreg r14
	.endprolog

	vmovdqa ymm1,YMMWORD ptr data_w_32	; rounding constant

	xor rbx,rbx
	mov rsi,rcx
	mov rdi,rdx
	mov ebx,r8d
	mov r10,src_pitch
	mov r11,dst_pitch
	shr ebx,2
	mov rdx,32
	mov r12,16
	mov r13,2
	mov r14,1
	xor rcx,rcx

Convert_RGB64_16toRGB64_10_AVX2_1:
	mov ecx,ebx
	xor rax,rax
	or ecx,ecx
	jz Convert_RGB64_16toRGB64_10_AVX2_3

Convert_RGB64_16toRGB64_10_AVX2_2:
	vmovdqa ymm0,YMMWORD ptr[rsi+rax]
	vpaddusw ymm0,ymm0,ymm1
	vpsrlw ymm0,ymm0,6
	vmovdqa YMMWORD ptr[rdi+rax],ymm0
	add rax,rdx
	loop Convert_RGB64_16toRGB64_10_AVX2_2

Convert_RGB64_16toRGB64_10_AVX2_3:
	test r8d,r13d
	jz short Convert_RGB64_16toRGB64_10_AVX2_4

	vmovdqa xmm0,XMMWORD ptr[rsi+rax]
	vpaddusw xmm0,xmm0,xmm1
	vpsrlw xmm0,xmm0,6
	vmovdqa XMMWORD ptr[rdi+rax],xmm0
	add rax,r12

Convert_RGB64_16toRGB64_10_AVX2_4:
	test r8d,r14d
	jz short Convert_RGB64_16toRGB64_10_AVX2_5

	vmovq xmm0,qword ptr[rsi+rax]
	vpaddusw xmm0,xmm0,xmm1
	vpsrlw xmm0,xmm0,6
	vmovq qword ptr[rdi+rax],xmm0

Convert_RGB64_16toRGB64_10_AVX2_5:
	add rsi,r10
	add rdi,r11
	dec r9d
	jnz short Convert_RGB64_16toRGB64_10_AVX2_1

	vzeroupper				; ymm0/ymm1 were written: clear upper halves before returning

	pop r14
	pop r13
	pop r12
	pop rbx
	pop rsi
	pop rdi
	pop rbp

	ret

JPSDR_HDRTools_Convert_RGB64_16toRGB64_10_AVX2 endp


; JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2(src, dst, w, h, src_pitch, dst_pitch)
; src = rcx
; dst = rdx
; w = r8d              (width in 8-byte pixels)
; h = r9d
; src_pitch, dst_pitch on the stack
; Per 16-bit word: (x + 8) >> 4, the add saturating so it cannot wrap.
JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 proc public frame

src_pitch equ qword ptr[rbp+48]
dst_pitch equ qword ptr[rbp+56]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rdi
	.pushreg rdi
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	push r13
	.pushreg r13
	push r14
	.pushreg r14
	.endprolog

	vmovdqa ymm1,YMMWORD ptr data_w_8	; rounding constant

	xor rbx,rbx
	mov rsi,rcx
	mov rdi,rdx
	mov ebx,r8d
	mov r10,src_pitch
	mov r11,dst_pitch
	shr ebx,2
	mov rdx,32
	mov r12,16
	mov r13,2
	mov r14,1
	xor rcx,rcx

Convert_RGB64_16toRGB64_12_AVX2_1:
	mov ecx,ebx
	xor rax,rax
	or ecx,ecx
	jz Convert_RGB64_16toRGB64_12_AVX2_3

Convert_RGB64_16toRGB64_12_AVX2_2:
	vmovdqa ymm0,YMMWORD ptr[rsi+rax]
	vpaddusw ymm0,ymm0,ymm1
	vpsrlw ymm0,ymm0,4
	vmovdqa YMMWORD ptr[rdi+rax],ymm0
	add rax,rdx
	loop Convert_RGB64_16toRGB64_12_AVX2_2

Convert_RGB64_16toRGB64_12_AVX2_3:
	test r8d,r13d
	jz short Convert_RGB64_16toRGB64_12_AVX2_4

	vmovdqa xmm0,XMMWORD ptr[rsi+rax]
	vpaddusw xmm0,xmm0,xmm1
	vpsrlw xmm0,xmm0,4
	vmovdqa XMMWORD ptr[rdi+rax],xmm0
	add rax,r12

Convert_RGB64_16toRGB64_12_AVX2_4:
	test r8d,r14d
	jz short Convert_RGB64_16toRGB64_12_AVX2_5

	vmovq xmm0,qword ptr[rsi+rax]
	vpaddusw xmm0,xmm0,xmm1
	vpsrlw xmm0,xmm0,4
	vmovq qword ptr[rdi+rax],xmm0

Convert_RGB64_16toRGB64_12_AVX2_5:
	add rsi,r10
	add rdi,r11
	dec r9d
	jnz short Convert_RGB64_16toRGB64_12_AVX2_1

	vzeroupper				; ymm0/ymm1 were written: clear upper halves before returning

	pop r14
	pop r13
	pop r12
	pop rbx
	pop rsi
	pop rdi
	pop rbp

	ret

JPSDR_HDRTools_Convert_RGB64_16toRGB64_12_AVX2 endp


; JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2(dst, srcY, w, h, dst_pitch, src_pitchY)
; dst = rcx            (4 words per pixel, read and rewritten in place)
; srcY = rdx           (one float factor per pixel)
; w = r8d              (width in pixels)
; h = r9d
; dst_pitch, src_pitchY on the stack
; Each pixel's four words are multiplied by that pixel's float factor,
; converted back to integers and packed with unsigned saturation.

JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 proc public frame

dst_pitch equ qword ptr[rbp+48]
src_pitchY equ qword ptr[rbp+56]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rdi
	.pushreg rdi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	.endprolog

	mov rdi,rcx
	mov rsi,rdx
	mov r10d,r8d
	mov r11,dst_pitch
	mov r12,src_pitchY
	mov rdx,8				; two floats consumed per iteration
	shr r10d,1				; r10d = number of pixel pairs
	mov rbx,1
	xor rcx,rcx
	vpxor xmm4,xmm4,xmm4			; zero, used to widen words to dwords (VEX form, no legacy-SSE mix)

Convert_16_RGB64_HLG_OOTF_AVX2_1:
	mov ecx,r10d
	xor rax,rax				; rax = byte offset in srcY; dst offset is 2*rax
	or ecx,ecx
	jz short Convert_16_RGB64_HLG_OOTF_AVX2_3

Convert_16_RGB64_HLG_OOTF_AVX2_2:
	vmovss xmm0,dword ptr[rsi+rax]
	vmovss xmm1,dword ptr[rsi+rax+4]
	vshufps xmm0,xmm0,xmm0,0
	vshufps xmm1,xmm1,xmm1,0
	vmovdqa xmm2,XMMWORD ptr[rdi+2*rax]
	vinsertf128 ymm0,ymm0,xmm1,1		; ymm0 = [factor0 x4 | factor1 x4]
	vpunpckhwd xmm3,xmm2,xmm4
	vpunpcklwd xmm2,xmm2,xmm4
	vinserti128 ymm2,ymm2,xmm3,1		; 8 words zero-extended to 8 dwords
	vcvtdq2ps ymm2,ymm2
	vmulps ymm2,ymm2,ymm0
	vcvtps2dq ymm2,ymm2
	vextracti128 xmm3,ymm2,1
	vpackusdw xmm2,xmm2,xmm3
	vmovdqa XMMWORD ptr[rdi+2*rax],xmm2

	add rax,rdx
	loop Convert_16_RGB64_HLG_OOTF_AVX2_2

Convert_16_RGB64_HLG_OOTF_AVX2_3:
	test r8d,ebx				; odd width: one last pixel
	jz short Convert_16_RGB64_HLG_OOTF_AVX2_4

	vmovss xmm0,dword ptr[rsi+rax]
	vshufps xmm0,xmm0,xmm0,0
	vmovq xmm2,qword ptr[rdi+2*rax]
	vpunpcklwd xmm2,xmm2,xmm4
	vcvtdq2ps xmm2,xmm2
	vmulps xmm2,xmm2,xmm0
	vcvtps2dq xmm2,xmm2
	vpackusdw xmm2,xmm2,xmm2
	vmovq qword ptr[rdi+2*rax],xmm2

Convert_16_RGB64_HLG_OOTF_AVX2_4:
	add rdi,r11
	add rsi,r12
	dec r9d
	jnz Convert_16_RGB64_HLG_OOTF_AVX2_1

	vzeroupper

	pop r12
	pop rbx
	pop rdi
	pop rsi
	pop rbp

	ret

JPSDR_HDRTools_Convert_16_RGB64_HLG_OOTF_AVX2 endp


; JPSDR_HDRTools_BT2446C_16_XYZ_AVX2(src, dst1, dst2, w8, h, src_pitch,
;	dst_pitch1, dst_pitch2, ValMinX, CoeffX, ValMinZ, CoeffZ)
; src = rcx
; dst1 = rdx
; dst2 = r8
; w8 = r9d             (8-float groups per row, must be non-zero)
; h, pitches and the four float pointers on the stack
; In place on both destination planes:
;   dst1 = clamp( int( (dst1*src + *ValMinX) * *CoeffX * 65535 ), 0, 65535 )
;   dst2 = clamp( int( (dst2*src + *ValMinZ) * *CoeffZ * 65535 ), 0, 65535 )

JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 proc public frame

h equ dword ptr[rbp+48]
src_pitch equ qword ptr[rbp+56]
dst_pitch1 equ qword ptr[rbp+64]
dst_pitch2 equ qword ptr[rbp+72]
ValMinX equ qword ptr[rbp+80]
CoeffX equ qword ptr[rbp+88]
ValMinZ equ qword ptr[rbp+96]
CoeffZ equ qword ptr[rbp+104]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rsi
	.pushreg rsi
	push rbx
	.pushreg rbx
	push r12
	.pushreg r12
	push r13
	.pushreg r13
	sub rsp,48				; room for xmm6-xmm8, which this routine overwrites
	.allocstack 48
	vmovdqa XMMWORD ptr[rsp],xmm6
	.savexmm128 xmm6,0
	vmovdqa XMMWORD ptr[rsp+16],xmm7
	.savexmm128 xmm7,16
	vmovdqa XMMWORD ptr[rsp+32],xmm8
	.savexmm128 xmm8,32
	.endprolog

	mov rsi,ValMinX
	vmovss xmm2,dword ptr[rsi]
	vshufps xmm2,xmm2,xmm2,0
	vinsertf128 ymm2,ymm2,xmm2,1
	mov rsi,CoeffX
	vmovss xmm3,dword ptr[rsi]
	vshufps xmm3,xmm3,xmm3,0
	vinsertf128 ymm3,ymm3,xmm3,1

	mov rsi,ValMinZ
	vmovss xmm4,dword ptr[rsi]
	vshufps xmm4,xmm4,xmm4,0
	vinsertf128 ymm4,ymm4,xmm4,1
	mov rsi,CoeffZ
	vmovss xmm5,dword ptr[rsi]
	vshufps xmm5,xmm5,xmm5,0
	vinsertf128 ymm5,ymm5,xmm5,1

	vmovdqa ymm6,YMMWORD ptr data_dw_65535	; upper clamp
	vmovdqa ymm7,YMMWORD ptr data_dw_0	; lower clamp
	vmulps ymm3,ymm3,YMMWORD ptr data_f_65535	; fold the 65535 scale into both coefficients
	vmulps ymm5,ymm5,YMMWORD ptr data_f_65535

	mov rsi,rcx
	mov r10,src_pitch
	mov r11,dst_pitch1
	mov r12,dst_pitch2
	mov r13d,h
	mov rbx,32
	xor rcx,rcx

BT2446C_16_XYZ_AVX2_1:
	xor rax,rax
	mov ecx,r9d
BT2446C_16_XYZ_AVX2_2:
	vmovaps ymm8,YMMWORD ptr[rsi+rax]	; src loaded once, used for both planes
	vmulps ymm0,ymm8,YMMWORD ptr[rdx+rax]
	vmulps ymm1,ymm8,YMMWORD ptr[r8+rax]
	vaddps ymm0,ymm0,ymm2
	vaddps ymm1,ymm1,ymm4
	vmulps ymm0,ymm0,ymm3
	vmulps ymm1,ymm1,ymm5
	vcvtps2dq ymm0,ymm0
	vcvtps2dq ymm1,ymm1
	vpminsd ymm0,ymm0,ymm6
	vpminsd ymm1,ymm1,ymm6
	vpmaxsd ymm0,ymm0,ymm7
	vpmaxsd ymm1,ymm1,ymm7
	vmovdqa YMMWORD ptr[rdx+rax],ymm0
	vmovdqa YMMWORD ptr[r8+rax],ymm1

	add rax,rbx
	loop BT2446C_16_XYZ_AVX2_2

	add rsi,r10
	add rdx,r11
	add r8,r12
	dec r13d
	jnz short BT2446C_16_XYZ_AVX2_1

	vzeroupper

	vmovdqa xmm8,XMMWORD ptr[rsp+32]
	vmovdqa xmm7,XMMWORD ptr[rsp+16]
	vmovdqa xmm6,XMMWORD ptr[rsp]
	add rsp,48

	pop r13
	pop r12
	pop rbx
	pop rsi
	pop rbp

	ret

JPSDR_HDRTools_BT2446C_16_XYZ_AVX2 endp


end

--------------------------------------------------------------------------------
/Plugins_JPSDR/MatrixClass.h:
--------------------------------------------------------------------------------
/*
 *  MatrixClass
 *
 *  Matrix and vector class allowing several operations.
 *  Copyright (C) 2017 JPSDR
 *
 *  MatrixClass is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  MatrixClass is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20 | * 21 | */ 22 | 23 | #ifndef _MATRIX_CLASS_H 24 | #define _MATRIX_CLASS_H 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | typedef enum COEFF_DATA_TYPE_ {DATA_NONE,DATA_DOUBLE,DATA_FLOAT,DATA_UINT64,DATA_INT64, 31 | DATA_UINT32,DATA_INT32,DATA_UINT16,DATA_INT16,DATA_UINT8,DATA_INT8} COEFF_DATA_TYPE; 32 | 33 | 34 | void SetCPUMatrixClass(bool SSE2,bool AVX,bool AVX2); 35 | 36 | 37 | class Vector 38 | { 39 | public : 40 | Vector(void); 41 | Vector(const uint16_t l,const COEFF_DATA_TYPE data); 42 | Vector(const Vector &x); 43 | virtual ~Vector(void); 44 | 45 | bool AllocCheck(void) const {return(Coeff!=NULL);} 46 | bool Create(void); 47 | bool Create(const uint16_t l,const COEFF_DATA_TYPE data); 48 | bool Create(const Vector &x); 49 | bool CopyStrict(const Vector &x); 50 | bool CopyRaw(const void *ptr); 51 | bool CopyRaw(const void *ptr,uint16_t lgth); 52 | bool ExportRaw(void *ptr); 53 | bool ExportRaw(void *ptr,uint16_t lgth); 54 | void Destroy(void); 55 | bool FillD(const double data); 56 | bool FillF(const float data); 57 | bool FillZero(void); 58 | COEFF_DATA_TYPE GetDataType(void) const {return(data_type);} 59 | bool SetInfo(const uint16_t l,const COEFF_DATA_TYPE data); 60 | void GetInfo(uint16_t &l,COEFF_DATA_TYPE &data) const; 61 | uint16_t GetLength(void) const {return(length);} 62 | void* GetPtrVector(void) const {return(Coeff);} 63 | size_t GetDataSize(void) const {return(size);} 64 | double GetD(const uint16_t i) const {return(((double *)Coeff)[i]);} 65 | float GetF(const uint16_t i) const {return(((float *)Coeff)[i]);} 66 | void SetD(const uint16_t i,const double d) {((double *)Coeff)[i]=d;} 67 | void SetF(const uint16_t i,const float d) {((float *)Coeff)[i]=d;} 68 | bool GetSafeD(const uint16_t i,double &d) const ; 69 | bool SetSafeD(const uint16_t i,const double d); 70 | bool GetSafeF(const uint16_t i,float &d) const ; 71 | bool SetSafeF(const uint16_t i,const float d); 72 | 73 | protected : 74 | void *Coeff; 75 | uint16_t length; 76 | 
size_t size; 77 | COEFF_DATA_TYPE data_type; 78 | 79 | private : 80 | Vector& operator = (const Vector &other); 81 | bool operator == (const Vector &other) const; 82 | bool operator != (const Vector &other) const; 83 | }; 84 | 85 | class Matrix; 86 | 87 | class Vector_Compute : public Vector 88 | { 89 | protected : 90 | bool SSE2_Enable,AVX_Enable,AVX2_Enable; 91 | 92 | public : 93 | Vector_Compute(void); 94 | Vector_Compute(const uint16_t l,const COEFF_DATA_TYPE data); 95 | Vector_Compute(const Vector_Compute &x); 96 | virtual ~Vector_Compute(void); 97 | 98 | void SetSSE2(bool val) {SSE2_Enable=val;} 99 | void SetAVX(bool val) {AVX_Enable=val;} 100 | void SetAVX2(bool val) {AVX2_Enable=val;} 101 | 102 | bool Mult(const double coef,const Vector &x); 103 | bool Mult(const double coef); 104 | bool Add(const double coef,const Vector &x); 105 | bool Add(const double coef); 106 | bool Sub(const double coef,const Vector &x); 107 | bool Sub(const double coef); 108 | bool Add_X(const Vector &x,const Vector &y); 109 | bool Add_X(const Vector &x); 110 | bool Sub_X(const Vector &x,const Vector &y); 111 | bool Sub_X(const Vector &x); 112 | bool InvSub_X(const Vector &x); 113 | bool Mult_X(const Vector &x,const Vector &y); 114 | bool Mult_X(const Vector &x); 115 | 116 | bool Product_AX(const Matrix &ma,const Vector &x); 117 | bool Product_AX(const Matrix &ma); 118 | bool Product_tAX(const Matrix &ma,const Vector &x); 119 | bool Product_tAX(const Matrix &ma); 120 | 121 | bool Norme2(double &result); 122 | bool Distance2(const Vector &x,double &result); 123 | bool Norme1(double &result); 124 | bool Distance1(const Vector &x,double &result); 125 | 126 | protected : 127 | // Float 128 | void MultF(const double coef,const Vector &x); 129 | void MultF(const double coef); 130 | void AddF(const double coef,const Vector &x); 131 | void AddF(const double coef); 132 | void SubF(const double coef,const Vector &x); 133 | void SubF(const double coef); 134 | void AddF_X(const Vector &x,const 
Vector &y); 135 | void AddF_X(const Vector &x); 136 | void SubF_X(const Vector &x,const Vector &y); 137 | void SubF_X(const Vector &x); 138 | void InvSubF_X(const Vector &x); 139 | void MultF_X(const Vector &x,const Vector &y); 140 | void MultF_X(const Vector &x); 141 | 142 | void ProductF_AX(const Matrix &ma,const Vector &x); 143 | void ProductF_tAX(const Matrix &ma,const Vector &x); 144 | 145 | double Norme2F(void); 146 | double Distance2F(const Vector &x); 147 | double Norme1F(void); 148 | double Distance1F(const Vector &x); 149 | 150 | // Double 151 | void MultD(const double coef,const Vector &x); 152 | void MultD(const double coef); 153 | void AddD(const double coef,const Vector &x); 154 | void AddD(const double coef); 155 | void SubD(const double coef,const Vector &x); 156 | void SubD(const double coef); 157 | void AddD_X(const Vector &x,const Vector &y); 158 | void AddD_X(const Vector &x); 159 | void SubD_X(const Vector &x,const Vector &y); 160 | void SubD_X(const Vector &x); 161 | void InvSubD_X(const Vector &x); 162 | void MultD_X(const Vector &x,const Vector &y); 163 | void MultD_X(const Vector &x); 164 | 165 | void ProductD_AX(const Matrix &ma,const Vector &x); 166 | void ProductD_tAX(const Matrix &ma,const Vector &x); 167 | 168 | double Norme2D(void); 169 | double Distance2D(const Vector &x); 170 | double Norme1D(void); 171 | double Distance1D(const Vector &x); 172 | 173 | private : 174 | Vector_Compute& operator = (const Vector_Compute &other); 175 | bool operator == (const Vector_Compute &other) const; 176 | bool operator != (const Vector_Compute &other) const; 177 | }; 178 | 179 | 180 | class Matrix 181 | { 182 | public : 183 | Matrix(void); 184 | Matrix(const uint16_t l,const uint16_t c,const COEFF_DATA_TYPE data); 185 | Matrix(const Matrix &m); 186 | virtual ~Matrix(void); 187 | 188 | bool AllocCheck(void) const {return(Coeff!=NULL);} 189 | bool Create(void); 190 | bool Create(const uint16_t l,const uint16_t c,const COEFF_DATA_TYPE data); 191 | 
bool Create(const Matrix &m); 192 | virtual bool CopyStrict(const Matrix &m); 193 | bool CopyRaw(const void *ptr); 194 | bool CopyRaw(const void *ptr,ptrdiff_t ptr_pitch); 195 | bool CopyRaw(const void *ptr,ptrdiff_t ptr_pitch,uint16_t ln,uint16_t co); 196 | bool ExportRaw(void *ptr); 197 | bool ExportRaw(void *ptr,ptrdiff_t ptr_pitch); 198 | bool ExportRaw(void *ptr,ptrdiff_t ptr_pitch,uint16_t ln,uint16_t co); 199 | void Destroy(void); 200 | bool FillD(const double data); 201 | bool FillF(const float data); 202 | bool FillZero(void); 203 | COEFF_DATA_TYPE GetDataType(void) const {return(data_type);} 204 | bool SetInfo(const uint16_t l,const uint16_t c,const COEFF_DATA_TYPE data); 205 | void GetInfo(uint16_t &l,uint16_t &c,COEFF_DATA_TYPE &data) const; 206 | uint16_t GetLines(void) const {return(lines);} 207 | uint16_t GetColumns(void) const {return(columns);} 208 | void* GetPtrMatrix(void) const {return(Coeff);} 209 | void* GetPtrMatrixLine(const uint16_t i) const {return((void *)((uint8_t *)Coeff+i*pitch));} 210 | ptrdiff_t GetPitch(void) const {return(pitch);} 211 | size_t GetDataSize(void) const {return(size);} 212 | double GetD(const uint16_t i,const uint16_t j) const {return(((double *)((uint8_t *)Coeff+(ptrdiff_t)i*pitch))[j]);} 213 | float GetF(const uint16_t i,const uint16_t j) const {return(((float *)((uint8_t *)Coeff+(ptrdiff_t)i*pitch))[j]);} 214 | void SetD(const uint16_t i,const uint16_t j,const double d) {((double *)((uint8_t *)Coeff+(ptrdiff_t)i*pitch))[j]=d;} 215 | void SetF(const uint16_t i,const uint16_t j,const float d) {((float *)((uint8_t *)Coeff+(ptrdiff_t)i*pitch))[j]=d;} 216 | bool GetSafeD(const uint16_t i,const uint16_t j,double &d) const ; 217 | bool SetSafeD(const uint16_t i,const uint16_t j,const double d); 218 | bool GetSafeF(const uint16_t i,const uint16_t j,float &d) const ; 219 | bool SetSafeF(const uint16_t i,const uint16_t j,const float d); 220 | 221 | protected : 222 | void *Coeff; 223 | uint16_t columns,lines; 224 | size_t 
// Matrix_Compute extends Matrix with transpose, element-wise arithmetic,
// matrix products, inversion and norm/distance queries, plus SIMD capability
// flags and a "zero" threshold.
// NOTE(review): only declarations are visible here; operation semantics and
// the meaning of bool / int8_t results must be confirmed in the .cpp.
class Matrix_Compute : public Matrix
{
protected :
	// Always stored non-negative (SetZeroValue applies fabs).
	// NOTE(review): presumably the magnitude below which a value is treated as
	// zero by the inversion code - confirm.
	double zero_value;
	// SIMD capability switches, written only through the Set*() methods below.
	bool SSE2_Enable,AVX_Enable,AVX2_Enable;

public :
	Matrix_Compute(void);
	Matrix_Compute(const uint16_t l,const uint16_t c,const COEFF_DATA_TYPE data);
	Matrix_Compute(const Matrix_Compute &m);
	virtual ~Matrix_Compute(void);

	// Plain setters: store the caller's value, no CPU detection is done here.
	void SetSSE2(bool val) {SSE2_Enable=val;}
	void SetAVX(bool val) {AVX_Enable=val;}
	void SetAVX2(bool val) {AVX2_Enable=val;}

	bool CreateTranspose(const Matrix &m);
	// NOTE(review): the parameter type differs from
	// Matrix::CopyStrict(const Matrix &), so this declares a new virtual that
	// hides the base version instead of overriding it - confirm this is intended.
	virtual bool CopyStrict(const Matrix_Compute &m);
	void SetZeroValue(const double z) {zero_value=fabs(z);}
	double GetZeroValue(void) const {return(zero_value);}

	bool Transpose(void);
	bool Transpose(const Matrix &ma);

	// Public arithmetic entry points; most have protected ...F / ...D workers below.
	// NOTE(review): presumably these validate dimensions/data type and return
	// false on mismatch - confirm in the .cpp.
	bool Mult(const double coef,const Matrix &ma);
	bool Mult(const double coef);
	bool Add(const double coef,const Matrix &ma);
	bool Add(const double coef);
	bool Sub(const double coef,const Matrix &ma);
	bool Sub(const double coef);
	bool Add_A(const Matrix &ma,const Matrix &mb);
	bool Add_A(const Matrix &ma);
	bool Sub_A(const Matrix &ma,const Matrix &mb);
	bool Sub_A(const Matrix &ma);
	bool InvSub_A(const Matrix &ma);
	bool Mult_A(const Matrix &ma,const Matrix &mb);
	bool Mult_A(const Matrix &ma);

	// Matrix products; the 't' in a name presumably marks a transposed operand - confirm.
	bool Product_AB(const Matrix &ma,const Matrix &mb);
	bool Product_AtB(const Matrix &ma,const Matrix &mb);
	bool Product_tAA(const Matrix &ma);
	bool Product_tAA(void);

	// Inversion. The "Safe" variants return an int8_t status code whose values
	// are not documented in this header.
	bool Inverse(const Matrix &ma);
	bool Inverse(void);
	int8_t InverseSafe(const Matrix_Compute &ma);
	int8_t InverseSafe(void);

	// Norm / distance queries: value through 'result', bool is a separate status.
	bool Norme2(double &result);
	bool Distance2(const Matrix &ma,double &result);
	bool Norme1(double &result);
	bool Distance1(const Matrix &ma,double &result);

protected :
	// Float
	void TransposeF(const Matrix &ma);

	void MultF(const double coef,const Matrix &ma);
	void MultF(const double coef);
	void AddF(const double coef,const Matrix &ma);
	void AddF(const double coef);
	void SubF(const double coef,const Matrix &ma);
	void SubF(const double coef);
	void AddF_A(const Matrix &ma,const Matrix &mb);
	void AddF_A(const Matrix &ma);
	void SubF_A(const Matrix &ma,const Matrix &mb);
	void SubF_A(const Matrix &ma);
	void InvSubF_A(const Matrix &ma);
	void MultF_A(const Matrix &ma,const Matrix &mb);
	void MultF_A(const Matrix &ma);

	void ProductF_AB(const Matrix &ma,const Matrix &mb);
	void ProductF_AtB(const Matrix &ma,const Matrix &mb);

	bool InverseF(const Matrix &ma);
	int8_t InverseSafeF(const Matrix_Compute &ma);

	double Norme2F(void);
	double Distance2F(const Matrix &ma);
	double Norme1F(void);
	double Distance1F(const Matrix &ma);

	// Double
	void MultD(const double coef,const Matrix &ma);
	void MultD(const double coef);
	void AddD(const double coef,const Matrix &ma);
	void AddD(const double coef);
	void SubD(const double coef,const Matrix &ma);
	void SubD(const double coef);
	void AddD_A(const Matrix &ma,const Matrix &mb);
	void AddD_A(const Matrix &ma);
	void SubD_A(const Matrix &ma,const Matrix &mb);
	void SubD_A(const Matrix &ma);
	void InvSubD_A(const Matrix &ma);
	void MultD_A(const Matrix &ma,const Matrix &mb);
	void MultD_A(const Matrix &ma);

	void TransposeD(const Matrix &ma);

	void ProductD_AB(const Matrix &ma,const Matrix &mb);
	void ProductD_AtB(const Matrix &ma,const Matrix &mb);

	bool InverseD(const Matrix &ma);
	int8_t InverseSafeD(const Matrix_Compute &ma);

	double Norme2D(void);
	double Distance2D(const Matrix &ma);
	double Norme1D(void);
	double Distance1D(const Matrix &ma);

	// The integer element types below only get a transpose worker; no
	// arithmetic is declared for them in this class.

	// U64
	void TransposeU64(const Matrix &ma);

	// I64
	void TransposeI64(const Matrix &ma);

	// U32
	void TransposeU32(const Matrix &ma);

	// I32
	void TransposeI32(const Matrix &ma);

	// U16
	void TransposeU16(const Matrix &ma);

	// I16
	void TransposeI16(const Matrix &ma);

	// U8
	void TransposeU8(const Matrix &ma);

	// I8
	void TransposeI8(const Matrix &ma);

	// Assignment is a deliberate no-op: nothing is copied, *this is returned.
	Matrix_Compute& operator=(const Matrix_Compute&){return(*this);}

private :
	// Declared private with no inline body: comparison is not available to users.
	bool operator == (const Matrix_Compute &other) const;
	bool operator != (const Matrix_Compute &other) const;
};
// PlanarFrame: holder for up to four separately addressed planes
// (planar_1..planar_4) with per-plane width/height/pitch bookkeeping, and
// converters between packed YUY2 / RGB24 data and planar storage.
// NOTE(review): only declarations are visible here; allocation policy,
// alignment and the exact plane numbering used by the Get*() accessors are
// defined in PlanarFrame.cpp and should be confirmed there.
class PlanarFrame
{
private:
	bool useSIMD,useAVX;        // code-path selection flags
	int cpu;                    // value returned by getCPUFlags()
	int ypitch,uvpitch;         // luma / chroma pitches
	int ywidth,uvwidth;         // luma / chroma widths
	int yheight,uvheight;       // luma / chroma heights
	bool alloc_ok;              // value returned by GetAllocStatus()

	bool grey,isRGBPfamily,isAlphaChannel;
	uint8_t pixelsize; // AVS16
	uint8_t bits_per_pixel;

	uint8_t *planar_1,*planar_2,*planar_3,*planar_4;
	bool allocSpace(VideoInfo &viInfo);
	bool allocSpace(int specs[4],bool rgbplanar,bool alphaplanar,uint8_t _pixelsize,uint8_t _bits_per_pixel);
	int getCPUInfo(void);
	int checkCPU(void);
	bool copyInternalFrom(PVideoFrame &frame,VideoInfo &viInfo);
	bool copyInternalFrom(PlanarFrame &frame);
	bool copyInternalTo(PVideoFrame &frame,VideoInfo &viInfo);
	bool copyInternalTo(PlanarFrame &frame);
	bool copyInternalPlaneTo(PlanarFrame &frame,uint8_t plane);
	// Planar -> packed converters (private; the packed -> planar ones are public below).
	void conv422toYUY2(uint8_t *py,uint8_t *pu,uint8_t *pv,uint8_t *dst,int pitch1Y,int pitch1UV,int pitch2,
		int width,int height);
	void conv444toRGB24(uint8_t *py,uint8_t *pu,uint8_t *pv,uint8_t *dst,int pitch1Y,int pitch1UV,int pitch2,
		int width,int height);

public:
	PlanarFrame(void);
	PlanarFrame(VideoInfo &viInfo);
	virtual ~PlanarFrame(void);
	// Returns the stored allocation flag.
	// NOTE(review): presumably set by the create*/allocSpace calls - confirm.
	bool GetAllocStatus(void) {return(alloc_ok);}
	// Two ways to describe the geometry: explicit luma/chroma sizes, or a
	// single size plus a chroma_format code.
	// NOTE(review): chroma_format presumably takes the PLANAR_420/422/444
	// constants defined above this class - confirm.
	bool createPlanar(int yheight,int uvheight,int ywidth,int uvwidth,bool rgbplanar,bool alphaplanar,uint8_t pixelsize,uint8_t bits_per_pixel);
	bool createPlanar(int height,int width,uint8_t chroma_format,bool rgbplanar,bool alphaplanar,uint8_t pixelsize,uint8_t bits_per_pixel);
	bool createFromProfile(VideoInfo &viInfo);
	bool createFromFrame(PVideoFrame &frame,VideoInfo &viInfo);
	bool createFromPlanar(PlanarFrame &frame);
	bool copyFrom(PVideoFrame &frame,VideoInfo &viInfo);
	bool copyTo(PVideoFrame &frame,VideoInfo &viInfo);
	bool copyFrom(PlanarFrame &frame);
	bool copyTo(PlanarFrame &frame);
	bool copyChromaTo(PlanarFrame &dst);
	bool copyPlaneTo(PlanarFrame &dst,uint8_t plane);
	void freePlanar();
	// Per-plane accessors; the valid range of 'plane' is not visible in this header.
	uint8_t* GetPtr(uint8_t plane);
	int GetWidth(uint8_t plane);
	int GetHeight(uint8_t plane);
	int GetPitch(uint8_t plane);
	int getCPUFlags(void) {return cpu;}
	// NOTE(review): declared inline but not defined in this header; verify the
	// definition is visible to every translation unit that calls it.
	inline void BitBlt(uint8_t *dstp,int dst_pitch,const uint8_t *srcp,int src_pitch,int row_size,int height);
	// Takes a non-const reference, so const PlanarFrame objects cannot be assigned from.
	PlanarFrame& operator=(PlanarFrame &ob2);
	void convYUY2to422(const uint8_t *src,uint8_t *py,uint8_t *pu,uint8_t *pv,int pitch1,int pitch2Y,int pitch2UV,
		int width,int height);
	void convRGB24to444(const uint8_t *src,uint8_t *py,uint8_t *pu,uint8_t *pv,int pitch1,int pitch2Y,int pitch2UV,
		int width,int height);
};
; convYUY2to422_MMX - split packed YUY2 rows into separate Y, U and V planes (MMX, x86).
;   src               packed source pointer
;   py, pu, pv        destination plane pointers
;   pitch1            byte step added to src after each row
;   pitch2Y, pitch2UV byte steps added to py and to pu/pv after each row
;   width_            halved once to give the per-row bound for the index in eax
;   height            row counter, decremented in place; must be >= 1 because
;                     it is decremented before being tested
; Each inner iteration reads 16 source bytes and stores 8 Y, 4 U and 4 V bytes.
; The inner loop tests its bound after the body, so it runs at least once per
; row and its last pass can store past the bound when width_/2 is not a
; multiple of 4.
; NOTE(review): that over-run presumably relies on padded plane buffers - confirm with the caller.
convYUY2to422_MMX proc src:dword,py:dword,pu:dword,pv:dword,pitch1:dword,pitch2Y:dword,pitch2UV:dword,width_:dword,height:dword

	public convYUY2to422_MMX

	push ebx
	push edi
	push esi

	mov edi,src
	mov ebx,py
	mov edx,pu
	mov esi,pv
	mov ecx,width_
	shr ecx,1					;ecx = loop bound for eax (width_/2)
	movq mm5,qword ptr Ymask	;mm5 = 00FFh in every word: keeps the even (Y) bytes
yloop:
	xor eax,eax					;eax = index, restarted for every row
	align 16
xloop:
	movq mm0,[edi+eax*4]		;VYUYVYUY
	movq mm1,[edi+eax*4+8]		;VYUYVYUY
	movq mm2,mm0				;VYUYVYUY
	movq mm3,mm1				;VYUYVYUY
	pand mm0,mm5				;0Y0Y0Y0Y
	psrlw mm2,8					;0V0U0V0U
	pand mm1,mm5				;0Y0Y0Y0Y
	psrlw mm3,8					;0V0U0V0U
	packuswb mm0,mm1			;YYYYYYYY
	packuswb mm2,mm3			;VUVUVUVU
	movq mm4,mm2				;VUVUVUVU
	pand mm2,mm5				;0U0U0U0U
	psrlw mm4,8					;0V0V0V0V
	packuswb mm2,mm2			;xxxxUUUU
	packuswb mm4,mm4			;xxxxVVVV
	movq [ebx+eax*2],mm0		;store y
	movd dword ptr[edx+eax],mm2	;store u
	movd dword ptr[esi+eax],mm4	;store v
	add eax,4
	cmp eax,ecx
	jl short xloop
	add edi,pitch1				;next source row
	add ebx,pitch2Y				;next Y row
	add edx,pitch2UV			;next U row
	add esi,pitch2UV			;next V row
	dec height
	jnz short yloop
	emms						;leave MMX state so x87 code can run afterwards

	pop esi
	pop edi
	pop ebx

	ret

convYUY2to422_MMX endp
; convYUY2to422_SSE2 - split packed YUY2 rows into Y, U and V planes (SSE2, x86).
;   src, py, pu, pv   source and destination plane pointers
;   pitch1            byte step added to src after each row
;   pitch2Y, pitch2UV byte steps added to py and to pu/pv after each row
;   width_            row length in 16-source-byte (8-pixel) groups: width_>>1
;                     main iterations handle 32 source bytes each, and an odd
;                     width_ adds one 16-byte tail
;   height            row counter, decremented in place; must be >= 1
; movdqa is used for the source loads and for the Y store of the main loop, so
; those addresses must be 16-byte aligned.
; NOTE(review): the unit of width_ is read off the loop structure; confirm with the C++ caller.
convYUY2to422_SSE2 proc src:dword,py:dword,pu:dword,pv:dword,pitch1:dword,pitch2Y:dword,pitch2UV:dword,width_:dword,height:dword

	public convYUY2to422_SSE2

	push ebx
	push edi
	push esi

	mov edi,src
	mov ebx,py
	mov edx,pu
	mov esi,pv

yloop_2:
	xor eax,eax					;eax = chroma byte index, restarted for every row
	mov ecx,width_
	shr ecx,1					;ecx = number of 32-byte main iterations
	jz short suite1_2			;none: go straight to the tail test

	; Three rounds of byte interleaving de-interleave the 32 input bytes.
xloop_2:
	movdqa xmm0,XMMWORD ptr[edi+4*eax]		;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
	movdqa xmm1,XMMWORD ptr[edi+4*eax+16]	;V8Y16U8Y15V7Y14U7Y13 V6Y12U6Y11V5Y10U5Y9
	movdqa xmm2,xmm0						;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
	punpcklbw xmm0,xmm1						;V6V2Y12Y4U6U2Y11Y3 V5V1Y10Y2U5U1Y9Y1
	punpckhbw xmm2,xmm1						;V8V4Y16Y8U8U4Y15Y7 V7V3Y14Y6U7U3Y13Y5
	movdqa xmm1,xmm0
	punpcklbw xmm0,xmm2						;V7V5V3V1Y14Y10Y6Y2 U7U5U3U1Y13Y9Y5Y1
	punpckhbw xmm1,xmm2						;V8V6V4V2Y16Y12Y8Y4 U8U6U4U2Y15Y11Y7Y3
	movdqa xmm2,xmm0
	punpcklbw xmm0,xmm1						;U8U7U6U5U4U3U2U1 Y15Y13Y11Y9Y7Y5Y3Y1
	punpckhbw xmm2,xmm1						;V8V7V6V5V4V3V2V1 Y16Y14Y12Y10Y8Y6Y4Y2
	movhps qword ptr [edx+eax],xmm0			;store 8 U bytes (high half)
	punpcklbw xmm0,xmm2						;Y16Y15Y14Y13Y12Y11Y10Y9Y8Y7Y6Y5Y4Y3Y2Y1
	movhps qword ptr [esi+eax],xmm2			;store 8 V bytes (high half)
	movdqa XMMWORD ptr[ebx+2*eax],xmm0		;store 16 Y bytes
	add eax,8
	loop xloop_2

suite1_2:
	mov ecx,width_
	and ecx,1					;odd width_: one extra 16-byte group remains
	jz short suite2_2

	movdqa xmm0,XMMWORD ptr[edi+4*eax]		;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
	movhlps xmm1,xmm0						;V4Y8U4Y7V3Y6U3Y5 V4Y8U4Y7V3Y6U3Y5
	punpcklbw xmm0,xmm1						;V4V2Y8Y4U4U2Y7Y3 V3V1Y6Y2U3U1Y5Y1
	movhlps xmm1,xmm0						;V4V2Y8Y4U4U2Y7Y3 V4V2Y8Y4U4U2Y7Y3
	punpcklbw xmm0,xmm1						;V4V3V2V1Y8Y6Y4Y2 U4U3U2U1Y7Y5Y3Y1
	movhlps xmm2,xmm0						;xxxxxxxx V4V3V2V1Y8Y6Y4Y2
	movdqa xmm1,xmm0
	psrlq xmm0,32							;0000V4V3V2V1 0000U4U3U2U1
	punpcklbw xmm1,xmm2						; xxxxxxxx Y8Y7Y6Y5Y4Y3Y2Y1
	movd dword ptr[edx+eax],xmm0			;store 4 U bytes
	movhlps xmm2,xmm0						;bring V4V3V2V1 down to the low dword
	movq qword ptr[ebx+2*eax],xmm1			;store 8 Y bytes
	movd dword ptr[esi+eax],xmm2			;store 4 V bytes


suite2_2:
	add edi,pitch1				;next source row
	add ebx,pitch2Y				;next Y row
	add edx,pitch2UV			;next U row
	add esi,pitch2UV			;next V row
	dec height
	jnz yloop_2

	pop esi
	pop edi
	pop ebx

	ret

convYUY2to422_SSE2 endp
; convYUY2to422_AVX - split packed YUY2 rows into Y, U and V planes
; (VEX-encoded 128-bit instructions, x86). Same argument list and loop
; structure as the SSE2 routine; the three-operand forms remove the
; register-to-register copies.
;   src, py, pu, pv   source and destination plane pointers
;   pitch1            byte step added to src after each row
;   pitch2Y, pitch2UV byte steps added to py and to pu/pv after each row
;   width_            row length in 16-source-byte (8-pixel) groups: width_>>1
;                     main iterations of 32 bytes, plus one 16-byte tail when odd
;   height            row counter, decremented in place; must be >= 1
; vmovdqa is used for the source loads and the main-loop Y store, so those
; addresses must be 16-byte aligned.
; NOTE(review): the unit of width_ is read off the loop structure; confirm with the C++ caller.
convYUY2to422_AVX proc src:dword,py:dword,pu:dword,pv:dword,pitch1:dword,pitch2Y:dword,pitch2UV:dword,width_:dword,height:dword

	public convYUY2to422_AVX

	push ebx
	push edi
	push esi

	mov edi,src
	mov ebx,py
	mov edx,pu
	mov esi,pv

yloop_2_AVX:
	xor eax,eax					;eax = chroma byte index, restarted for every row
	mov ecx,width_
	shr ecx,1					;ecx = number of 32-byte main iterations
	jz short suite1_2_AVX

	; The "high" unpack is issued before the "low" one each round because the
	; low result overwrites xmm0, which the high unpack still needs as input.
xloop_2_AVX:
	vmovdqa xmm0,XMMWORD ptr[edi+4*eax]		;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
	vmovdqa xmm1,XMMWORD ptr[edi+4*eax+16]	;V8Y16U8Y15V7Y14U7Y13 V6Y12U6Y11V5Y10U5Y9
	vpunpckhbw xmm2,xmm0,xmm1				;V8V4Y16Y8U8U4Y15Y7 V7V3Y14Y6U7U3Y13Y5
	vpunpcklbw xmm0,xmm0,xmm1				;V6V2Y12Y4U6U2Y11Y3 V5V1Y10Y2U5U1Y9Y1
	vpunpckhbw xmm1,xmm0,xmm2				;V8V6V4V2Y16Y12Y8Y4 U8U6U4U2Y15Y11Y7Y3
	vpunpcklbw xmm0,xmm0,xmm2				;V7V5V3V1Y14Y10Y6Y2 U7U5U3U1Y13Y9Y5Y1
	vpunpckhbw xmm2,xmm0,xmm1				;V8V7V6V5V4V3V2V1 Y16Y14Y12Y10Y8Y6Y4Y2
	vpunpcklbw xmm0,xmm0,xmm1				;U8U7U6U5U4U3U2U1 Y15Y13Y11Y9Y7Y5Y3Y1
	vmovhps qword ptr [edx+eax],xmm0		;store 8 U bytes (high half)
	vpunpcklbw xmm0,xmm0,xmm2				;Y16Y15Y14Y13Y12Y11Y10Y9Y8Y7Y6Y5Y4Y3Y2Y1
	vmovhps qword ptr [esi+eax],xmm2		;store 8 V bytes (high half)
	vmovdqa XMMWORD ptr[ebx+2*eax],xmm0		;store 16 Y bytes
	add eax,8
	loop xloop_2_AVX

suite1_2_AVX:
	mov ecx,width_
	and ecx,1					;odd width_: one extra 16-byte group remains
	jz short suite2_2_AVX

	vmovdqa xmm0,XMMWORD ptr[edi+4*eax]		;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
	vmovhlps xmm1,xmm1,xmm0					;V4Y8U4Y7V3Y6U3Y5 V4Y8U4Y7V3Y6U3Y5
	vpunpcklbw xmm0,xmm0,xmm1				;V4V2Y8Y4U4U2Y7Y3 V3V1Y6Y2U3U1Y5Y1
	vmovhlps xmm1,xmm1,xmm0					;V4V2Y8Y4U4U2Y7Y3 V4V2Y8Y4U4U2Y7Y3
	vpunpcklbw xmm0,xmm0,xmm1				;V4V3V2V1Y8Y6Y4Y2 U4U3U2U1Y7Y5Y3Y1
	vmovhlps xmm2,xmm2,xmm0					;xxxxxxxx V4V3V2V1Y8Y6Y4Y2
	vpunpcklbw xmm1,xmm0,xmm2				; xxxxxxxx Y8Y7Y6Y5Y4Y3Y2Y1
	vpsrlq xmm0,xmm0,32						;0000V4V3V2V1 0000U4U3U2U1
	vmovd dword ptr[edx+eax],xmm0			;store 4 U bytes
	vmovhlps xmm2,xmm2,xmm0					;bring V4V3V2V1 down to the low dword
	vmovq qword ptr[ebx+2*eax],xmm1			;store 8 Y bytes
	vmovd dword ptr[esi+eax],xmm2			;store 4 V bytes


suite2_2_AVX:
	add edi,pitch1				;next source row
	add ebx,pitch2Y				;next Y row
	add edx,pitch2UV			;next U row
	add esi,pitch2UV			;next V row
	dec height
	jnz yloop_2_AVX

	pop esi
	pop edi
	pop ebx

	ret

convYUY2to422_AVX endp
; conv422toYUY2_MMX - interleave separate Y, U and V planes into packed YUY2 rows (MMX, x86).
;   py, pu, pv        source plane pointers
;   dst               packed destination pointer
;   pitch1Y, pitch1UV byte steps added to py and to pu/pv after each row
;   pitch2            byte step added to dst after each row
;   width_            halved once to give the per-row bound for the index in eax
;   height            row counter, decremented in place; must be >= 1 because
;                     it is decremented before being tested
; Each inner iteration reads 8 Y, 4 U and 4 V bytes and stores 16 packed bytes.
; The inner loop tests its bound after the body, so it runs at least once per
; row and its last pass can store past the bound when width_/2 is not a
; multiple of 4.
; NOTE(review): that over-run presumably relies on padded buffers - confirm with the caller.
conv422toYUY2_MMX proc py:dword,pu:dword,pv:dword,dst:dword,pitch1Y:dword,pitch1UV:dword,pitch2:dword,width_:dword,height:dword

	public conv422toYUY2_MMX

	push ebx
	push edi
	push esi

	mov ebx,py
	mov edx,pu
	mov esi,pv
	mov edi,dst
	mov ecx,width_
	shr ecx,1					;ecx = loop bound for eax (width_/2)
yloop_3:
	xor eax,eax					;eax = chroma byte index, restarted for every row
	align 16
xloop_3:
	movq mm0,[ebx+eax*2]		;YYYYYYYY
	movd mm1,dword ptr[edx+eax]	;0000UUUU
	movd mm2,dword ptr[esi+eax]	;0000VVVV
	movq mm3,mm0				;YYYYYYYY
	punpcklbw mm1,mm2			;VUVUVUVU
	punpcklbw mm0,mm1			;VYUYVYUY
	punpckhbw mm3,mm1			;VYUYVYUY
	movq [edi+eax*4],mm0		;store
	movq [edi+eax*4+8],mm3		;store
	add eax,4
	cmp eax,ecx
	jl short xloop_3
	add ebx,pitch1Y				;next Y row
	add edx,pitch1UV			;next U row
	add esi,pitch1UV			;next V row
	add edi,pitch2				;next destination row
	dec height
	jnz short yloop_3
	emms						;leave MMX state so x87 code can run afterwards

	pop esi
	pop edi
	pop ebx

	ret

conv422toYUY2_MMX endp
; conv422toYUY2_SSE2 - interleave Y, U and V planes into packed YUY2 rows (SSE2, x86).
;   py, pu, pv        source plane pointers
;   dst               packed destination pointer
;   pitch1Y, pitch1UV byte steps added to py and to pu/pv after each row
;   modulo2           added to dst at the end of a row. dst has already been
;                     advanced by the bytes written, so this is a row-end skip,
;                     not a full pitch (unlike pitch2 of the MMX routine)
;   width_            row length in 16-output-byte (8-pixel) groups: width_>>1
;                     main iterations write 32 bytes each, and an odd width_
;                     adds one 16-byte tail
;   height            row counter, decremented in place; must be >= 1
; movdqa is used for the main-loop Y load and for every dst store, so those
; addresses must be 16-byte aligned.
; NOTE(review): the unit of width_ is read off the loop structure; confirm with the C++ caller.
conv422toYUY2_SSE2 proc py:dword,pu:dword,pv:dword,dst:dword,pitch1Y:dword,pitch1UV:dword,modulo2:dword,width_:dword,height:dword

	public conv422toYUY2_SSE2

	push ebx
	push edi
	push esi

	mov ebx,py
	mov edx,pu
	mov esi,pv
	mov edi,dst

yloop_4:
	xor eax,eax					;eax counts 4-byte chroma units: U/V at 4*eax, Y at 8*eax
	mov ecx,width_
	shr ecx,1					;ecx = number of 32-byte main iterations
	jz suite1

xloop_4:
	movq xmm1,qword ptr[edx+4*eax]		;00000000UUUUUUUU
	movq xmm0,qword ptr[esi+4*eax]		;00000000VVVVVVVV
	movdqa xmm2,XMMWORD ptr[ebx+8*eax]	;YYYYYYYYYYYYYYYY
	punpcklbw xmm1,xmm0					;VUVUVUVUVUVUVUVU
	movdqa xmm3,xmm2
	add eax,2
	punpcklbw xmm2,xmm1					;VYUYVYUYVYUYVYUY
	punpckhbw xmm3,xmm1					;VYUYVYUYVYUYVYUY

	movdqa XMMWORD ptr[edi],xmm2
	movdqa XMMWORD ptr[edi+16],xmm3
	add edi,32

	loop xloop_4

suite1:
	mov ecx,width_
	and ecx,1					;odd width_: one extra 16-byte output group remains
	jz short suite2

	movd xmm1,dword ptr[edx+4*eax]		;000000000000UUUU
	movd xmm0,dword ptr[esi+4*eax]		;000000000000VVVV
	movq xmm2,qword ptr[ebx+8*eax]		;00000000YYYYYYYY
	punpcklbw xmm1,xmm0					;00000000VUVUVUVU
	punpcklbw xmm2,xmm1					;VYUYVYUYVYUYVYUY

	movdqa XMMWORD ptr[edi],xmm2
	add edi,16

suite2:
	add ebx,pitch1Y				;next Y row
	add edx,pitch1UV			;next U row
	add esi,pitch1UV			;next V row
	add edi,modulo2				;skip from the end of the written data to the next row
	dec height
	jnz short yloop_4

	pop esi
	pop edi
	pop ebx

	ret

conv422toYUY2_SSE2 endp
; conv422toYUY2_AVX - interleave Y, U and V planes into packed YUY2 rows
; (VEX-encoded 128-bit instructions, x86). Same argument list and loop
; structure as the SSE2 routine.
;   py, pu, pv        source plane pointers
;   dst               packed destination pointer
;   pitch1Y, pitch1UV byte steps added to py and to pu/pv after each row
;   modulo2           added to dst at the end of a row. dst has already been
;                     advanced by the bytes written, so this is a row-end skip,
;                     not a full pitch
;   width_            row length in 16-output-byte (8-pixel) groups: width_>>1
;                     main iterations of 32 bytes, plus one 16-byte tail when odd
;   height            row counter, decremented in place; must be >= 1
; vmovdqa is used for the main-loop Y load and for every dst store, so those
; addresses must be 16-byte aligned.
; NOTE(review): the unit of width_ is read off the loop structure; confirm with the C++ caller.
conv422toYUY2_AVX proc py:dword,pu:dword,pv:dword,dst:dword,pitch1Y:dword,pitch1UV:dword,modulo2:dword,width_:dword,height:dword

	public conv422toYUY2_AVX

	push ebx
	push edi
	push esi

	mov ebx,py
	mov edx,pu
	mov esi,pv
	mov edi,dst

yloop_4_AVX:
	xor eax,eax					;eax counts 4-byte chroma units: U/V at 4*eax, Y at 8*eax
	mov ecx,width_
	shr ecx,1					;ecx = number of 32-byte main iterations
	jz suite1_AVX

xloop_4_AVX:
	vmovq xmm1,qword ptr[edx+4*eax]		;00000000UUUUUUUU
	vmovq xmm0,qword ptr[esi+4*eax]		;00000000VVVVVVVV
	vmovdqa xmm2,XMMWORD ptr[ebx+8*eax]	;YYYYYYYYYYYYYYYY
	vpunpcklbw xmm1,xmm1,xmm0			;VUVUVUVUVUVUVUVU
	add eax,2
	vpunpckhbw xmm3,xmm2,xmm1			;VYUYVYUYVYUYVYUY
	vpunpcklbw xmm2,xmm2,xmm1			;VYUYVYUYVYUYVYUY

	vmovdqa XMMWORD ptr[edi],xmm2
	vmovdqa XMMWORD ptr[edi+16],xmm3
	add edi,32

	loop xloop_4_AVX

suite1_AVX:
	mov ecx,width_
	and ecx,1					;odd width_: one extra 16-byte output group remains
	jz short suite2_AVX

	vmovd xmm1,dword ptr[edx+4*eax]		;000000000000UUUU
	vmovd xmm0,dword ptr[esi+4*eax]		;000000000000VVVV
	vmovq xmm2,qword ptr[ebx+8*eax]		;00000000YYYYYYYY
	vpunpcklbw xmm1,xmm1,xmm0			;00000000VUVUVUVU
	vpunpcklbw xmm2,xmm2,xmm1			;VYUYVYUYVYUYVYUY

	vmovdqa XMMWORD ptr[edi],xmm2
	add edi,16

suite2_AVX:
	add ebx,pitch1Y				;next Y row
	add edx,pitch1UV			;next U row
	add esi,pitch1UV			;next V row
	add edi,modulo2				;skip from the end of the written data to the next row
	dec height
	jnz short yloop_4_AVX

	pop esi
	pop edi
	pop ebx

	ret

conv422toYUY2_AVX endp
;convYUY2to422_MMX proc src:dword,py:dword,pu:dword,pv:dword,pitch1:dword,pitch2Y:dword,pitch2UV:dword,width_:dword,height:dword
; src = rcx
; py = rdx
; pu = r8
; pv = r9
;
; x64 version: split packed YUY2 rows into separate Y, U and V planes (MMX).
; The prototype line above is kept from the x86 source; here the four pointers
; arrive in full 64-bit registers and the remaining five arguments are read as
; dwords from the caller's stack through rbp.
; Each inner iteration reads 16 source bytes and stores 8 Y, 4 U and 4 V bytes.
; The inner loop tests its bound after the body, so it runs at least once per
; row; height must be >= 1 because the row counter is decremented before the test.

convYUY2to422_MMX proc public frame

; Stack arguments relative to rbp once the prolog has run "push rbp / mov rbp,rsp".
pitch1 equ dword ptr[rbp+48]
pitch2Y equ dword ptr[rbp+56]
pitch2UV equ dword ptr[rbp+64]
width_ equ dword ptr[rbp+72]
height equ dword ptr[rbp+80]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rbx
	.pushreg rbx
	push rsi
	.pushreg rsi
	push rdi
	.pushreg rdi
	push r12
	.pushreg r12
	.endprolog

	mov rdi,rcx					;rdi = src
	mov rbx,rdx					;rbx = py
	mov rdx,r8					;rdx = pu
	mov rsi,r9					;rsi = pv
	xor rcx,rcx
	mov ecx,width_
	shr ecx,1					;rcx = loop bound for rax (width_/2)
	movq mm5,qword ptr Ymask	;mm5 = 00FFh in every word: keeps the even (Y) bytes

	xor r8,r8
	mov r8d,height				;r8 = row counter
	movsxd r9,pitch1			;pitches are sign-extended to 64 bits
	movsxd r10,pitch2Y
	movsxd r11,pitch2UV
	mov r12,4					;index step per inner iteration

yloop:
	xor rax,rax					;rax = index, restarted for every row
	align 16
xloop:
	movq mm0,[rdi+rax*4]		;VYUYVYUY
	movq mm1,[rdi+rax*4+8]		;VYUYVYUY
	movq mm2,mm0				;VYUYVYUY
	movq mm3,mm1				;VYUYVYUY
	pand mm0,mm5				;0Y0Y0Y0Y
	psrlw mm2,8					;0V0U0V0U
	pand mm1,mm5				;0Y0Y0Y0Y
	psrlw mm3,8					;0V0U0V0U
	packuswb mm0,mm1			;YYYYYYYY
	packuswb mm2,mm3			;VUVUVUVU
	movq mm4,mm2				;VUVUVUVU
	pand mm2,mm5				;0U0U0U0U
	psrlw mm4,8					;0V0V0V0V
	packuswb mm2,mm2			;xxxxUUUU
	packuswb mm4,mm4			;xxxxVVVV
	movq [rbx+rax*2],mm0		;store y
	movd dword ptr[rdx+rax],mm2	;store u
	movd dword ptr[rsi+rax],mm4	;store v
	add rax,r12
	cmp rax,rcx
	jl short xloop
	add rdi,r9					;next source row
	add rbx,r10					;next Y row
	add rdx,r11					;next U row
	add rsi,r11					;next V row
	dec r8
	jnz short yloop
	emms						;leave MMX state so x87 code can run afterwards

	pop r12
	pop rdi
	pop rsi
	pop rbx
	pop rbp

	ret

convYUY2to422_MMX endp
;convYUY2to422_SSE2 proc src:dword,py:dword,pu:dword,pv:dword,pitch1:dword,pitch2Y:dword,pitch2UV:dword,width_:dword,height:dword
; src = rcx
; py = rdx
; pu = r8
; pv = r9
;
; x64 version: split packed YUY2 rows into Y, U and V planes (SSE2).
; The prototype line above is kept from the x86 source; here the four pointers
; arrive in full 64-bit registers and the remaining five arguments are read as
; dwords from the caller's stack through rbp.
; width_ is a count of 16-source-byte (8-pixel) groups: width_>>1 main
; iterations handle 32 source bytes each, and an odd width_ adds one 16-byte tail.
; movdqa is used for the source loads and the main-loop Y store, so those
; addresses must be 16-byte aligned. height must be >= 1.
; NOTE(review): the unit of width_ is read off the loop structure; confirm with the C++ caller.

convYUY2to422_SSE2 proc public frame

; Stack arguments relative to rbp once the prolog has run "push rbp / mov rbp,rsp".
pitch1 equ dword ptr[rbp+48]
pitch2Y equ dword ptr[rbp+56]
pitch2UV equ dword ptr[rbp+64]
width_ equ dword ptr[rbp+72]
height equ dword ptr[rbp+80]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rbx
	.pushreg rbx
	push rsi
	.pushreg rsi
	push rdi
	.pushreg rdi
	push r12
	.pushreg r12
	push r13
	.pushreg r13
	push r14
	.pushreg r14
	push r15
	.pushreg r15
	.endprolog

	mov rdi,rcx					;rdi = src
	mov rbx,rdx					;rbx = py
	mov rdx,r8					;rdx = pu
	mov rsi,r9					;rsi = pv
	xor rcx,rcx
	mov r13d,width_				;r13d = width_ (kept for the odd-tail test)

	xor r8,r8
	mov r8d,height				;r8 = row counter
	movsxd r9,pitch1			;pitches are sign-extended to 64 bits
	movsxd r10,pitch2Y
	movsxd r11,pitch2UV
	mov r12,8					;index step per main iteration
	mov r14d,r13d
	shr r14d,1					;r14d = number of 32-byte main iterations per row
	mov r15d,1					;mask for the odd-tail test

yloop_2:
	xor rax,rax					;rax = chroma byte index, restarted for every row
	mov ecx,r14d
	or ecx,ecx
	jz short suite1_2			;no main iteration: go straight to the tail test

xloop_2:
	movdqa xmm0,XMMWORD ptr[rdi+4*rax]		;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
	movdqa xmm1,XMMWORD ptr[rdi+4*rax+16]	;V8Y16U8Y15V7Y14U7Y13 V6Y12U6Y11V5Y10U5Y9
	movdqa xmm2,xmm0						;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
	punpcklbw xmm0,xmm1						;V6V2Y12Y4U6U2Y11Y3 V5V1Y10Y2U5U1Y9Y1
	punpckhbw xmm2,xmm1						;V8V4Y16Y8U8U4Y15Y7 V7V3Y14Y6U7U3Y13Y5
	movdqa xmm1,xmm0
	punpcklbw xmm0,xmm2						;V7V5V3V1Y14Y10Y6Y2 U7U5U3U1Y13Y9Y5Y1
	punpckhbw xmm1,xmm2						;V8V6V4V2Y16Y12Y8Y4 U8U6U4U2Y15Y11Y7Y3
	movdqa xmm2,xmm0
	punpcklbw xmm0,xmm1						;U8U7U6U5U4U3U2U1 Y15Y13Y11Y9Y7Y5Y3Y1
	punpckhbw xmm2,xmm1						;V8V7V6V5V4V3V2V1 Y16Y14Y12Y10Y8Y6Y4Y2
	movhps qword ptr [rdx+rax],xmm0			;store 8 U bytes (high half)
	punpcklbw xmm0,xmm2						;Y16Y15Y14Y13Y12Y11Y10Y9Y8Y7Y6Y5Y4Y3Y2Y1
	movhps qword ptr [rsi+rax],xmm2			;store 8 V bytes (high half)
	movdqa XMMWORD ptr[rbx+2*rax],xmm0		;store 16 Y bytes
	add rax,r12
	loop xloop_2

suite1_2:
	mov ecx,r13d
	and ecx,r15d				;odd width_: one extra 16-byte group remains
	jz short suite2_2

	movdqa xmm0,XMMWORD ptr[rdi+4*rax]		;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
	movhlps xmm1,xmm0						;V4Y8U4Y7V3Y6U3Y5 V4Y8U4Y7V3Y6U3Y5
	punpcklbw xmm0,xmm1						;V4V2Y8Y4U4U2Y7Y3 V3V1Y6Y2U3U1Y5Y1
	movhlps xmm1,xmm0						;V4V2Y8Y4U4U2Y7Y3 V4V2Y8Y4U4U2Y7Y3
	punpcklbw xmm0,xmm1						;V4V3V2V1Y8Y6Y4Y2 U4U3U2U1Y7Y5Y3Y1
	movhlps xmm2,xmm0						;xxxxxxxx V4V3V2V1Y8Y6Y4Y2
	movdqa xmm1,xmm0
	psrlq xmm0,32							;0000V4V3V2V1 0000U4U3U2U1
	punpcklbw xmm1,xmm2						; xxxxxxxx Y8Y7Y6Y5Y4Y3Y2Y1
	movd dword ptr[rdx+rax],xmm0			;store 4 U bytes
	movhlps xmm2,xmm0						;bring V4V3V2V1 down to the low dword
	movq qword ptr[rbx+2*rax],xmm1			;store 8 Y bytes
	movd dword ptr[rsi+rax],xmm2			;store 4 V bytes

suite2_2:
	add rdi,r9					;next source row
	add rbx,r10					;next Y row
	add rdx,r11					;next U row
	add rsi,r11					;next V row
	dec r8
	jnz yloop_2

	pop r15
	pop r14
	pop r13
	pop r12
	pop rdi
	pop rsi
	pop rbx
	pop rbp

	ret

convYUY2to422_SSE2 endp
;convYUY2to422_AVX proc src:dword,py:dword,pu:dword,pv:dword,pitch1:dword,pitch2Y:dword,pitch2UV:dword,width_:dword,height:dword
; src = rcx
; py = rdx
; pu = r8
; pv = r9
;
; x64 version: split packed YUY2 rows into Y, U and V planes (VEX-encoded
; 128-bit instructions). Same register set-up and loop structure as the x64
; SSE2 routine.
; The prototype line above is kept from the x86 source; here the four pointers
; arrive in full 64-bit registers and the remaining five arguments are read as
; dwords from the caller's stack through rbp.
; width_ is a count of 16-source-byte (8-pixel) groups: width_>>1 main
; iterations handle 32 source bytes each, and an odd width_ adds one 16-byte tail.
; vmovdqa is used for the source loads and the main-loop Y store, so those
; addresses must be 16-byte aligned. height must be >= 1.
; NOTE(review): the unit of width_ is read off the loop structure; confirm with the C++ caller.

convYUY2to422_AVX proc public frame

; Stack arguments relative to rbp once the prolog has run "push rbp / mov rbp,rsp".
pitch1 equ dword ptr[rbp+48]
pitch2Y equ dword ptr[rbp+56]
pitch2UV equ dword ptr[rbp+64]
width_ equ dword ptr[rbp+72]
height equ dword ptr[rbp+80]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rbx
	.pushreg rbx
	push rsi
	.pushreg rsi
	push rdi
	.pushreg rdi
	push r12
	.pushreg r12
	push r13
	.pushreg r13
	push r14
	.pushreg r14
	push r15
	.pushreg r15
	.endprolog

	mov rdi,rcx					;rdi = src
	mov rbx,rdx					;rbx = py
	mov rdx,r8					;rdx = pu
	mov rsi,r9					;rsi = pv
	xor rcx,rcx
	mov r13d,width_				;r13d = width_ (kept for the odd-tail test)

	xor r8,r8
	mov r8d,height				;r8 = row counter
	movsxd r9,pitch1			;pitches are sign-extended to 64 bits
	movsxd r10,pitch2Y
	movsxd r11,pitch2UV
	mov r12,8					;index step per main iteration
	mov r14d,r13d
	shr r14d,1					;r14d = number of 32-byte main iterations per row
	mov r15d,1					;mask for the odd-tail test

yloop_2_AVX:
	xor rax,rax					;rax = chroma byte index, restarted for every row
	mov ecx,r14d
	or ecx,ecx
	jz short suite1_2_AVX		;no main iteration: go straight to the tail test

	; The "high" unpack is issued before the "low" one each round because the
	; low result overwrites xmm0, which the high unpack still needs as input.
xloop_2_AVX:
	vmovdqa xmm0,XMMWORD ptr[rdi+4*rax]		;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
	vmovdqa xmm1,XMMWORD ptr[rdi+4*rax+16]	;V8Y16U8Y15V7Y14U7Y13 V6Y12U6Y11V5Y10U5Y9
	vpunpckhbw xmm2,xmm0,xmm1				;V8V4Y16Y8U8U4Y15Y7 V7V3Y14Y6U7U3Y13Y5
	vpunpcklbw xmm0,xmm0,xmm1				;V6V2Y12Y4U6U2Y11Y3 V5V1Y10Y2U5U1Y9Y1
	vpunpckhbw xmm1,xmm0,xmm2				;V8V6V4V2Y16Y12Y8Y4 U8U6U4U2Y15Y11Y7Y3
	vpunpcklbw xmm0,xmm0,xmm2				;V7V5V3V1Y14Y10Y6Y2 U7U5U3U1Y13Y9Y5Y1
	vpunpckhbw xmm2,xmm0,xmm1				;V8V7V6V5V4V3V2V1 Y16Y14Y12Y10Y8Y6Y4Y2
	vpunpcklbw xmm0,xmm0,xmm1				;U8U7U6U5U4U3U2U1 Y15Y13Y11Y9Y7Y5Y3Y1
	vmovhps qword ptr [rdx+rax],xmm0		;store 8 U bytes (high half)
	vpunpcklbw xmm0,xmm0,xmm2				;Y16Y15Y14Y13Y12Y11Y10Y9Y8Y7Y6Y5Y4Y3Y2Y1
	vmovhps qword ptr [rsi+rax],xmm2		;store 8 V bytes (high half)
	vmovdqa XMMWORD ptr[rbx+2*rax],xmm0		;store 16 Y bytes
	add rax,r12
	loop xloop_2_AVX

suite1_2_AVX:
	mov ecx,r13d
	and ecx,r15d				;odd width_: one extra 16-byte group remains
	jz short suite2_2_AVX

	vmovdqa xmm0,XMMWORD ptr[rdi+4*rax]		;V4Y8U4Y7V3Y6U3Y5 V2Y4U2Y3V1Y2U1Y1
	vmovhlps xmm1,xmm1,xmm0					;V4Y8U4Y7V3Y6U3Y5 V4Y8U4Y7V3Y6U3Y5
	vpunpcklbw xmm0,xmm0,xmm1				;V4V2Y8Y4U4U2Y7Y3 V3V1Y6Y2U3U1Y5Y1
	vmovhlps xmm1,xmm1,xmm0					;V4V2Y8Y4U4U2Y7Y3 V4V2Y8Y4U4U2Y7Y3
	vpunpcklbw xmm0,xmm0,xmm1				;V4V3V2V1Y8Y6Y4Y2 U4U3U2U1Y7Y5Y3Y1
	vmovhlps xmm2,xmm2,xmm0					;xxxxxxxx V4V3V2V1Y8Y6Y4Y2
	vpunpcklbw xmm1,xmm0,xmm2				; xxxxxxxx Y8Y7Y6Y5Y4Y3Y2Y1
	vpsrlq xmm0,xmm0,32						;0000V4V3V2V1 0000U4U3U2U1
	vmovd dword ptr[rdx+rax],xmm0			;store 4 U bytes
	vmovhlps xmm2,xmm2,xmm0					;bring V4V3V2V1 down to the low dword
	vmovq qword ptr[rbx+2*rax],xmm1			;store 8 Y bytes
	vmovd dword ptr[rsi+rax],xmm2			;store 4 V bytes

suite2_2_AVX:
	add rdi,r9					;next source row
	add rbx,r10					;next Y row
	add rdx,r11					;next U row
	add rsi,r11					;next V row
	dec r8
	jnz yloop_2_AVX

	pop r15
	pop r14
	pop r13
	pop r12
	pop rdi
	pop rsi
	pop rbx
	pop rbp

	ret

convYUY2to422_AVX endp
;conv422toYUY2_MMX proc py:dword,pu:dword,pv:dword,dst:dword,pitch1Y:dword,pitch1UV:dword,pitch2:dword,width_:dword,height:dword
; py = rcx
; pu = rdx
; pv = r8
; dst = r9
;
; x64 version: interleave separate Y, U and V planes into packed YUY2 rows (MMX).
; The prototype line above is kept from the x86 source; here the four pointers
; arrive in full 64-bit registers and the remaining five arguments are read as
; dwords from the caller's stack through rbp.
; Each inner iteration reads 8 Y, 4 U and 4 V bytes and stores 16 packed bytes.
; The inner loop tests its bound after the body, so it runs at least once per
; row; height must be >= 1 because the row counter is decremented before the test.

conv422toYUY2_MMX proc public frame

; Stack arguments relative to rbp once the prolog has run "push rbp / mov rbp,rsp".
pitch1Y equ dword ptr[rbp+48]
pitch1UV equ dword ptr[rbp+56]
pitch2 equ dword ptr[rbp+64]
width_ equ dword ptr[rbp+72]
height equ dword ptr[rbp+80]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rbx
	.pushreg rbx
	push rsi
	.pushreg rsi
	push rdi
	.pushreg rdi
	push r12
	.pushreg r12
	.endprolog

	mov rbx,rcx					;rbx = py (pu is used directly from rdx)
	mov rsi,r8					;rsi = pv
	mov rdi,r9					;rdi = dst
	xor rcx,rcx
	mov ecx,width_
	shr ecx,1					;rcx = loop bound for rax (width_/2)

	xor r8,r8
	mov r8d,height				;r8 = row counter
	movsxd r9,pitch1Y			;pitches are sign-extended to 64 bits
	movsxd r10,pitch1UV
	movsxd r11,pitch2
	mov r12,4					;index step per inner iteration

yloop_3:
	xor rax,rax					;rax = chroma byte index, restarted for every row
	align 16
xloop_3:
	movq mm0,[rbx+rax*2]		;YYYYYYYY
	movd mm1,dword ptr[rdx+rax]	;0000UUUU
	movd mm2,dword ptr[rsi+rax]	;0000VVVV
	movq mm3,mm0				;YYYYYYYY
	punpcklbw mm1,mm2			;VUVUVUVU
	punpcklbw mm0,mm1			;VYUYVYUY
	punpckhbw mm3,mm1			;VYUYVYUY
	movq [rdi+rax*4],mm0		;store
	movq [rdi+rax*4+8],mm3		;store
	add rax,r12
	cmp rax,rcx
	jl short xloop_3
	add rbx,r9					;next Y row
	add rdx,r10					;next U row
	add rsi,r10					;next V row
	add rdi,r11					;next destination row
	dec r8
	jnz short yloop_3
	emms						;leave MMX state so x87 code can run afterwards

	pop r12
	pop rdi
	pop rsi
	pop rbx
	pop rbp

	ret

conv422toYUY2_MMX endp
mm3,mm0 ;YYYYYYYY 397 | punpcklbw mm1,mm2 ;VUVUVUVU 398 | punpcklbw mm0,mm1 ;VYUYVYUY 399 | punpckhbw mm3,mm1 ;VYUYVYUY 400 | movq [rdi+rax*4],mm0 ;store 401 | movq [rdi+rax*4+8],mm3 ;store 402 | add rax,r12 403 | cmp rax,rcx 404 | jl short xloop_3 405 | add rbx,r9 406 | add rdx,r10 407 | add rsi,r10 408 | add rdi,r11 409 | dec r8 410 | jnz short yloop_3 411 | emms 412 | 413 | pop r12 414 | pop rdi 415 | pop rsi 416 | pop rbx 417 | pop rbp 418 | 419 | ret 420 | 421 | conv422toYUY2_MMX endp 422 | 423 | 424 | ;conv422toYUY2_SSE2 proc py:dword,pu:dword,pv:dword,dst:dword,pitch1Y:dword,pitch1UV:dword,pitch2:dword,width_:dword,height:dword 425 | ; py = rcx 426 | ; pu = rdx 427 | ; pv = r8 428 | ; dst = r9 429 | 430 | conv422toYUY2_SSE2 proc public frame 431 | 432 | pitch1Y equ dword ptr[rbp+48] 433 | pitch1UV equ dword ptr[rbp+56] 434 | pitch2 equ dword ptr[rbp+64] 435 | width_ equ dword ptr[rbp+72] 436 | height equ dword ptr[rbp+80] 437 | 438 | push rbp 439 | .pushreg rbp 440 | mov rbp,rsp 441 | push rbx 442 | .pushreg rbx 443 | push rsi 444 | .pushreg rsi 445 | push rdi 446 | .pushreg rdi 447 | push r12 448 | .pushreg r12 449 | push r13 450 | .pushreg r13 451 | push r14 452 | .pushreg r14 453 | push r15 454 | .pushreg r15 455 | .endprolog 456 | 457 | mov rbx,rcx 458 | mov rsi,r8 459 | mov rdi,r9 460 | xor rcx,rcx 461 | mov r15d,width_ 462 | shr ecx,1 463 | 464 | xor r8,r8 465 | mov r8d,height 466 | movsxd r9,pitch1Y 467 | movsxd r10,pitch1UV 468 | movsxd r11,pitch2 469 | mov r12,16 470 | mov r13,32 471 | mov r14,2 472 | 473 | yloop_4: 474 | xor rax,rax 475 | mov ecx,r15d 476 | shr ecx,1 477 | jz short suite1 478 | 479 | xloop_4: 480 | movq xmm1,qword ptr[rdx+4*rax] ;00000000UUUUUUUU 481 | movq xmm0,qword ptr[rsi+4*rax] ;00000000VVVVVVVV 482 | movdqa xmm2,XMMWORD ptr[rbx+8*rax] ;YYYYYYYYYYYYYYYY 483 | punpcklbw xmm1,xmm0 ;VUVUVUVUVUVUVUVU 484 | movdqa xmm3,xmm2 485 | add rax,r14 486 | punpcklbw xmm2,xmm1 ;VYUYVYUYVYUYVYUY 487 | punpckhbw xmm3,xmm1 ;VYUYVYUYVYUYVYUY 488 
;conv422toYUY2_AVX proc py:dword,pu:dword,pv:dword,dst:dword,pitch1Y:dword,pitch1UV:dword,pitch2:dword,width_:dword,height:dword
; py = rcx
; pu = rdx
; pv = r8
; dst = r9
;
; Packs three separate Y, U and V rows into one interleaved row (Y U Y V ...),
; using VEX-encoded 128-bit instructions.
;
; width_ is counted in groups of 8 luma bytes: the main loop runs width_/2
; times and handles 16 Y + 8 U + 8 V bytes per pass (32 bytes written); when
; width_ is odd one extra group of 8 Y + 4 U + 4 V bytes (16 bytes written)
; follows.
; The Y loads and all stores use vmovdqa, so the Y row and the destination
; row must be 16-byte aligned.
; The source pointers are indexed from the start of the row and advanced by
; their full pitch. The destination pointer is advanced while writing and
; pitch2 is added afterwards, so pitch2 is the distance from the end of the
; written data to the start of the next row.
; height is decremented before it is tested, so it must be non-zero on entry.
;
; Changes against the previous version: the dead "xor rcx,rcx / shr ecx,1"
; pair in the set-up is gone (it shifted a register that was already zero),
; "loop" is replaced by dec/jnz, and the constants 2, 16 and 32 are immediates
; so r12-r14 no longer need to be saved.

conv422toYUY2_AVX proc public frame

pitch1Y equ dword ptr[rbp+48]
pitch1UV equ dword ptr[rbp+56]
pitch2 equ dword ptr[rbp+64]
width_ equ dword ptr[rbp+72]
height equ dword ptr[rbp+80]

	push rbp
	.pushreg rbp
	mov rbp,rsp
	push rbx
	.pushreg rbx
	push rsi
	.pushreg rsi
	push rdi
	.pushreg rdi
	push r15
	.pushreg r15
	.endprolog

	mov rbx,rcx			;rbx = py
	mov rsi,r8			;rsi = pv
	mov rdi,r9			;rdi = dst (rdx keeps pu)
	mov r15d,width_			;kept for the per-row counter and the odd test

	mov r8d,height			;32-bit move clears the upper half of r8
	movsxd r9,pitch1Y		;strides are signed 32-bit values
	movsxd r10,pitch1UV
	movsxd r11,pitch2

yloop_4_AVX:
	xor rax,rax			;rax = index, scaled by 4 (chroma) and 8 (luma)
	mov ecx,r15d
	shr ecx,1			;number of full 16-luma groups in this row
	jz short suite1_AVX

xloop_4_AVX:
	vmovq xmm1,qword ptr[rdx+4*rax]		;00000000UUUUUUUU
	vmovq xmm0,qword ptr[rsi+4*rax]		;00000000VVVVVVVV
	vmovdqa xmm2,XMMWORD ptr[rbx+8*rax]	;YYYYYYYYYYYYYYYY
	vpunpcklbw xmm1,xmm1,xmm0		;VUVUVUVUVUVUVUVU
	add rax,2
	vpunpckhbw xmm3,xmm2,xmm1		;VYUYVYUYVYUYVYUY
	vpunpcklbw xmm2,xmm2,xmm1		;VYUYVYUYVYUYVYUY

	vmovdqa XMMWORD ptr[rdi],xmm2		;store
	vmovdqa XMMWORD ptr[rdi+16],xmm3	;store
	add rdi,32
	dec ecx
	jnz short xloop_4_AVX

suite1_AVX:
	test r15d,1			;odd width_: one half-size group remains
	jz short suite2_AVX

	vmovd xmm1,dword ptr[rdx+4*rax]		;000000000000UUUU
	vmovd xmm0,dword ptr[rsi+4*rax]		;000000000000VVVV
	vmovq xmm2,qword ptr[rbx+8*rax]		;00000000YYYYYYYY
	vpunpcklbw xmm1,xmm1,xmm0		;00000000VUVUVUVU
	vpunpcklbw xmm2,xmm2,xmm1		;VYUYVYUYVYUYVYUY

	vmovdqa XMMWORD ptr[rdi],xmm2		;store
	add rdi,16

suite2_AVX:
	add rbx,r9
	add rdx,r10
	add rsi,r10
	add rdi,r11
	dec r8
	jnz short yloop_4_AVX

	pop r15
	pop rdi
	pop rsi
	pop rbx
	pop rbp

	ret

conv422toYUY2_AVX endp


end
sources 38 | 39 | 40 | Fichiers sources 41 | 42 | 43 | Fichiers sources 44 | 45 | 46 | Fichiers sources 47 | 48 | 49 | Fichiers sources 50 | 51 | 52 | Fichiers sources 53 | 54 | 55 | Fichiers sources 56 | 57 | 58 | Fichiers sources 59 | 60 | 61 | 62 | 63 | Fichiers d%27en-tête 64 | 65 | 66 | Fichiers d%27en-tête 67 | 68 | 69 | Fichiers d%27en-tête 70 | 71 | 72 | Fichiers d%27en-tête 73 | 74 | 75 | Fichiers d%27en-tête 76 | 77 | 78 | Fichiers d%27en-tête 79 | 80 | 81 | Fichiers d%27en-tête 82 | 83 | 84 | Fichiers d%27en-tête 85 | 86 | 87 | Fichiers d%27en-tête 88 | 89 | 90 | Fichiers d%27en-tête 91 | 92 | 93 | Fichiers d%27en-tête 94 | 95 | 96 | Fichiers d%27en-tête 97 | 98 | 99 | Fichiers d%27en-tête 100 | 101 | 102 | Fichiers d%27en-tête 103 | 104 | 105 | Fichiers d%27en-tête 106 | 107 | 108 | Fichiers d%27en-tête 109 | 110 | 111 | 112 | 113 | Fichiers sources 114 | 115 | 116 | Fichiers sources 117 | 118 | 119 | Fichiers sources 120 | 121 | 122 | Fichiers sources 123 | 124 | 125 | Fichiers sources 126 | 127 | 128 | Fichiers sources 129 | 130 | 131 | Fichiers sources 132 | 133 | 134 | Fichiers sources 135 | 136 | 137 | Fichiers sources 138 | 139 | 140 | Fichiers sources 141 | 142 | 143 | Fichiers sources 144 | 145 | 146 | Fichiers sources 147 | 148 | 149 | Fichiers sources 150 | 151 | 152 | Fichiers sources 153 | 154 | 155 | 156 | 157 | Fichiers sources 158 | 159 | 160 | Fichiers sources 161 | 162 | 163 | Fichiers sources 164 | 165 | 166 | Fichiers sources 167 | 168 | 169 | 170 | 171 | Fichiers de ressources 172 | 173 | 174 | 175 | 176 | Fichiers de ressources 177 | 178 | 179 | -------------------------------------------------------------------------------- /Plugins_JPSDR/Plugins_JPSDR.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | -------------------------------------------------------------------------------- /Plugins_JPSDR/ThreadPool.cpp: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jpsdr/plugins_JPSDR/7826b045cfc28a0738088ec03a5e6f2edd44c9f0/Plugins_JPSDR/ThreadPool.cpp -------------------------------------------------------------------------------- /Plugins_JPSDR/ThreadPool.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Threadpool 3 | * 4 | * Create and manage a threadpool. 5 | * Copyright (C) 2016 JPSDR 6 | * 7 | * Threadpool is free software; you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation; either version 2, or (at your option) 10 | * any later version. 11 | * 12 | * Threadpool is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GNU Make; see the file COPYING. If not, write to 19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
20 | * 21 | */ 22 | 23 | #ifndef __ThreadPool_H__ 24 | #define __ThreadPool_H__ 25 | 26 | #include 27 | 28 | #include "./ThreadPoolDef.h" 29 | 30 | #define THREADPOOL_VERSION "ThreadPool 1.4.4" 31 | 32 | #define MAX_PHYSICAL_CORES 64 33 | 34 | typedef struct _MT_Data_Thread 35 | { 36 | Public_MT_Data_Thread *MTData; 37 | uint8_t f_process,thread_Id; 38 | HANDLE nextJob,jobFinished; 39 | } MT_Data_Thread; 40 | 41 | 42 | typedef struct _Arch_CPU 43 | { 44 | uint8_t NbPhysCore,NbLogicCPU; 45 | uint8_t NbHT[MAX_PHYSICAL_CORES]; 46 | ULONG_PTR ProcMask[MAX_PHYSICAL_CORES]; 47 | ULONG_PTR FullMask; 48 | } Arch_CPU; 49 | 50 | 51 | class ThreadPool 52 | { 53 | public : 54 | ThreadPool(void); 55 | virtual ~ThreadPool(); 56 | 57 | protected : 58 | 59 | Arch_CPU CPU; 60 | 61 | public : 62 | 63 | uint8_t GetThreadNumber(uint8_t thread_number,bool logical); 64 | bool AllocateThreads(uint8_t thread_number,uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore, 65 | bool SetAffinity,bool sleep,ThreadLevelName priority); 66 | bool AllocateThreads(uint8_t thread_number,uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore, 67 | bool SetAffinity,bool sleep) 68 | {return(AllocateThreads(thread_number,offset_core,offset_ht,UseMaxPhysCore,SetAffinity,sleep,NormalThreadLevel));} 69 | bool DeAllocateThreads(void); 70 | bool ChangeThreadsAffinity(uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,bool SetAffinity); 71 | bool ChangeThreadsLevel(ThreadLevelName priority); 72 | bool RequestThreadPool(uint8_t thread_number,Public_MT_Data_Thread *Data,ThreadLevelName priority); 73 | bool RequestThreadPool(uint8_t thread_number,Public_MT_Data_Thread *Data) 74 | {return(RequestThreadPool(thread_number,Data,NoneThreadLevel));} 75 | bool ReleaseThreadPool(bool sleep); 76 | bool StartThreads(void); 77 | bool WaitThreadsEnd(void); 78 | bool GetThreadPoolStatus(void) {return(Status_Ok);} 79 | uint8_t GetCurrentThreadAllocated(void) {return(CurrentThreadsAllocated);} 80 | uint8_t 
GetCurrentThreadUsed(void) {return(CurrentThreadsUsed);} 81 | uint8_t GetLogicalCPUNumber(void) {return(CPU.NbLogicCPU);} 82 | uint8_t GetPhysicalCoreNumber(void) {return(CPU.NbPhysCore);} 83 | 84 | protected : 85 | 86 | MT_Data_Thread MT_Thread[MAX_MT_THREADS]; 87 | HANDLE nextJob[MAX_MT_THREADS],jobFinished[MAX_MT_THREADS]; 88 | HANDLE thds[MAX_MT_THREADS]; 89 | DWORD tids[MAX_MT_THREADS]; 90 | ULONG_PTR ThreadMask[MAX_MT_THREADS]; 91 | bool ThreadSleep[MAX_MT_THREADS]; 92 | ThreadLevelName nPriority; 93 | 94 | bool Status_Ok; 95 | uint8_t TotalThreadsRequested,CurrentThreadsAllocated,CurrentThreadsUsed; 96 | 97 | void FreeThreadPool(void); 98 | void DestroyThreadPool(void); 99 | void CreateThreadPool(uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,bool SetAffinity, 100 | bool sleep,ThreadLevelName priority); 101 | 102 | private : 103 | 104 | static DWORD WINAPI StaticThreadpool(LPVOID lpParam); 105 | 106 | ThreadPool (const ThreadPool &other); 107 | ThreadPool& operator = (const ThreadPool &other); 108 | bool operator == (const ThreadPool &other) const; 109 | bool operator != (const ThreadPool &other) const; 110 | }; 111 | 112 | #endif // __ThreadPool_H__ 113 | -------------------------------------------------------------------------------- /Plugins_JPSDR/ThreadPoolDef.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Threadpool 3 | * 4 | * Create and manage a threadpool. 5 | * Copyright (C) 2016 JPSDR 6 | * 7 | * Threadpool is free software; you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation; either version 2, or (at your option) 10 | * any later version. 11 | * 12 | * Threadpool is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
#ifndef __ThreadPoolDef_H__
#define __ThreadPoolDef_H__

// Needed for uint8_t. The header name on this line was lost when the file
// was extracted; <stdint.h> is the standard header that provides the type.
#include <stdint.h>

#define MAX_MT_THREADS	128 // Maximum possible 255
#define MAX_THREAD_POOL	64 // Maximum possible 127

// Signature of a job function handed to the pool.
typedef void (*ThreadPoolFunction)(void *ptr);

// Thread priority levels; values run from 0 (NoneThreadLevel) to
// 7 (CriticalThreadLevel) in declaration order.
enum ThreadLevelName {NoneThreadLevel,IdleThreadLevel,LowestThreadLevel,BelowThreadLevel,
	NormalThreadLevel,AboveThreadLevel,HighestThreadLevel,CriticalThreadLevel};

// Job description supplied by the caller, one per thread.
// thread_Id is 8 bits wide, which is why MAX_MT_THREADS cannot exceed 255.
typedef struct _Public_MT_Data_Thread
{
	ThreadPoolFunction pFunc;
	void *pClass;
	uint8_t f_process,thread_Id;
	void *pData;
} Public_MT_Data_Thread;


#endif // __ThreadPoolDef_H__
If not, write to 19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 20 | * 21 | */ 22 | 23 | #ifndef __ThreadPoolInterface_H__ 24 | #define __ThreadPoolInterface_H__ 25 | 26 | #include 27 | #include 28 | 29 | #include "./ThreadPoolDef.h" 30 | 31 | #define THREADPOOLINTERFACE_VERSION "ThreadPoolInterface 1.12.0" 32 | 33 | class ThreadPoolInterface; 34 | 35 | class UserData 36 | { 37 | friend ThreadPoolInterface; 38 | 39 | public : 40 | 41 | UserData(void); 42 | virtual ~UserData(void); 43 | 44 | protected : 45 | 46 | uint32_t UserId; 47 | bool AllowSeveral; 48 | bool AllowWaiting; 49 | bool AllowTimeOut; 50 | bool AllowRetryMax; 51 | DWORD TimeOut; 52 | uint8_t RetryMax; 53 | int8_t NbrePool; 54 | int8_t UsedPool[MAX_THREAD_POOL]; 55 | }; 56 | 57 | 58 | class ThreadPoolInterface 59 | { 60 | public : 61 | 62 | virtual ~ThreadPoolInterface(void); 63 | static ThreadPoolInterface* Init(uint8_t num); 64 | 65 | uint8_t GetThreadNumber(uint8_t thread_number,bool logical); 66 | int16_t AddPool(uint8_t num); 67 | bool CreatePool(uint8_t num); 68 | bool DeletePool(uint8_t num); 69 | bool RemovePool(uint8_t num); 70 | bool AllocateThreads(uint8_t thread_number,uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore, 71 | bool SetAffinity,bool sleep,ThreadLevelName priority,int8_t nPool); 72 | bool AllocateThreads(uint8_t thread_number,uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore, 73 | bool SetAffinity,bool sleep,int8_t nPool) 74 | {return(AllocateThreads(thread_number,offset_core,offset_ht,UseMaxPhysCore,SetAffinity,sleep, 75 | NormalThreadLevel,nPool));} 76 | bool GetUserId(uint32_t &UserId); 77 | bool RemoveUserId(uint32_t UserId); 78 | bool ChangeThreadsAffinity(uint8_t offset_core,uint8_t offset_ht,bool UseMaxPhysCore,bool SetAffinity,int8_t nPool); 79 | bool ChangeThreadsLevel(ThreadLevelName priority,int8_t nPool); 80 | bool DeAllocateUserThreads(uint32_t UserId,bool check); 81 | bool DeAllocatePoolThreads(uint8_t nPool,bool 
check); 82 | bool DeAllocateAllThreads(bool check); 83 | bool RequestThreadPool(uint32_t UserId,uint8_t thread_number,Public_MT_Data_Thread *Data, 84 | ThreadLevelName priority,int8_t nPool,bool Exclusive); 85 | bool RequestThreadPool(uint32_t UserId,int8_t &idxPool,uint8_t thread_number,Public_MT_Data_Thread *Data, 86 | ThreadLevelName priority,int8_t &nPool,bool Exclusive); 87 | bool RequestThreadPool(uint32_t UserId,int8_t &idxPool,uint8_t thread_number,Public_MT_Data_Thread *Data); 88 | bool RequestThreadPool(uint32_t UserId,int8_t &idxPool,uint8_t thread_number,Public_MT_Data_Thread *Data, 89 | ThreadLevelName priority); 90 | bool RequestThreadPool(uint32_t UserId,uint8_t thread_number,Public_MT_Data_Thread *Data, 91 | int8_t nPool,bool Exclusive) 92 | {return(RequestThreadPool(UserId,thread_number,Data,NoneThreadLevel,nPool,Exclusive));} 93 | bool RequestThreadPool(uint32_t UserId,uint8_t thread_number,Public_MT_Data_Thread *Data) 94 | {return(RequestThreadPool(UserId,thread_number,Data,NoneThreadLevel,-1,false));} 95 | bool RequestThreadPool(uint32_t UserId,uint8_t thread_number,Public_MT_Data_Thread *Data, 96 | ThreadLevelName priority) 97 | {return(RequestThreadPool(UserId,thread_number,Data,priority,-1,false));} 98 | bool ReleaseThreadPool(uint32_t UserId,bool sleep); 99 | bool ReleaseThreadPool(uint32_t UserId,bool sleep,int8_t idxPool); 100 | bool StartThreads(uint32_t UserId); 101 | bool StartThreads(uint32_t UserId,int8_t idxPool); 102 | bool WaitThreadsEnd(uint32_t UserId); 103 | bool WaitThreadsEnd(uint32_t UserId,int8_t idxPool); 104 | bool GetThreadPoolStatus(uint32_t UserId,int8_t idxPool,int8_t nPool); 105 | uint8_t GetCurrentThreadAllocated(uint32_t UserId,int8_t idxPool,int8_t nPool); 106 | uint8_t GetCurrentThreadUsed(uint32_t UserId,int8_t idxPool,int8_t nPool); 107 | bool EnableAllowSeveral(uint32_t UserId); 108 | bool DisableAllowSeveral(uint32_t UserId); 109 | bool IsAllowedSeveral(uint32_t UserId); 110 | bool EnableWaitonRequest(uint32_t 
UserId); 111 | bool DisableWaitonRequest(uint32_t UserId); 112 | bool EnableTimeOutonRequest(uint32_t UserId); 113 | bool DisableTimeOutonRequest(uint32_t UserId); 114 | bool EnableRetryMaxonRequest(uint32_t UserId); 115 | bool DisableRetryMaxonRequest(uint32_t UserId); 116 | bool ConfigureTimeOutValue(uint32_t UserId, DWORD dwMilliseconds); 117 | bool ConfigureRetryMaxValue(uint32_t UserId, uint8_t NbreMax); 118 | int8_t GetPoolAllocated(uint32_t UserId); 119 | int8_t GetPoolNumber(uint32_t UserId,int8_t idxPool); 120 | int8_t GetPoolIndex(uint32_t UserId,int8_t nPool); 121 | uint8_t GetLogicalCPUNumber(void); 122 | uint8_t GetPhysicalCoreNumber(void); 123 | 124 | protected : 125 | 126 | bool Status_Ok; 127 | uint8_t NbrePool; 128 | 129 | public : 130 | 131 | bool GetThreadPoolInterfaceStatus(void) {return(Status_Ok);} 132 | int8_t GetCurrentPoolCreated(void) {return((Status_Ok) ? NbrePool:-1);} 133 | 134 | protected : 135 | 136 | ThreadPoolInterface(void); 137 | 138 | CRITICAL_SECTION CriticalSection; 139 | HANDLE ghMutexResources; 140 | BOOL CSectionOk; 141 | HANDLE JobsEnded[MAX_THREAD_POOL],ThreadPoolFree[MAX_THREAD_POOL]; 142 | std::vector TabId; 143 | HANDLE EndExclusive; 144 | bool Error_Occured; 145 | 146 | bool ThreadPoolRequested[MAX_THREAD_POOL],JobsRunning[MAX_THREAD_POOL]; 147 | bool ThreadPoolReleased[MAX_THREAD_POOL],ThreadWaitEnd[MAX_THREAD_POOL]; 148 | bool ThreadPoolWaitFree[MAX_THREAD_POOL]; 149 | uint32_t ThreadPoolUserId[MAX_THREAD_POOL]; 150 | bool ExclusiveMode; 151 | uint8_t NbrePoolEvent; 152 | 153 | bool CreatePoolEvent(uint8_t num); 154 | void FreeData(void); 155 | void FreePool(void); 156 | void FreePool(int8_t nPool); 157 | bool EnterCS(void); 158 | void LeaveCS(void); 159 | bool GetMutex(void); 160 | void FreeMutex(void); 161 | int32_t GetUserIdIndex(uint32_t UserId); 162 | bool ReleaseThreadPoolCore(uint32_t UserId,int32_t index,bool sleep,int8_t nPool,int8_t idxPool); 163 | bool StartThreadsCore(int8_t nPool); 164 | bool 
WaitThreadsEndCore(uint32_t UserId,int8_t nPool,int8_t idxPool); 165 | 166 | private : 167 | 168 | ThreadPoolInterface (const ThreadPoolInterface &other); 169 | ThreadPoolInterface& operator = (const ThreadPoolInterface &other); 170 | bool operator == (const ThreadPoolInterface &other) const; 171 | bool operator != (const ThreadPoolInterface &other) const; 172 | }; 173 | 174 | #endif // __ThreadPoolInterface_H__ 175 | -------------------------------------------------------------------------------- /Plugins_JPSDR/TransferFunctions.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * TransferFunctions 3 | * 4 | * OOTF,EOTF,OETF, etc... HDR and SDR core functions. 5 | * Copyright (C) 2019 JPSDR 6 | * 7 | * HDRTools is free software; you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation; either version 2, or (at your option) 10 | * any later version. 11 | * 12 | * HDRTools is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GNU Make; see the file COPYING. If not, write to 19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
// log, log10, exp, sqrt and pow are used below. The header name on this line
// was lost when the file was extracted.
#include <math.h>

// Not referenced by the functions in this section.
static const double m1=0.1593017578125,im1=1.0/m1;
static const double m2=78.84375,im2=1.0/m2;
static const double c1=0.8359375;
static const double c2=18.8515625;
static const double c3=18.6875;

// Parameters of the piecewise SDR curve handled by inv_OETF():
// V = 4.5*L below the knee, V = alpha*L^0.45 - (alpha-1) above it.
static const double alpha=1.09929682680944,alpham1=alpha-1.0,ialpha=1.0/alpha;
static const double beta=0.018053968510807;
static const double alpha2=267.84,beta2=0.0003024,ialpha2=1.0/alpha2;
static const double coeff_i12=1.0/12.0,coeff_i3=1.0/3.0,coeff_i45=1.0/0.45;
// Reciprocal of the 4.5 slope of the linear segment. Kept apart from
// coeff_i45, which is the reciprocal of the 0.45 exponent.
static const double coeff_i4_5=1.0/4.5;
static const double coeff_i24=1.0/2.404,coeff_i59=1.0/59.5208;

// HLG curve parameters.
static const double a=0.17883277;
static const double b=1.0-4.0*a,c=0.5-a*log(4.0*a),ia=1.0/a;

// Exponents minus one for HLG_OOTF() / HLG_inv_OOTF(); the defaults
// correspond to a system gamma of 1.2. Changed by Set_l_HLG().
static double lm1=1.2-1.0,ilm1=(1.0/1.2)-1.0;

// Recomputes the HLG system gamma as 1.2 + 0.42*log10(Lw/1000) and stores
// (gamma-1) and (1/gamma-1) for the two OOTF functions.
// Lw must be positive: log10() of zero or a negative value is not finite.
void Set_l_HLG(double Lw)
{
	const double gamma=1.2+0.42*log10(Lw*0.001);

	lm1=gamma-1.0;
	ilm1=(1.0/gamma)-1.0;
}

// HLG OETF: sqrt(3x) up to x = 1/12, a*ln(12x-b)+c above.
double HLG_OETF(double x)
{
	if (x<=coeff_i12) return(sqrt(3.0*x));
	else return(a*log(12.0*x-b)+c);
}

// Inverse of HLG_OETF(): x*x/3 up to x = 0.5, (exp((x-c)/a)+b)/12 above.
double HLG_inv_OETF(double x)
{
	if (x<=0.5) return(x*x*coeff_i3);
	else return((exp((x-c)*ia)+b)*coeff_i12);
}

// x raised to the system gamma, written as x*x^(gamma-1).
double HLG_OOTF(double x)
{
	return(x*pow(x,lm1));
}

// x raised to 1/gamma, written as x*x^(1/gamma-1).
// The exponent (1/gamma-1) is negative for gamma > 1, so pow(0,...) is
// infinite and 0*inf would be NaN; zero is therefore returned directly.
double HLG_inv_OOTF(double x)
{
	if (x==0.0) return(0.0);
	return(x*pow(x,ilm1));
}

// Inverse of the piecewise curve V = 4.5*L (L < beta),
// V = alpha*L^0.45 - (alpha-1) otherwise.
// Below the knee (V < 4.5*beta) the inverse is V/4.5. The previous code
// multiplied by 1/0.45 there, which is ten times too large and made the
// function jump at the knee.
double inv_OETF(double x)
{
	if (x<(beta*4.5)) return(x*coeff_i4_5);
	else return(pow(((x+alpham1))*ialpha,coeff_i45));
}
// Returns a new buffer that is at least the size "nbytes".
// The buffer will be aligned to "align" bytes, which must be a non-zero
// power of two.
// Returns NULL on error (bad alignment, size overflow, or out of memory).
// On successful allocation, the returned buffer must be freed using
// "avs_free".
//
// Layout: the block obtained from malloc() is over-allocated by
// sizeof(void*) + align - 1 bytes; the returned address is the first aligned
// address that leaves room for one pointer in front of it, and that slot
// holds the address malloc() returned so that avs_free() can recover it.
//
// "static inline" so the header also works when compiled as C, where a plain
// "inline" definition does not provide an external definition to link to.
static inline void* avs_malloc(size_t nbytes, size_t align)
{
  // Zero and non-powers-of-two cannot be used to build the mask below.
  if (align == 0 || (align & (align - 1)) != 0)
    return NULL;

  size_t offset = sizeof(void*) + align - 1;

  // nbytes + offset must not wrap around, or malloc() would be asked for
  // fewer bytes than the caller is going to use.
  if (nbytes > (size_t)-1 - offset)
    return NULL;

  void *orig = malloc(nbytes + offset);
  if (orig == NULL)
    return NULL;

  void **aligned = (void**)(((uintptr_t)orig + (uintptr_t)offset) & (~(uintptr_t)(align-1)));
  aligned[-1] = orig;
  return aligned;
}

// Buffers allocated using "avs_malloc" must be freed
// using "avs_free" instead of "free".
static inline void avs_free(void *ptr)
{
  // Mirroring free()'s semantic requires us to accept NULLs
  if (ptr == NULL)
    return;

  // The slot just in front of the buffer holds the pointer malloc() returned.
  free(((void**)ptr)[-1]);
}
128 | #undef IS_PTR_ALIGNED 129 | #undef ALIGN_NUMBER 130 | #undef ALIGN_POINTER 131 | 132 | #endif // __cplusplus 133 | 134 | #endif //AVS_ALIGNMENT_H 135 | -------------------------------------------------------------------------------- /Plugins_JPSDR/avs/capi.h: -------------------------------------------------------------------------------- 1 | // Avisynth C Interface Version 0.20 2 | // Copyright 2003 Kevin Atkinson 3 | 4 | // This program is free software; you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation; either version 2 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program; if not, write to the Free Software 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 17 | // http://www.gnu.org/copyleft/gpl.html . 18 | // 19 | // As a special exception, I give you permission to link to the 20 | // Avisynth C interface with independent modules that communicate with 21 | // the Avisynth C interface solely through the interfaces defined in 22 | // avisynth_c.h, regardless of the license terms of these independent 23 | // modules, and to copy and distribute the resulting combined work 24 | // under terms of your choice, provided that every copy of the 25 | // combined work is accompanied by a complete copy of the source code 26 | // of the Avisynth C interface and Avisynth itself (with the version 27 | // used to produce the combined work), being distributed under the 28 | // terms of the GNU General Public License plus this exception. 
An 29 | // independent module is a module which is not derived from or based 30 | // on Avisynth C Interface, such as 3rd-party filters, import and 31 | // export plugins, or graphical user interfaces. 32 | 33 | #ifndef AVS_CAPI_H 34 | #define AVS_CAPI_H 35 | 36 | #include "config.h" 37 | 38 | #ifdef AVS_POSIX 39 | // this is also defined in avs/posix.h 40 | #ifndef AVS_HAIKU 41 | #define __declspec(x) 42 | #endif 43 | #endif 44 | 45 | #ifdef __cplusplus 46 | # define EXTERN_C extern "C" 47 | #else 48 | # define EXTERN_C 49 | #endif 50 | 51 | #ifdef AVS_WINDOWS 52 | #ifdef BUILDING_AVSCORE 53 | # if defined(GCC) && defined(X86_32) 54 | # define AVSC_CC 55 | # else // MSVC builds and 64-bit GCC 56 | # ifndef AVSC_USE_STDCALL 57 | # define AVSC_CC __cdecl 58 | # else 59 | # define AVSC_CC __stdcall 60 | # endif 61 | # endif 62 | #else // needed for programs that talk to AviSynth+ 63 | # ifndef AVSC_WIN32_GCC32 // see comment below 64 | # ifndef AVSC_USE_STDCALL 65 | # define AVSC_CC __cdecl 66 | # else 67 | # define AVSC_CC __stdcall 68 | # endif 69 | # else 70 | # define AVSC_CC 71 | # endif 72 | #endif 73 | # else 74 | # define AVSC_CC 75 | #endif 76 | 77 | // On 64-bit Windows, there's only one calling convention, 78 | // so there is no difference between MSVC and GCC. On 32-bit, 79 | // this isn't true. The convention that GCC needs to use to 80 | // even build AviSynth+ as 32-bit makes anything that uses 81 | // it incompatible with 32-bit MSVC builds of AviSynth+. 82 | // The AVSC_WIN32_GCC32 define is meant to provide a user 83 | // switchable way to make builds of FFmpeg to test 32-bit 84 | // GCC builds of AviSynth+ without having to screw around 85 | // with alternate headers, while still default to the usual 86 | // situation of using 32-bit MSVC builds of AviSynth+. 
87 | 88 | // Hopefully, this situation will eventually be resolved 89 | // and a broadly compatible solution will arise so the 90 | // same 32-bit FFmpeg build can handle either MSVC or GCC 91 | // builds of AviSynth+. 92 | 93 | #define AVSC_INLINE static __inline 94 | 95 | #ifdef BUILDING_AVSCORE 96 | #ifdef AVS_WINDOWS 97 | # ifndef AVS_STATIC_LIB 98 | # define AVSC_EXPORT __declspec(dllexport) 99 | # else 100 | # define AVSC_EXPORT 101 | # endif 102 | # define AVSC_API(ret, name) EXTERN_C AVSC_EXPORT ret AVSC_CC name 103 | #else 104 | # define AVSC_EXPORT EXTERN_C 105 | # define AVSC_API(ret, name) EXTERN_C ret AVSC_CC name 106 | #endif 107 | #else 108 | # define AVSC_EXPORT EXTERN_C __declspec(dllexport) 109 | # ifndef AVS_STATIC_LIB 110 | # define AVSC_IMPORT __declspec(dllimport) 111 | # else 112 | # define AVSC_IMPORT 113 | # endif 114 | # ifndef AVSC_NO_DECLSPEC 115 | # define AVSC_API(ret, name) EXTERN_C AVSC_IMPORT ret AVSC_CC name 116 | # else 117 | # define AVSC_API(ret, name) typedef ret (AVSC_CC *name##_func) 118 | # endif 119 | #endif 120 | 121 | #endif //AVS_CAPI_H 122 | -------------------------------------------------------------------------------- /Plugins_JPSDR/avs/config.h: -------------------------------------------------------------------------------- 1 | // Avisynth C Interface Version 0.20 2 | // Copyright 2003 Kevin Atkinson 3 | 4 | // This program is free software; you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation; either version 2 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 
13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program; if not, write to the Free Software 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 17 | // http://www.gnu.org/copyleft/gpl.html . 18 | // 19 | // As a special exception, I give you permission to link to the 20 | // Avisynth C interface with independent modules that communicate with 21 | // the Avisynth C interface solely through the interfaces defined in 22 | // avisynth_c.h, regardless of the license terms of these independent 23 | // modules, and to copy and distribute the resulting combined work 24 | // under terms of your choice, provided that every copy of the 25 | // combined work is accompanied by a complete copy of the source code 26 | // of the Avisynth C interface and Avisynth itself (with the version 27 | // used to produce the combined work), being distributed under the 28 | // terms of the GNU General Public License plus this exception. An 29 | // independent module is a module which is not derived from or based 30 | // on Avisynth C Interface, such as 3rd-party filters, import and 31 | // export plugins, or graphical user interfaces. 32 | 33 | #ifndef AVS_CONFIG_H 34 | #define AVS_CONFIG_H 35 | 36 | // Undefine this to get cdecl calling convention 37 | #define AVSC_USE_STDCALL 1 38 | 39 | // NOTE TO PLUGIN AUTHORS: 40 | // Because FRAME_ALIGN can be substantially higher than the alignment 41 | // a plugin actually needs, plugins should not use FRAME_ALIGN to check for 42 | // alignment. They should always request the exact alignment value they need. 43 | // This is to make sure that plugins work over the widest range of AviSynth 44 | // builds possible. 
45 | #define FRAME_ALIGN 64 46 | 47 | #if defined(_M_AMD64) || defined(__x86_64) 48 | # define X86_64 49 | #elif defined(_M_IX86) || defined(__i386__) 50 | # define X86_32 51 | // VS2017 introduced _M_ARM64 52 | #elif defined(_M_ARM64) || defined(__aarch64__) 53 | # define ARM64 54 | #elif defined(_M_ARM) || defined(__arm__) 55 | # define ARM32 56 | #elif defined(__PPC64__) 57 | # define PPC64 58 | #elif defined(_M_PPC) || defined(__PPC__) || defined(__POWERPC__) 59 | # define PPC32 60 | #elif defined(__riscv) 61 | # define RISCV 62 | #elif defined(__loongarch__) 63 | # define LOONGARCH 64 | #elif defined(__sparc_v9__) 65 | # define SPARC 66 | #elif defined(__mips__) 67 | # define MIPS 68 | #else 69 | # error Unsupported CPU architecture. 70 | #endif 71 | 72 | // VC++ LLVM-Clang-cl MinGW-Gnu 73 | // MSVC x x 74 | // MSVC_PURE x 75 | // CLANG x 76 | // GCC x 77 | 78 | #if defined(__clang__) 79 | // Check clang first. clang-cl also defines _MSC_VER 80 | // We set MSVC because they are mostly compatible 81 | # define CLANG 82 | #if defined(_MSC_VER) 83 | # define MSVC 84 | # define AVS_FORCEINLINE __attribute__((always_inline)) 85 | #else 86 | # define AVS_FORCEINLINE __attribute__((always_inline)) inline 87 | #endif 88 | #elif defined(_MSC_VER) 89 | # define MSVC 90 | # define MSVC_PURE 91 | # define AVS_FORCEINLINE __forceinline 92 | #elif defined(__GNUC__) 93 | # define GCC 94 | # define AVS_FORCEINLINE __attribute__((always_inline)) inline 95 | #elif defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) 96 | // Intel C++ compilers using the MSVC command-line interface do not reach this branch; they are caught earlier by the _MSC_VER checks 97 | # define AVS_FORCEINLINE inline 98 | # undef __forceinline 99 | # define __forceinline inline 100 | #else 101 | # error Unsupported compiler. 
102 | # define AVS_FORCEINLINE inline 103 | # undef __forceinline 104 | # define __forceinline inline 105 | #endif 106 | 107 | #if defined(_WIN32) 108 | # define AVS_WINDOWS 109 | #elif defined(__linux__) 110 | # define AVS_LINUX 111 | # define AVS_POSIX 112 | #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) 113 | # define AVS_BSD 114 | # define AVS_POSIX 115 | #elif defined(__APPLE__) 116 | # define AVS_MACOS 117 | # define AVS_POSIX 118 | #elif defined(__HAIKU__) 119 | # define AVS_HAIKU 120 | # define AVS_POSIX 121 | #else 122 | # error Operating system unsupported. 123 | #endif 124 | 125 | #if defined(AVS_WINDOWS) 126 | # if defined(X86_32) || defined(X86_64) 127 | # define AVS_WINDOWS_X86 128 | # elif defined(ARM64) || defined(ARM32) 129 | # define AVS_WINDOWS_ARM 130 | # endif 131 | #endif 132 | 133 | #if defined(MSVC) && !defined(AVS_WINDOWS_X86) 134 | # error Unsupported combination of compiler, operating system, and machine architecture. 135 | #endif 136 | 137 | // useful warnings disabler macros for supported compilers 138 | 139 | #if defined(_MSC_VER) 140 | #define DISABLE_WARNING_PUSH __pragma(warning( push )) 141 | #define DISABLE_WARNING_POP __pragma(warning( pop )) 142 | #define DISABLE_WARNING(warningNumber) __pragma(warning( disable : warningNumber )) 143 | 144 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE DISABLE_WARNING(4101) 145 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION DISABLE_WARNING(4505) 146 | // other warnings you want to deactivate... 
147 | 148 | #elif defined(__GNUC__) || defined(__clang__) 149 | #define DO_PRAGMA(X) _Pragma(#X) 150 | #define DISABLE_WARNING_PUSH DO_PRAGMA(GCC diagnostic push) 151 | #define DISABLE_WARNING_POP DO_PRAGMA(GCC diagnostic pop) 152 | #define DISABLE_WARNING(warningName) DO_PRAGMA(GCC diagnostic ignored #warningName) 153 | 154 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE DISABLE_WARNING(-Wunused-variable) 155 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION DISABLE_WARNING(-Wunused-function) 156 | // other warnings you want to deactivate... 157 | 158 | #else 159 | #define DISABLE_WARNING_PUSH 160 | #define DISABLE_WARNING_POP 161 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE 162 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION 163 | // other warnings you want to deactivate... 164 | 165 | #endif 166 | 167 | #if defined(AVS_WINDOWS) && defined(_USING_V110_SDK71_) 168 | // Windows XP does not have proper initialization for 169 | // thread local variables. 170 | // Use workaround instead __declspec(thread) 171 | #define XP_TLS 172 | #endif 173 | 174 | #ifndef MSVC 175 | // GCC and Clang can be used on big endian systems, MSVC can't. 176 | # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 177 | # define AVS_ENDIANNESS "little" 178 | # elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 179 | # define AVS_ENDIANNESS "big" 180 | # else 181 | # define AVS_ENDIANNESS "middle" 182 | # endif 183 | #else 184 | #define AVS_ENDIANNESS "little" 185 | #endif 186 | 187 | #endif //AVS_CONFIG_H 188 | -------------------------------------------------------------------------------- /Plugins_JPSDR/avs/cpuid.h: -------------------------------------------------------------------------------- 1 | // This program is free software; you can redistribute it and/or modify 2 | // it under the terms of the GNU General Public License as published by 3 | // the Free Software Foundation; either version 2 of the License, or 4 | // (at your option) any later version. 
5 | // 6 | // This program is distributed in the hope that it will be useful, 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | // GNU General Public License for more details. 10 | // 11 | // You should have received a copy of the GNU General Public License 12 | // along with this program; if not, write to the Free Software 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 14 | // http://www.gnu.org/copyleft/gpl.html . 15 | // 16 | // Linking Avisynth statically or dynamically with other modules is making a 17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 18 | // General Public License cover the whole combination. 19 | // 20 | // As a special exception, the copyright holders of Avisynth give you 21 | // permission to link Avisynth with independent modules that communicate with 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 23 | // terms of these independent modules, and to copy and distribute the 24 | // resulting combined work under terms of your choice, provided that 25 | // every copy of the combined work is accompanied by a complete copy of 26 | // the source code of Avisynth (the version of Avisynth used to produce the 27 | // combined work), being distributed under the terms of the GNU General 28 | // Public License plus this exception. An independent module is a module 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 30 | // import and export plugins, or graphical user interfaces. 31 | 32 | #ifndef AVSCORE_CPUID_H 33 | #define AVSCORE_CPUID_H 34 | 35 | // For GetCPUFlags. These are backwards-compatible with those in VirtualDub. 
36 | // ending with SSE4_2 37 | // For emulation see https://software.intel.com/en-us/articles/intel-software-development-emulator 38 | enum { 39 | /* oldest CPU to support extension */ 40 | CPUF_FORCE = 0x01, // N/A 41 | CPUF_FPU = 0x02, // 386/486DX 42 | CPUF_MMX = 0x04, // P55C, K6, PII 43 | CPUF_INTEGER_SSE = 0x08, // PIII, Athlon 44 | CPUF_SSE = 0x10, // PIII, Athlon XP/MP 45 | CPUF_SSE2 = 0x20, // PIV, K8 46 | CPUF_3DNOW = 0x40, // K6-2 47 | CPUF_3DNOW_EXT = 0x80, // Athlon 48 | CPUF_X86_64 = 0xA0, // Hammer (note: not a separate bit; equals CPUF_3DNOW_EXT | CPUF_SSE2 = 0x80 | 0x20, which 49 | // only Hammer will have anyway) 50 | CPUF_SSE3 = 0x100, // PIV+, K8 Venice 51 | CPUF_SSSE3 = 0x200, // Core 2 52 | CPUF_SSE4 = 0x400, // same bit as CPUF_SSE4_1 below 53 | CPUF_SSE4_1 = 0x400, // Penryn, Wolfdale, Yorkfield 54 | CPUF_AVX = 0x800, // Sandy Bridge, Bulldozer 55 | CPUF_SSE4_2 = 0x1000, // Nehalem 56 | // AVS+ 57 | CPUF_AVX2 = 0x2000, // Haswell 58 | CPUF_FMA3 = 0x4000, 59 | CPUF_F16C = 0x8000, 60 | CPUF_MOVBE = 0x10000, // Big Endian move 61 | CPUF_POPCNT = 0x20000, 62 | CPUF_AES = 0x40000, 63 | CPUF_FMA4 = 0x80000, 64 | 65 | CPUF_AVX512F = 0x100000, // AVX-512 Foundation. 
66 | CPUF_AVX512DQ = 0x200000, // AVX-512 DQ (Double/Quad granular) Instructions 67 | CPUF_AVX512PF = 0x400000, // AVX-512 Prefetch 68 | CPUF_AVX512ER = 0x800000, // AVX-512 Exponential and Reciprocal 69 | CPUF_AVX512CD = 0x1000000, // AVX-512 Conflict Detection 70 | CPUF_AVX512BW = 0x2000000, // AVX-512 BW (Byte/Word granular) Instructions 71 | CPUF_AVX512VL = 0x4000000, // AVX-512 VL (128/256 Vector Length) Extensions 72 | CPUF_AVX512IFMA = 0x8000000, // AVX-512 IFMA integer 52 bit 73 | CPUF_AVX512VBMI = 0x10000000,// AVX-512 VBMI 74 | }; 75 | 76 | #ifdef BUILDING_AVSCORE 77 | int GetCPUFlags(); 78 | void SetMaxCPU(int new_flags); 79 | #endif 80 | 81 | #endif // AVSCORE_CPUID_H 82 | -------------------------------------------------------------------------------- /Plugins_JPSDR/avs/filesystem.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Snippet copied from filesystem/README.md 4 | 5 | #if defined(__cplusplus) && __cplusplus >= 201703L && defined(__has_include) 6 | #if __has_include() 7 | #define GHC_USE_STD_FS 8 | #include 9 | namespace fs = std::filesystem; 10 | #endif 11 | #endif 12 | #ifndef GHC_USE_STD_FS 13 | #include 14 | namespace fs = ghc::filesystem; 15 | #endif 16 | -------------------------------------------------------------------------------- /Plugins_JPSDR/avs/minmax.h: -------------------------------------------------------------------------------- 1 | // This program is free software; you can redistribute it and/or modify 2 | // it under the terms of the GNU General Public License as published by 3 | // the Free Software Foundation; either version 2 of the License, or 4 | // (at your option) any later version. 5 | // 6 | // This program is distributed in the hope that it will be useful, 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | // GNU General Public License for more details. 
10 | // 11 | // You should have received a copy of the GNU General Public License 12 | // along with this program; if not, write to the Free Software 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 14 | // http://www.gnu.org/copyleft/gpl.html . 15 | // 16 | // Linking Avisynth statically or dynamically with other modules is making a 17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 18 | // General Public License cover the whole combination. 19 | // 20 | // As a special exception, the copyright holders of Avisynth give you 21 | // permission to link Avisynth with independent modules that communicate with 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 23 | // terms of these independent modules, and to copy and distribute the 24 | // resulting combined work under terms of your choice, provided that 25 | // every copy of the combined work is accompanied by a complete copy of 26 | // the source code of Avisynth (the version of Avisynth used to produce the 27 | // combined work), being distributed under the terms of the GNU General 28 | // Public License plus this exception. An independent module is a module 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 30 | // import and export plugins, or graphical user interfaces. 31 | 32 | #ifndef AVSCORE_MINMAX_H 33 | #define AVSCORE_MINMAX_H 34 | 35 | template 36 | T min(T v1, T v2) 37 | { 38 | return v1 < v2 ? v1 : v2; 39 | } 40 | 41 | template 42 | T max(T v1, T v2) 43 | { 44 | return v1 > v2 ? v1 : v2; 45 | } 46 | 47 | template 48 | T clamp(T n, T min, T max) 49 | { 50 | n = n > max ? max : n; 51 | return n < min ? 
min : n; 52 | } 53 | 54 | #endif // AVSCORE_MINMAX_H 55 | -------------------------------------------------------------------------------- /Plugins_JPSDR/avs/posix.h: -------------------------------------------------------------------------------- 1 | // This program is free software; you can redistribute it and/or modify 2 | // it under the terms of the GNU General Public License as published by 3 | // the Free Software Foundation; either version 2 of the License, or 4 | // (at your option) any later version. 5 | // 6 | // This program is distributed in the hope that it will be useful, 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | // GNU General Public License for more details. 10 | // 11 | // You should have received a copy of the GNU General Public License 12 | // along with this program; if not, write to the Free Software 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 14 | // http://www.gnu.org/copyleft/gpl.html . 15 | // 16 | // Linking Avisynth statically or dynamically with other modules is making a 17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 18 | // General Public License cover the whole combination. 19 | // 20 | // As a special exception, the copyright holders of Avisynth give you 21 | // permission to link Avisynth with independent modules that communicate with 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 23 | // terms of these independent modules, and to copy and distribute the 24 | // resulting combined work under terms of your choice, provided that 25 | // every copy of the combined work is accompanied by a complete copy of 26 | // the source code of Avisynth (the version of Avisynth used to produce the 27 | // combined work), being distributed under the terms of the GNU General 28 | // Public License plus this exception. 
An independent module is a module 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 30 | // import and export plugins, or graphical user interfaces. 31 | 32 | #ifdef AVS_POSIX 33 | #ifndef AVSCORE_POSIX_H 34 | #define AVSCORE_POSIX_H 35 | 36 | #ifdef __cplusplus 37 | #include 38 | #endif 39 | #include 40 | #include 41 | 42 | // Define these MSVC-extension used in Avisynth 43 | #define __single_inheritance 44 | 45 | // These things don't exist in Linux 46 | #if defined(AVS_HAIKU) 47 | #undef __declspec 48 | #endif 49 | #define __declspec(x) 50 | #define lstrlen strlen 51 | #define lstrcmp strcmp 52 | #define lstrcmpi strcasecmp 53 | #define _stricmp strcasecmp 54 | #define _strnicmp strncasecmp 55 | #define _strdup strdup 56 | #define SetCurrentDirectory(x) chdir(x) 57 | #define SetCurrentDirectoryW(x) chdir(x) 58 | #define GetCurrentDirectoryW(x) getcwd(x) 59 | #define _putenv putenv 60 | #define _alloca alloca 61 | 62 | // Borrowing some compatibility macros from AvxSynth, slightly modified 63 | #define UInt32x32To64(a, b) ((uint64_t)(((uint64_t)((uint32_t)(a))) * ((uint32_t)(b)))) 64 | #define Int64ShrlMod32(a, b) ((uint64_t)((uint64_t)(a) >> (b))) 65 | #define Int32x32To64(a, b) ((int64_t)(((int64_t)((long)(a))) * ((long)(b)))) 66 | // Win32 Interlocked* emulation on the GCC/Clang __sync builtins: Increment/Decrement return the NEW value, ExchangeAdd returns the value held BEFORE the addition. 67 | #define InterlockedIncrement(x) __sync_add_and_fetch((x), 1) 68 | #define InterlockedDecrement(x) __sync_sub_and_fetch((x), 1) 69 | #define InterlockedExchangeAdd(x, v) __sync_fetch_and_add((x), (v)) /* fetch-then-add: yields the previous value, matching Win32 */ 70 | 71 | #define MulDiv(nNumber, nNumerator, nDenominator) (int32_t) (((int64_t) (nNumber) * (int64_t) (nNumerator) + (int64_t) ((nDenominator)/2)) / (int64_t) (nDenominator)) 72 | 73 | #ifndef TRUE 74 | #define TRUE true 75 | #endif 76 | 77 | #ifndef FALSE 78 | #define FALSE false 79 | #endif 80 | 81 | #define S_FALSE (0x00000001) 82 | #define E_FAIL (0x80004005) 83 | #define FAILED(hr) ((hr) & 0x80000000) 84 | #define SUCCEEDED(hr) (!FAILED(hr)) 85 | 86 | // Statuses copied from comments in 
exception.cpp 87 | #define STATUS_GUARD_PAGE_VIOLATION 0x80000001 88 | #define STATUS_DATATYPE_MISALIGNMENT 0x80000002 89 | #define STATUS_BREAKPOINT 0x80000003 90 | #define STATUS_SINGLE_STEP 0x80000004 91 | #define STATUS_ACCESS_VIOLATION 0xc0000005 92 | #define STATUS_IN_PAGE_ERROR 0xc0000006 93 | #define STATUS_INVALID_HANDLE 0xc0000008 94 | #define STATUS_NO_MEMORY 0xc0000017 95 | #define STATUS_ILLEGAL_INSTRUCTION 0xc000001d 96 | #define STATUS_NONCONTINUABLE_EXCEPTION 0xc0000025 97 | #define STATUS_INVALID_DISPOSITION 0xc0000026 98 | #define STATUS_ARRAY_BOUNDS_EXCEEDED 0xc000008c 99 | #define STATUS_FLOAT_DENORMAL_OPERAND 0xc000008d 100 | #define STATUS_FLOAT_DIVIDE_BY_ZERO 0xc000008e 101 | #define STATUS_FLOAT_INEXACT_RESULT 0xc000008f 102 | #define STATUS_FLOAT_INVALID_OPERATION 0xc0000090 103 | #define STATUS_FLOAT_OVERFLOW 0xc0000091 104 | #define STATUS_FLOAT_STACK_CHECK 0xc0000092 105 | #define STATUS_FLOAT_UNDERFLOW 0xc0000093 106 | #define STATUS_INTEGER_DIVIDE_BY_ZERO 0xc0000094 107 | #define STATUS_INTEGER_OVERFLOW 0xc0000095 108 | #define STATUS_PRIVILEGED_INSTRUCTION 0xc0000096 109 | #define STATUS_STACK_OVERFLOW 0xc00000fd 110 | 111 | // Calling convention 112 | #ifndef AVS_HAIKU 113 | #define __stdcall 114 | #define __cdecl 115 | #endif 116 | 117 | // PowerPC OS X is really niche these days, but this painless aliasing 118 | // of the function/macro names used in posix_get_available_memory() 119 | // is all it takes to let it work. The G5 was 64-bit, and if 10.5 Leopard 120 | // can run in native 64-bit, it probably uses the names in that block as-is. 
121 | #ifdef AVS_MACOS 122 | #ifdef PPC32 123 | #define vm_statistics64_data_t vm_statistics_data_t 124 | #define HOST_VM_INFO64_COUNT HOST_VM_INFO_COUNT 125 | #define HOST_VM_INFO64 HOST_VM_INFO 126 | #define host_statistics64 host_statistics 127 | #endif // PPC32 128 | #endif // AVS_MACOS 129 | 130 | #endif // AVSCORE_POSIX_H 131 | #endif // AVS_POSIX 132 | -------------------------------------------------------------------------------- /Plugins_JPSDR/avs/types.h: -------------------------------------------------------------------------------- 1 | // Avisynth C Interface Version 0.20 2 | // Copyright 2003 Kevin Atkinson 3 | 4 | // This program is free software; you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation; either version 2 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program; if not, write to the Free Software 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 17 | // http://www.gnu.org/copyleft/gpl.html . 
18 | // 19 | // As a special exception, I give you permission to link to the 20 | // Avisynth C interface with independent modules that communicate with 21 | // the Avisynth C interface solely through the interfaces defined in 22 | // avisynth_c.h, regardless of the license terms of these independent 23 | // modules, and to copy and distribute the resulting combined work 24 | // under terms of your choice, provided that every copy of the 25 | // combined work is accompanied by a complete copy of the source code 26 | // of the Avisynth C interface and Avisynth itself (with the version 27 | // used to produce the combined work), being distributed under the 28 | // terms of the GNU General Public License plus this exception. An 29 | // independent module is a module which is not derived from or based 30 | // on Avisynth C Interface, such as 3rd-party filters, import and 31 | // export plugins, or graphical user interfaces. 32 | 33 | #ifndef AVS_TYPES_H 34 | #define AVS_TYPES_H 35 | 36 | // Define all types necessary for interfacing with avisynth.dll 37 | #include 38 | //#include 39 | #ifdef __cplusplus 40 | #include 41 | #include 42 | #else 43 | #include 44 | #include 45 | #endif 46 | 47 | // Raster types used by VirtualDub & Avisynth 48 | typedef uint32_t Pixel32; 49 | typedef uint8_t BYTE; 50 | 51 | // Audio Sample information 52 | typedef float SFLOAT; 53 | 54 | #endif //AVS_TYPES_H 55 | -------------------------------------------------------------------------------- /Plugins_JPSDR/avs/win.h: -------------------------------------------------------------------------------- 1 | // This program is free software; you can redistribute it and/or modify 2 | // it under the terms of the GNU General Public License as published by 3 | // the Free Software Foundation; either version 2 of the License, or 4 | // (at your option) any later version. 
5 | // 6 | // This program is distributed in the hope that it will be useful, 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | // GNU General Public License for more details. 10 | // 11 | // You should have received a copy of the GNU General Public License 12 | // along with this program; if not, write to the Free Software 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 14 | // http://www.gnu.org/copyleft/gpl.html . 15 | // 16 | // Linking Avisynth statically or dynamically with other modules is making a 17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 18 | // General Public License cover the whole combination. 19 | // 20 | // As a special exception, the copyright holders of Avisynth give you 21 | // permission to link Avisynth with independent modules that communicate with 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 23 | // terms of these independent modules, and to copy and distribute the 24 | // resulting combined work under terms of your choice, provided that 25 | // every copy of the combined work is accompanied by a complete copy of 26 | // the source code of Avisynth (the version of Avisynth used to produce the 27 | // combined work), being distributed under the terms of the GNU General 28 | // Public License plus this exception. An independent module is a module 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 30 | // import and export plugins, or graphical user interfaces. 31 | 32 | #ifndef AVSCORE_WIN_H 33 | #define AVSCORE_WIN_H 34 | 35 | // Whenever you need windows headers, start by including this file, then the rest. 36 | 37 | // WWUUT? We require XP now? 
38 | #if !defined(NTDDI_VERSION) && !defined(_WIN32_WINNT) 39 | #define NTDDI_VERSION 0x05020000 40 | #define _WIN32_WINNT 0x0502 41 | #endif 42 | 43 | #define WIN32_LEAN_AND_MEAN 44 | #define STRICT 45 | #if !defined(NOMINMAX) 46 | #define NOMINMAX 47 | #endif 48 | 49 | #include 50 | 51 | // Provision for UTF-8 max 4 bytes per code point 52 | #define AVS_MAX_PATH MAX_PATH*4 53 | 54 | #endif // AVSCORE_WIN_H 55 | -------------------------------------------------------------------------------- /Plugins_JPSDR/binary1.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jpsdr/plugins_JPSDR/7826b045cfc28a0738088ec03a5e6f2edd44c9f0/Plugins_JPSDR/binary1.bin -------------------------------------------------------------------------------- /Plugins_JPSDR/internal.h: -------------------------------------------------------------------------------- 1 | // Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al. 2 | // http://www.avisynth.org 3 | 4 | // This program is free software; you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation; either version 2 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program; if not, write to the Free Software 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 17 | // http://www.gnu.org/copyleft/gpl.html . 18 | // 19 | // Linking Avisynth statically or dynamically with other modules is making a 20 | // combined work based on Avisynth. 
Thus, the terms and conditions of the GNU 21 | // General Public License cover the whole combination. 22 | // 23 | // As a special exception, the copyright holders of Avisynth give you 24 | // permission to link Avisynth with independent modules that communicate with 25 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 26 | // terms of these independent modules, and to copy and distribute the 27 | // resulting combined work under terms of your choice, provided that 28 | // every copy of the combined work is accompanied by a complete copy of 29 | // the source code of Avisynth (the version of Avisynth used to produce the 30 | // combined work), being distributed under the terms of the GNU General 31 | // Public License plus this exception. An independent module is a module 32 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 33 | // import and export plugins, or graphical user interfaces. 34 | 35 | 36 | #ifndef __Internal_H__ 37 | #define __Internal_H__ 38 | 39 | #define AVS_VERSION 2.56 40 | #define AVS_VERSTR "AviSynth 2.56, build:" __DATE__ " [" __TIME__ "]" /* spaces are required: C++11 would otherwise lex __DATE__/__TIME__ as literal suffixes; the resulting string is unchanged */ 41 | 42 | 43 | // env->ManageCache() Non user keys definition 44 | // Define user accessible keys in avisynth.h 45 | // 46 | #define MC_ReturnVideoFrameBuffer 0xFFFF0001 47 | 48 | 49 | #include "./avisynth.h" 50 | 51 | 52 | 53 | int RGB2YUV(int rgb); 54 | 55 | PClip Create_MessageClip(const char* message, int width, int height, 56 | int pixel_type, bool shrink, int textcolor, int halocolor, int bgcolor, 57 | IScriptEnvironment* env); 58 | 59 | PClip new_Splice(PClip _child1, PClip _child2, bool realign_sound, IScriptEnvironment* env); 60 | PClip new_SeparateFields(PClip _child, IScriptEnvironment* env); 61 | PClip new_AssumeFrameBased(PClip _child); 62 | 63 | void BitBlt(BYTE* dstp, int dst_pitch, const BYTE* srcp, 64 | int src_pitch, int row_size, int height); 65 | 66 | void asm_BitBlt_ISSE(BYTE* dstp, int dst_pitch, const BYTE* srcp, int 
src_pitch, int row_size, int height); 67 | void asm_BitBlt_MMX(BYTE* dstp, int dst_pitch, const BYTE* srcp, int src_pitch, int row_size, int height); 68 | 69 | long GetCPUFlags(); 70 | 71 | 72 | class _PixelClip { 73 | enum { buffer=320 }; 74 | BYTE clip[256+buffer*2]; 75 | public: 76 | _PixelClip() { 77 | memset(clip, 0, buffer); 78 | for (int i=0; i<256; ++i) clip[i+buffer] = i; 79 | memset(clip+buffer+256, 255, buffer); 80 | } 81 | BYTE operator()(int i) { return clip[i+buffer]; } 82 | }; 83 | 84 | extern _PixelClip PixelClip; 85 | 86 | 87 | template 88 | static __inline void Relink(ListNode* newprev, ListNode* me, ListNode* newnext) { 89 | if (me == newprev || me == newnext) return; 90 | me->next->prev = me->prev; 91 | me->prev->next = me->next; 92 | me->prev = newprev; 93 | me->next = newnext; 94 | me->prev->next = me->next->prev = me; 95 | } 96 | 97 | 98 | 99 | /*** Inline helper methods ***/ 100 | 101 | 102 | static __inline BYTE ScaledPixelClip(int i) { 103 | return PixelClip((i+32768) >> 16); 104 | } 105 | 106 | 107 | static __inline bool IsClose(int a, int b, unsigned threshold) 108 | { return (unsigned(a-b+threshold) <= threshold*2); } 109 | 110 | 111 | 112 | 113 | #endif // __Internal_H__ 114 | -------------------------------------------------------------------------------- /Plugins_JPSDR/nnedi3.h: -------------------------------------------------------------------------------- 1 | /* 2 | ** nnedi3 v0.9.4.65 for Avs+/Avisynth 2.6.x 3 | ** 4 | ** Copyright (C) 2010-2011 Kevin Stone 5 | ** 6 | ** This program is free software; you can redistribute it and/or modify 7 | ** it under the terms of the GNU General Public License as published by 8 | ** the Free Software Foundation; either version 2 of the License, or 9 | ** (at your option) any later version. 
10 | ** 11 | ** This program is distributed in the hope that it will be useful, 12 | ** but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | ** GNU General Public License for more details. 15 | ** 16 | ** You should have received a copy of the GNU General Public License 17 | ** along with this program; if not, write to the Free Software 18 | ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 | */ 20 | 21 | #include 22 | #define _USE_MATH_DEFINES 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include "./avisynth.h" 28 | #include "./PlanarFrame.h" 29 | #include "./ThreadPoolInterface.h" 30 | 31 | #define NUM_NSIZE 7 32 | #define NUM_NNS 5 33 | const int xdiaTable[NUM_NSIZE] = {8,16,32,48,8,16,32}; 34 | const int ydiaTable[NUM_NSIZE] = {6,6,6,6,4,4,4}; 35 | const int nnsTable[NUM_NNS] = {16,32,64,128,256}; 36 | const int nnsTablePow2[NUM_NNS] = {4,5,6,7,8}; 37 | 38 | #ifndef clamp 39 | #define clamp(n,vmin,vmax) ((n>vmin)?((n 39 | #include 40 | #include "./avisynth.h" 41 | #include "./resample_functions.h" 42 | #include "./ThreadPoolInterface.h" 43 | 44 | #define RESAMPLE_MT_VERSION "ResampleMT 2.5.1 JPSDR" 45 | 46 | typedef enum ChromaLocation_e 47 | { 48 | AVS_CHROMA_UNUSED = -1, 49 | AVS_CHROMA_LEFT = 0, 50 | AVS_CHROMA_CENTER = 1, 51 | AVS_CHROMA_TOP_LEFT = 2, 52 | AVS_CHROMA_TOP = 3, 53 | AVS_CHROMA_BOTTOM_LEFT = 4, 54 | AVS_CHROMA_BOTTOM = 5, 55 | AVS_CHROMA_DV = 6 // Special to Avisynth 56 | } ChromaLocation_e; 57 | 58 | // Resizer function pointer 59 | typedef void (*ResamplerV)(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2); 60 | typedef void (*ResamplerH)(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int 
target_height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2); 61 | 62 | 63 | typedef struct _MT_Data_Info_ResampleMT 64 | { 65 | const BYTE*src1,*src2,*src3,*src4; 66 | BYTE *dst1,*dst2,*dst3,*dst4; 67 | int src_pitch1,src_pitch2,src_pitch3,src_pitch4; 68 | int dst_pitch1,dst_pitch2,dst_pitch3,dst_pitch4; 69 | int32_t src_Y_h_min,src_Y_h_max,src_Y_w; 70 | int32_t src_UV_h_min,src_UV_h_max,src_UV_w; 71 | int32_t dst_Y_h_min,dst_Y_h_max,dst_Y_w; 72 | int32_t dst_UV_h_min,dst_UV_h_max,dst_UV_w; 73 | void *filter_storage_luma,*filter_storage_luma2,*filter_storage_luma3,*filter_storage_luma4; 74 | void *filter_storage_chromaU,*filter_storage_chromaV; 75 | int *src_pitch_table_luma,*src_pitch_table_chromaU,*src_pitch_table_chromaV; 76 | ResamplingProgram *resampling_program_luma,*resampling_program_chroma; 77 | bool top,bottom; 78 | } MT_Data_Info_ResampleMT; 79 | 80 | 81 | 82 | /** 83 | * Class to resize in the horizontal direction using a specified sampling filter 84 | * Helper for resample functions 85 | **/ 86 | class FilteredResizeH : public GenericVideoFilter 87 | { 88 | public: 89 | FilteredResizeH( PClip _child, double subrange_left, double subrange_width, int target_width, uint8_t _threads, 90 | bool _sleep,int range_mode,bool desample,int accuracy, bool negativePrefetch, 91 | bool _avsp,bool preserve_center,ChromaLocation_e chroma_placement, 92 | ResamplingFunction* func,IScriptEnvironment* env ); 93 | virtual ~FilteredResizeH(void); 94 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); 95 | 96 | int __stdcall SetCacheHints(int cachehints, int frame_range); 97 | 98 | //static ResamplerH GetResampler(int CPU, bool aligned, int pixelsize, int bits_per_pixel, ResamplingProgram* program, IScriptEnvironment* env); 99 | ResamplerH GetResampler(bool aligned, ResamplingProgram* program, IScriptEnvironment* env); 100 | 101 | private: 102 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; 103 | MT_Data_Info_ResampleMT 
MT_Data[MAX_MT_THREADS]; 104 | uint8_t threads,threads_number; 105 | bool sleep; 106 | uint32_t UserId; 107 | 108 | ThreadPoolFunction ResampleH_MT; 109 | 110 | static void StaticThreadpoolH(void *ptr); 111 | 112 | uint8_t CreateMTData(uint8_t max_threads,int32_t src_size_x,int32_t src_size_y,int32_t dst_size_x,int32_t dst_size_y, int UV_w, int UV_h); 113 | 114 | void FreeData(void); 115 | 116 | void ResamplerLumaMT(MT_Data_Info_ResampleMT *MT_DataGF); 117 | void ResamplerLumaMT2(MT_Data_Info_ResampleMT *MT_DataGF); 118 | void ResamplerLumaMT3(MT_Data_Info_ResampleMT *MT_DataGF); 119 | void ResamplerLumaMT4(MT_Data_Info_ResampleMT *MT_DataGF); 120 | void ResamplerUChromaMT(MT_Data_Info_ResampleMT *MT_DataGF); 121 | void ResamplerVChromaMT(MT_Data_Info_ResampleMT *MT_DataGF); 122 | 123 | 124 | // Resampling 125 | ResamplingProgram *resampling_program_luma; 126 | ResamplingProgram *resampling_program_chroma; 127 | 128 | // Note: these pointers are currently not used; they are intended to pass data into the run-time resampler. 129 | // They are kept because this may be needed later (like when we implemented actual horizontal resizer.)
130 | void* filter_storage_luma; 131 | void* filter_storage_chroma; 132 | 133 | int src_width, src_height, dst_width, dst_height; 134 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; 135 | uint8_t pixelsize; // AVS16 136 | uint8_t bits_per_pixel; 137 | uint8_t plane_range[4]; 138 | bool mode_YUY2; 139 | bool Enable_MMX,Enable_SSE2,Enable_SSE3,Enable_SSSE3,Enable_SSE4_1,Enable_AVX2; 140 | 141 | ResamplerH resampler_h_luma; 142 | ResamplerH resampler_h_chroma; 143 | }; 144 | 145 | 146 | /** 147 | * Class to resize in the vertical direction using a specified sampling filter 148 | * Helper for resample functions 149 | **/ 150 | class FilteredResizeV : public GenericVideoFilter 151 | { 152 | public: 153 | FilteredResizeV( PClip _child, double subrange_top, double subrange_height, int target_height, uint8_t _threads, 154 | bool _sleep,int range_mode,bool desample,int accuracy,int ChromaS,uint8_t ShiftC,bool negativePrefetch, 155 | bool _avsp,bool preserve_center,ChromaLocation_e chroma_placement, 156 | bool ResizeH,ResamplingFunction* func,IScriptEnvironment* env); 157 | virtual ~FilteredResizeV(void); 158 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); 159 | 160 | int __stdcall SetCacheHints(int cachehints, int frame_range); 161 | 162 | //static ResamplerV GetResampler(int CPU, bool aligned,int pixelsize, int bits_per_pixel, void*& storage, ResamplingProgram* program); 163 | //ResamplerV GetResampler(bool aligned,void*& storage, ResamplingProgram* program); 164 | ResamplerV GetResampler(bool aligned, ResamplingProgram* program, IScriptEnvironment* env); 165 | 166 | private: 167 | Public_MT_Data_Thread MT_Thread[MAX_MT_THREADS]; 168 | MT_Data_Info_ResampleMT MT_Data[MAX_MT_THREADS]; 169 | uint8_t threads,threads_number; 170 | bool sleep; 171 | uint32_t UserId; 172 | 173 | ThreadPoolFunction ResampleV_MT; 174 | 175 | static void StaticThreadpoolV(void *ptr); 176 | 177 | uint8_t CreateMTData(uint8_t max_threads,int32_t src_size_x,int32_t 
src_size_y,int32_t dst_size_x,int32_t dst_size_y, int UV_w, int UV_h); 178 | 179 | void FreeData(void); 180 | 181 | void ResamplerLumaAlignedMT(MT_Data_Info_ResampleMT *MT_DataGF); 182 | void ResamplerLumaUnalignedMT(MT_Data_Info_ResampleMT *MT_DataGF); 183 | void ResamplerLumaAlignedMT2(MT_Data_Info_ResampleMT *MT_DataGF); 184 | void ResamplerLumaUnalignedMT2(MT_Data_Info_ResampleMT *MT_DataGF); 185 | void ResamplerLumaAlignedMT3(MT_Data_Info_ResampleMT *MT_DataGF); 186 | void ResamplerLumaUnalignedMT3(MT_Data_Info_ResampleMT *MT_DataGF); 187 | void ResamplerLumaAlignedMT4(MT_Data_Info_ResampleMT *MT_DataGF); 188 | void ResamplerLumaUnalignedMT4(MT_Data_Info_ResampleMT *MT_DataGF); 189 | void ResamplerUChromaAlignedMT(MT_Data_Info_ResampleMT *MT_DataGF); 190 | void ResamplerUChromaUnalignedMT(MT_Data_Info_ResampleMT *MT_DataGF); 191 | void ResamplerVChromaAlignedMT(MT_Data_Info_ResampleMT *MT_DataGF); 192 | void ResamplerVChromaUnalignedMT(MT_Data_Info_ResampleMT *MT_DataGF); 193 | 194 | bool grey,avsp,isRGBPfamily,isAlphaChannel,has_at_least_v8; 195 | uint8_t pixelsize; // AVS16 196 | uint8_t bits_per_pixel; 197 | uint8_t plane_range[4]; 198 | bool mode_YUY2; 199 | bool Enable_MMX,Enable_SSE2,Enable_SSE3,Enable_SSSE3,Enable_SSE4_1,Enable_AVX2; 200 | 201 | ResamplingProgram *resampling_program_luma; 202 | ResamplingProgram *resampling_program_chroma; 203 | int *src_pitch_table_luma; 204 | int *src_pitch_table_chromaU; 205 | int *src_pitch_table_chromaV; 206 | int src_pitch_luma; 207 | int src_pitch_chromaU; 208 | int src_pitch_chromaV; 209 | 210 | // Note: these pointers are currently not used; they are intended to pass data into the run-time resampler. 211 | // They are kept because this may be needed later (like when we implemented actual horizontal resizer.)
212 | void* filter_storage_luma_aligned; 213 | void* filter_storage_luma_unaligned; 214 | void* filter_storage_chroma_aligned; 215 | void* filter_storage_chroma_unaligned; 216 | 217 | ResamplerV resampler_luma_aligned; 218 | ResamplerV resampler_luma_unaligned; 219 | ResamplerV resampler_chroma_aligned; 220 | ResamplerV resampler_chroma_unaligned; 221 | }; 222 | 223 | 224 | 225 | class FilteredResizeMT 226 | { 227 | public: 228 | static PClip CreateResizeH( PClip clip, double subrange_left, double subrange_width, int target_width, uint8_t _threads, 229 | bool _sleep,int range_mode,bool desample,int accuracy, bool negativePrefetch, 230 | bool _avsp, bool preserve_center,ChromaLocation_e chroma_placement, 231 | ResamplingFunction* func,IScriptEnvironment* env ); 232 | static PClip CreateResizeV( PClip clip, double subrange_top, double subrange_height, int target_height, uint8_t _threads, 233 | bool _sleep,int range_mode,bool desample,int accuracy,int ChromaS,uint8_t ShiftC, bool negativePrefetch, 234 | bool _avsp,bool preserve_center,ChromaLocation_e chroma_placement, 235 | bool ResizeH,ResamplingFunction* func,IScriptEnvironment* env ); 236 | 237 | static PClip CreateResize( PClip clip, int target_width, int target_height, int force, int _threads, 238 | bool _LogicalCores,bool _MaxPhysCores, bool _SetAffinity,bool _sleep,int prefetch,int range_mode, 239 | bool desample,int accuracy,int order,int thread_level, 240 | const AVSValue* args,ResamplingFunction* f, 241 | bool preserve_center,const char *placement_name,ChromaLocation_e forced_chroma_placement, 242 | IScriptEnvironment* env ); 243 | 244 | static AVSValue __cdecl Create_PointResize(AVSValue args, void*, IScriptEnvironment* env); 245 | 246 | static AVSValue __cdecl Create_BilinearResize(AVSValue args, void*, IScriptEnvironment* env); 247 | 248 | static AVSValue __cdecl Create_BicubicResize(AVSValue args, void*, IScriptEnvironment* env); 249 | 250 | // 09-14-2002 - Vlad59 - Lanczos3Resize - 251 | static 
AVSValue __cdecl Create_LanczosResize(AVSValue args, void*, IScriptEnvironment* env); 252 | 253 | static AVSValue __cdecl Create_Lanczos4Resize(AVSValue args, void*, IScriptEnvironment* env); 254 | 255 | static AVSValue __cdecl Create_BlackmanResize(AVSValue args, void*, IScriptEnvironment* env); 256 | 257 | static AVSValue __cdecl Create_Spline16Resize(AVSValue args, void*, IScriptEnvironment* env); 258 | 259 | static AVSValue __cdecl Create_Spline36Resize(AVSValue args, void*, IScriptEnvironment* env); 260 | 261 | static AVSValue __cdecl Create_Spline64Resize(AVSValue args, void*, IScriptEnvironment* env); 262 | 263 | static AVSValue __cdecl Create_GaussianResize(AVSValue args, void*, IScriptEnvironment* env); 264 | 265 | static AVSValue __cdecl Create_SincResize(AVSValue args, void*, IScriptEnvironment* env); 266 | 267 | static AVSValue __cdecl Create_SinPowerResize(AVSValue args, void*, IScriptEnvironment* env); 268 | 269 | static AVSValue __cdecl Create_SincLin2Resize(AVSValue args, void*, IScriptEnvironment* env); 270 | 271 | static AVSValue __cdecl Create_UserDefined2Resize(AVSValue args, void*, IScriptEnvironment* env); 272 | 273 | // Desample functions 274 | 275 | static AVSValue __cdecl Create_DeBilinearResize(AVSValue args, void*, IScriptEnvironment* env); 276 | 277 | static AVSValue __cdecl Create_DeBicubicResize(AVSValue args, void*, IScriptEnvironment* env); 278 | 279 | // 09-14-2002 - Vlad59 - Lanczos3Resize - 280 | static AVSValue __cdecl Create_DeLanczosResize(AVSValue args, void*, IScriptEnvironment* env); 281 | 282 | static AVSValue __cdecl Create_DeLanczos4Resize(AVSValue args, void*, IScriptEnvironment* env); 283 | 284 | static AVSValue __cdecl Create_DeBlackmanResize(AVSValue args, void*, IScriptEnvironment* env); 285 | 286 | static AVSValue __cdecl Create_DeSpline16Resize(AVSValue args, void*, IScriptEnvironment* env); 287 | 288 | static AVSValue __cdecl Create_DeSpline36Resize(AVSValue args, void*, IScriptEnvironment* env); 289 | 290 | 
static AVSValue __cdecl Create_DeSpline64Resize(AVSValue args, void*, IScriptEnvironment* env); 291 | 292 | static AVSValue __cdecl Create_DeGaussianResize(AVSValue args, void*, IScriptEnvironment* env); 293 | 294 | static AVSValue __cdecl Create_DeSincResize(AVSValue args, void*, IScriptEnvironment* env); 295 | 296 | static AVSValue __cdecl Create_DeSinPowerResize(AVSValue args, void*, IScriptEnvironment* env); 297 | 298 | static AVSValue __cdecl Create_DeSincLin2Resize(AVSValue args, void*, IScriptEnvironment* env); 299 | 300 | static AVSValue __cdecl Create_DeUserDefined2Resize(AVSValue args, void*, IScriptEnvironment* env); 301 | }; 302 | 303 | 304 | #endif // __Resample_H__ 305 | 306 | 307 | -------------------------------------------------------------------------------- /Plugins_JPSDR/resample_avx2.h: -------------------------------------------------------------------------------- 1 | // Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al. 2 | // http://www.avisynth.org 3 | 4 | // This program is free software; you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation; either version 2 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program; if not, write to the Free Software 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 17 | // http://www.gnu.org/copyleft/gpl.html . 18 | // 19 | // Linking Avisynth statically or dynamically with other modules is making a 20 | // combined work based on Avisynth. 
Thus, the terms and conditions of the GNU 21 | // General Public License cover the whole combination. 22 | // 23 | // As a special exception, the copyright holders of Avisynth give you 24 | // permission to link Avisynth with independent modules that communicate with 25 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 26 | // terms of these independent modules, and to copy and distribute the 27 | // resulting combined work under terms of your choice, provided that 28 | // every copy of the combined work is accompanied by a complete copy of 29 | // the source code of Avisynth (the version of Avisynth used to produce the 30 | // combined work), being distributed under the terms of the GNU General 31 | // Public License plus this exception. An independent module is a module 32 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 33 | // import and export plugins, or graphical user interfaces. 34 | 35 | #ifndef __Resample_AVX2_H__ 36 | #define __Resample_AVX2_H__ 37 | 38 | #include "./resample_functions.h" 39 | 40 | template 41 | void resizer_h_avx2_generic_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2); 42 | 43 | template 44 | void resizer_h_avx2_generic_uint16_t(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2); 45 | 46 | void resizer_h_avx2_generic_uint8_t(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2); 47 | 48 | template 49 | void resize_v_avx2_planar_uint16_t(BYTE* dst0, const BYTE* src0, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, 
const void* storage,const uint8_t range,const bool mode_YUY2); 50 | 51 | void resize_v_avx2_planar_float(BYTE* dst0, const BYTE* src0, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2); 52 | 53 | void resize_v_avx2_planar_uint8_t(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2); 54 | 55 | #endif // __Resample_AVX2_H__ 56 | -------------------------------------------------------------------------------- /Plugins_JPSDR/resample_functions.h: -------------------------------------------------------------------------------- 1 | // Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al. 2 | // http://www.avisynth.org 3 | 4 | // This program is free software; you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation; either version 2 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program; if not, write to the Free Software 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 17 | // http://www.gnu.org/copyleft/gpl.html . 18 | // 19 | // Linking Avisynth statically or dynamically with other modules is making a 20 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 21 | // General Public License cover the whole combination. 
22 | // 23 | // As a special exception, the copyright holders of Avisynth give you 24 | // permission to link Avisynth with independent modules that communicate with 25 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 26 | // terms of these independent modules, and to copy and distribute the 27 | // resulting combined work under terms of your choice, provided that 28 | // every copy of the combined work is accompanied by a complete copy of 29 | // the source code of Avisynth (the version of Avisynth used to produce the 30 | // combined work), being distributed under the terms of the GNU General 31 | // Public License plus this exception. An independent module is a module 32 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 33 | // import and export plugins, or graphical user interfaces. 34 | 35 | #ifndef __Resample_Functions_H__ 36 | #define __Resample_Functions_H__ 37 | 38 | #include 39 | #include 40 | #include 41 | #include 42 | 43 | #include "./avisynth.h" 44 | #include "./MatrixClass.h" 45 | #include "./avs/alignment.h" 46 | 47 | #define myalignedfree(ptr) if (ptr!=NULL) { _aligned_free(ptr); ptr=NULL;} 48 | 49 | // Original value: 65536 50 | // 2 bits sacrificed because of 16 bit signed MMX multiplication 51 | // NOTE: Don't change this value. It's hard-coded in SIMD code. 
52 | const int FPScale8bits = 14; // fixed point scaler 14 bit 53 | const int FPScale = 1 << FPScale8bits; // fixed point scaler (1<<14) 54 | // for 16 bits: one bit less 55 | const int FPScale16bits = 13; 56 | const int FPScale16 = 1 << FPScale16bits; // fixed point scaler for 10-16 bit SIMD signed operation 57 | const int ALIGN_RESIZER_TARGET_SIZE = 8; 58 | const int ALIGN_FLOAT_RESIZER_COEFF_SIZE = 8; // simd friendly 59 | 60 | // 09-14-2002 - Vlad59 - Lanczos3Resize - Constant added 61 | #define M_PI 3.14159265358979323846 62 | 63 | struct ResamplingProgram 64 | { 65 | IScriptEnvironment *Env; 66 | int source_size, target_size; 67 | double crop_start, crop_size; 68 | int filter_size; 69 | int filter_size_real; // maybe less than filter_size if dimensions are small 70 | int filter_size_alignment; // for info, 1 (C), 8 (sse or avx2) or 16 (avx2) 71 | 72 | // Array of Integer indicate starting point of sampling 73 | std::vector pixel_offset; 74 | 75 | int bits_per_pixel; 76 | 77 | // Array of array of coefficient for each pixel 78 | // {{pixel[0]_coeff}, {pixel[1]_coeff}, ...} 79 | short *pixel_coefficient; 80 | float *pixel_coefficient_float; 81 | // Array of real kernel size, handles edge cases! <= filter_size 82 | // for SIMD, coefficients are copied over a padded aligned storage 83 | std::vector kernel_sizes; 84 | // 3.7.4- can be different for each line but then they get equalized and aligned. 85 | 86 | // anti-overread helpers for float resizer simd code reading 8 pixels from a given offset 87 | bool overread_possible,StatusOk; 88 | int source_overread_offset; // offset from where reading 8 bytes requires masking garbage on the right side 89 | int source_overread_beyond_targetx; 90 | // in H resizers danger zone starts from here. 
91 | // When reading aligned_filter_size elements from (src+offset) no longer fits image scanline dimensions 92 | 93 | ResamplingProgram(int filter_size, int source_size, int target_size, double crop_start, double crop_size, int bits_per_pixel, IScriptEnvironment* env) 94 | : Env(env), source_size(source_size), target_size(target_size), crop_start(crop_start), crop_size(crop_size), filter_size(filter_size), filter_size_real(filter_size), 95 | bits_per_pixel(bits_per_pixel), pixel_coefficient(NULL), pixel_coefficient_float(NULL) 96 | { // Allocates one zero-filled, 64-byte aligned coefficient table: short when bits_per_pixel<32, float otherwise. On allocation failure StatusOk is set to false instead of calling env->ThrowError, and both table pointers stay NULL. 97 | StatusOk = true; 98 | overread_possible = false; 99 | source_overread_offset = -1; 100 | source_overread_beyond_targetx = -1; 101 | 102 | pixel_offset.resize(target_size); 103 | kernel_sizes.resize(target_size); 104 | 105 | // align target_size to 8 units to allow safe 8 pixels/cycle in H resizers 106 | // pixel_offset is in unrolled loop, 128/256bit simd size does not affect. 107 | filter_size_alignment = 1; // just info. nothing special, for C. resize_h_prepare_coeff_8or16 can override and realign the coefficients for SIMD processing 108 | if (bits_per_pixel<32) 109 | pixel_coefficient = (short*) _aligned_malloc(sizeof(short)*target_size*filter_size, 64); 110 | else 111 | pixel_coefficient_float = (float*) _aligned_malloc(sizeof(float)*target_size*filter_size, 64); 112 | 113 | if (((bits_per_pixel<32) && (pixel_coefficient==NULL)) || 114 | ((bits_per_pixel==32) && (pixel_coefficient_float==NULL))) 115 | { 116 | myalignedfree(pixel_coefficient_float); 117 | myalignedfree(pixel_coefficient); 118 | StatusOk = false; 119 | //env->ThrowError("ResamplingProgram: Could not reserve memory."); 120 | } 121 | 122 | // Set all values to 0. Skipped when the allocation failed: writing through the NULL table pointer would crash before the caller can test StatusOk. 123 | if (pixel_coefficient!=NULL) memset(pixel_coefficient,0,sizeof(short)*target_size*filter_size); 124 | else if (pixel_coefficient_float!=NULL) std::fill_n(pixel_coefficient_float, target_size*filter_size, 0.0f); 125 | }; 126 | 127 | ~ResamplingProgram() 128 | { 129 | myalignedfree(pixel_coefficient_float); 130 |
myalignedfree(pixel_coefficient); 131 | }; 132 | }; 133 | 134 | typedef struct ResamplingProgram ResamplingProgram; 135 | 136 | void resize_prepare_coeffs(ResamplingProgram* p, IScriptEnvironment* env, int filter_size_alignment); 137 | 138 | /******************************************* 139 | *************************************** 140 | ** Helper classes for resample.cpp ** 141 | *************************************** 142 | *******************************************/ 143 | 144 | 145 | class ResamplingFunction 146 | /** 147 | * Pure virtual base class for resampling functions 148 | */ 149 | { 150 | public: 151 | virtual double f(double x) = 0; 152 | virtual double support() = 0; 153 | 154 | virtual ResamplingProgram* GetResamplingProgram(int source_size, double crop_start, double crop_size, int target_size, int bits_per_pixel, 155 | double center_pos_src, double center_pos_dst, IScriptEnvironment* env); 156 | virtual ResamplingProgram* GetDesamplingProgram(int source_size, double crop_start, double crop_size, int target_size, int bits_per_pixel, 157 | double center_pos_src, double center_pos_dst, uint8_t accuracy, int SizeY, uint8_t ShiftC, int &SizeOut,IScriptEnvironment* env); 158 | virtual int GetDesamplingData(int source_size, double crop_start, double crop_size, int target_size, int bits_per_pixel, 159 | double center_pos_src, double center_pos_dst, uint8_t ShiftC, IScriptEnvironment* env); 160 | }; 161 | 162 | class PointFilter : public ResamplingFunction 163 | /** 164 | * Nearest neighbour (point sampler), used in PointResize 165 | **/ 166 | { 167 | public: 168 | double f(double x); 169 | double support() { return 0.0; } // 0.0 crashes it. 170 | // Pre 3.7.4 : 0.0001. Comment: 0.0 crashes it. 
171 | // 3.7.4- this 0 is specially handled in GetResamplingProgram 172 | }; 173 | 174 | 175 | class TriangleFilter : public ResamplingFunction 176 | /** 177 | * Simple triangle filter, used in BilinearResize 178 | **/ 179 | { 180 | public: 181 | double f(double x); 182 | double support() { return 1.0; } 183 | }; 184 | 185 | 186 | class MitchellNetravaliFilter : public ResamplingFunction 187 | /** 188 | * Mitchell-Netravali filter, used in BicubicResize 189 | **/ 190 | { 191 | public: 192 | MitchellNetravaliFilter(double b=1.0/3.0, double c=1.0/3.0); 193 | double f(double x); 194 | double support() { return 2.0; } 195 | 196 | private: 197 | double p0,p2,p3,q0,q1,q2,q3; 198 | }; 199 | 200 | class LanczosFilter : public ResamplingFunction 201 | /** 202 | * Lanczos filter, used in LanczosResize 203 | **/ 204 | { 205 | public: 206 | LanczosFilter(int _taps=3); 207 | double f(double x); 208 | double support() { return taps; }; 209 | 210 | private: 211 | double sinc(double value); 212 | double taps; 213 | }; 214 | 215 | class BlackmanFilter : public ResamplingFunction 216 | /** 217 | * Blackman filter, used in BlackmanResize 218 | **/ 219 | { 220 | public: 221 | BlackmanFilter(int _taps=4); 222 | double f(double x); 223 | double support() { return taps; }; 224 | 225 | private: 226 | double taps, rtaps; 227 | }; 228 | 229 | // Spline16 230 | class Spline16Filter : public ResamplingFunction 231 | /** 232 | * Spline16 of Panorama Tools is a cubic-spline, with derivative set to 0 at the edges (4x4 pixels). 233 | **/ 234 | { 235 | public: 236 | double f(double x); 237 | double support() { return 2.0; }; 238 | 239 | private: 240 | }; 241 | 242 | // Spline36 243 | class Spline36Filter : public ResamplingFunction 244 | /** 245 | * Spline36 is like Spline16, except that it uses 6x6=36 pixels.
246 | **/ 247 | { 248 | public: 249 | double f(double x); 250 | double support() { return 3.0; }; 251 | 252 | private: 253 | }; 254 | 255 | // Spline64 256 | class Spline64Filter : public ResamplingFunction 257 | /** 258 | * Spline64 is like Spline36, except that it uses 8x8=64 pixels. 259 | **/ 260 | { 261 | public: 262 | double f(double x); 263 | double support() { return 4.0; }; 264 | 265 | private: 266 | }; 267 | 268 | 269 | class GaussianFilter : public ResamplingFunction 270 | /** 271 | * GaussianFilter, from swscale. 272 | **/ 273 | { 274 | public: 275 | GaussianFilter(double p=30.0, double _b=2.0, double _s=4.0); 276 | double f(double x); 277 | double support() { return s; }; // <3.7.4 was fixed at 4.0 278 | 279 | private: 280 | double param; 281 | double b; // base value since 3.7.4 282 | double s; // variable support since 3.7.4 283 | }; 284 | 285 | class SincFilter : public ResamplingFunction 286 | /** 287 | * Sinc filter, used in SincResize 288 | **/ 289 | { 290 | public: 291 | SincFilter(int _taps=4); 292 | double f(double x); 293 | double support() { return taps; }; 294 | 295 | private: 296 | double taps; 297 | }; 298 | 299 | class SinPowerFilter : public ResamplingFunction 300 | // SinPow kernel, used in SinPowResize 301 | { 302 | public: 303 | SinPowerFilter(double p = 2.5); 304 | double f(double x); 305 | double support() { return 2.0; }; // 2 very important, 4 cause bugs 306 | 307 | private: 308 | double param; 309 | }; 310 | 311 | class SincLin2Filter : public ResamplingFunction 312 | /** 313 | * SincLin2 filter, used in SincLin2Resize 314 | **/ 315 | { 316 | public: 317 | SincLin2Filter(int _taps = 15); 318 | double f(double x); 319 | double support() { return taps; }; 320 | 321 | private: 322 | double sinc(double value); 323 | double taps; 324 | }; 325 | 326 | class UserDefined2Filter : public ResamplingFunction 327 | /** 328 | * User-defined by 2 samples filter, used in UDef2Resize 329 | **/ 330 | { 331 | public: 332 | 
UserDefined2Filter(double _b = 121.0, double _c = 19.0, double _s = 2.3); 333 | double f(double x); 334 | double support() { return s; } 335 | 336 | private: 337 | double sinc(double value); 338 | double a, b, c; 339 | double s; // variable support 340 | }; 341 | 342 | #endif // __Resample_Functions_H__ 343 | -------------------------------------------------------------------------------- /Plugins_JPSDR/resample_sse.h: -------------------------------------------------------------------------------- 1 | // Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al. 2 | // http://avisynth.nl 3 | 4 | // This program is free software; you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation; either version 2 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program; if not, write to the Free Software 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 17 | // http://www.gnu.org/copyleft/gpl.html . 18 | // 19 | // Linking Avisynth statically or dynamically with other modules is making a 20 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 21 | // General Public License cover the whole combination.
22 | // 23 | // As a special exception, the copyright holders of Avisynth give you 24 | // permission to link Avisynth with independent modules that communicate with 25 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 26 | // terms of these independent modules, and to copy and distribute the 27 | // resulting combined work under terms of your choice, provided that 28 | // every copy of the combined work is accompanied by a complete copy of 29 | // the source code of Avisynth (the version of Avisynth used to produce the 30 | // combined work), being distributed under the terms of the GNU General 31 | // Public License plus this exception. An independent module is a module 32 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 33 | // import and export plugins, or graphical user interfaces. 34 | 35 | #ifndef __Resample_SSE_H__ 36 | #define __Resample_SSE_H__ 37 | 38 | // Intrinsics for SSE4.1, SSSE3, SSE3, SSE2, ISSE and MMX 39 | #include 40 | #include 41 | 42 | #include "avisynth.h" 43 | #include "./avs/config.h" 44 | #include "./resample.h" 45 | 46 | /*************************************** 47 | ********* Templated SSE Loader ******** 48 | ***************************************/ 49 | 50 | typedef __m128i (SSELoader)(const __m128i*); 51 | typedef __m128 (SSELoader_ps)(const float*); 52 | 53 | __forceinline __m128i simd_load_aligned(const __m128i* adr) 54 | { 55 | return _mm_load_si128(adr); 56 | } 57 | 58 | __forceinline __m128i simd_load_unaligned(const __m128i* adr) 59 | { 60 | return _mm_loadu_si128(adr); 61 | } 62 | 63 | #if defined(CLANG) 64 | __attribute__((__target__("sse3"))) 65 | #endif 66 | __forceinline __m128i simd_load_unaligned_sse3(const __m128i* adr) 67 | { 68 | return _mm_lddqu_si128(adr); 69 | } 70 | 71 | #if defined(CLANG) 72 | __attribute__((__target__("sse4.1"))) 73 | #endif 74 | __forceinline __m128i simd_load_streaming(const __m128i* adr) 75 | { 76 | return 
_mm_stream_load_si128(const_cast<__m128i*>(adr)); 77 | } 78 | 79 | // float loaders 80 | __forceinline __m128 simd_loadps_aligned(const float * adr) 81 | { 82 | return _mm_load_ps(adr); 83 | } 84 | 85 | __forceinline __m128 simd_loadps_unaligned(const float* adr) 86 | { 87 | return _mm_loadu_ps(adr); 88 | } 89 | 90 | 91 | void resize_h_prepare_coeff_8or16(ResamplingProgram* p,IScriptEnvironment* env,int alignFilterSize8or16); 92 | 93 | #ifdef X86_32 94 | void resize_v_mmx_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2); 95 | #endif 96 | void resize_v_sse2_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2); 97 | template 98 | void resize_v_sse2_planarT(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2); 99 | template 100 | #if defined(CLANG) 101 | __attribute__((__target__("ssse3"))) 102 | #endif 103 | void resize_v_ssse3_planarT(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2); 104 | 105 | #if defined(GCC) || defined(CLANG) 106 | __attribute__((__target__("sse4.1"))) 107 | #endif 108 | void resize_v_sse41_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2); 109 | 110 | #if defined(GCC) || 
defined(CLANG) 111 | __attribute__((__target__("ssse3"))) 112 | #endif 113 | void resize_v_ssse3_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2); 114 | 115 | template 116 | #if defined(GCC) || defined(CLANG) 117 | __attribute__((__target__("ssse3"))) 118 | #endif 119 | void resizer_h_ssse3_generic_uint16_t(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2); 120 | 121 | template 122 | #if defined(GCC) || defined(CLANG) 123 | __attribute__((__target__("sse4.1"))) 124 | #endif 125 | void resizer_h_sse41_generic_uint16_t(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2); 126 | 127 | template 128 | #if defined(GCC) || defined(CLANG) 129 | __attribute__((__target__("ssse3"))) 130 | #endif 131 | void resizer_h_ssse3_generic_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2); 132 | 133 | template 134 | void resize_v_sse2_planar_uint16_t(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2); 135 | 136 | template 137 | #if defined(GCC) || defined(CLANG) 138 | __attribute__((__target__("sse4.1"))) 139 | #endif 140 | void resize_v_sse41_planar_uint16_t(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool 
mode_YUY2); 141 | 142 | void resize_v_sse2_planar_float(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int bits_per_pixel, int MinY, int MaxY, const int* pitch_table, const void* storage,const uint8_t range,const bool mode_YUY2); 143 | 144 | #if defined(GCC) || defined(CLANG) 145 | __attribute__((__target__("ssse3"))) 146 | #endif 147 | void resizer_h_ssse3_generic(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2); 148 | 149 | #if defined(GCC) || defined(CLANG) 150 | __attribute__((__target__("ssse3"))) 151 | #endif 152 | void resizer_h_ssse3_8(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel,const uint8_t range,const bool mode_YUY2); 153 | 154 | 155 | #endif // __Resample_SSE_H__ 156 | -------------------------------------------------------------------------------- /Plugins_JPSDR/resource.h: -------------------------------------------------------------------------------- 1 | //{{NO_DEPENDENCIES}} 2 | // Microsoft Visual C++ generated include file. 
3 | // Used by nnedi2.rc 4 | // 5 | #define IDR_BINARY1 101 6 | 7 | // Next default values for new objects 8 | // 9 | #ifdef APSTUDIO_INVOKED 10 | #ifndef APSTUDIO_READONLY_SYMBOLS 11 | #define _APS_NEXT_RESOURCE_VALUE 101 12 | #define _APS_NEXT_COMMAND_VALUE 40001 13 | #define _APS_NEXT_CONTROL_VALUE 1000 14 | #define _APS_NEXT_SYMED_VALUE 101 15 | #endif 16 | #endif 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # plugins_JPSDR 2 | Merge of all avisynth plugins 3 | -------------------------------------------------------------------------------- /plugins_JPSDR - Readme.txt: -------------------------------------------------------------------------------- 1 | Version 3.5.0 2 | 3 | Merge of : 4 | AutoYUY2 4.1.10 5 | NNEDI3 0.9.4.65 6 | ResampleMT 2.5.1 7 | aWarpSharpMT 2.1.10 8 | HDRTools : 1.0.6 9 | --------------------------------------------------------------------------------