├── .gitattributes ├── .gitignore ├── CHANGELOG.md ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cmake_uninstall.cmake.in └── src ├── JincResize.cpp ├── JincResize.h ├── JincResize.rc ├── resize_plane_avx2.cpp ├── resize_plane_avx512.cpp └── resize_plane_sse41.cpp /.gitattributes: -------------------------------------------------------------------------------- 1 | #sources 2 | *.c text 3 | *.cc text 4 | *.cxx text 5 | *.cpp text 6 | *.c++ text 7 | *.hpp text 8 | *.h text 9 | *.h++ text 10 | *.hh text 11 | 12 | # Compiled Object files 13 | *.slo binary 14 | *.lo binary 15 | *.o binary 16 | *.obj binary 17 | 18 | # Precompiled Headers 19 | *.gch binary 20 | *.pch binary 21 | 22 | # Compiled Dynamic libraries 23 | *.so binary 24 | *.dylib binary 25 | *.dll binary 26 | 27 | # Compiled Static libraries 28 | *.lai binary 29 | *.la binary 30 | *.a binary 31 | *.lib binary 32 | 33 | # Executables 34 | *.exe binary 35 | *.out binary 36 | *.app binary 37 | ############################################################################### 38 | # Set default behavior to automatically normalize line endings. 39 | ############################################################################### 40 | * text=auto 41 | 42 | ############################################################################### 43 | # Set the merge driver for project and solution files 44 | # 45 | # Merging from the command prompt will add diff markers to the files if there 46 | # are conflicts (Merging from VS is not affected by the settings below, in VS 47 | # the diff markers are never inserted). Diff markers may cause the following 48 | # file extensions to fail to load in VS. An alternative would be to treat 49 | # these files as binary and thus will always conflict and require user 50 | # intervention with every merge. 
To do so, just comment the entries below and 51 | # uncomment the group further below 52 | ############################################################################### 53 | 54 | #*.sln text eol=crlf 55 | #*.csproj text eol=crlf 56 | #*.vbproj text eol=crlf 57 | #*.vcxproj text eol=crlf 58 | #*.vcproj text eol=crlf 59 | #*.dbproj text eol=crlf 60 | #*.fsproj text eol=crlf 61 | #*.lsproj text eol=crlf 62 | #*.wixproj text eol=crlf 63 | #*.modelproj text eol=crlf 64 | #*.sqlproj text eol=crlf 65 | #*.wmaproj text eol=crlf 66 | 67 | #*.xproj text eol=crlf 68 | #*.props text eol=crlf 69 | #*.filters text eol=crlf 70 | #*.vcxitems text eol=crlf 71 | 72 | 73 | *.sln merge=binary 74 | *.csproj merge=binary 75 | *.vbproj merge=binary 76 | *.vcxproj merge=binary 77 | *.vcproj merge=binary 78 | *.dbproj merge=binary 79 | *.fsproj merge=binary 80 | *.lsproj merge=binary 81 | *.wixproj merge=binary 82 | *.modelproj merge=binary 83 | *.sqlproj merge=binary 84 | *.wwaproj merge=binary 85 | 86 | *.xproj merge=binary 87 | *.props merge=binary 88 | *.filters merge=binary 89 | *.vcxitems merge=binary 90 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Build results 17 | [Dd]ebug/ 18 | [Dd]ebugPublic/ 19 | [Rr]elease/ 20 | [Rr]eleases/ 21 | x64/ 22 | x86/ 23 | [Aa][Rr][Mm]/ 24 | [Aa][Rr][Mm]64/ 25 | bld/ 26 | [Bb]in/ 27 | [Oo]bj/ 28 | [Ll]og/ 29 | 30 | # Visual Studio 2015/2017 cache/options directory 31 | .vs/ 32 | # Uncomment if you have tasks that create the project's static files in wwwroot 33 | #wwwroot/ 34 | 35 | # Visual Studio 2017 auto generated files 36 | Generated\ Files/ 37 | 38 | # MSTest test Results 39 | [Tt]est[Rr]esult*/ 40 | [Bb]uild[Ll]og.* 41 | 42 | # NUNIT 43 | *.VisualState.xml 44 | TestResult.xml 45 | 46 | # Build Results of an ATL Project 47 | [Dd]ebugPS/ 48 | [Rr]eleasePS/ 49 | dlldata.c 50 | 51 | # Benchmark Results 52 | BenchmarkDotNet.Artifacts/ 53 | 54 | # .NET Core 55 | project.lock.json 56 | project.fragment.lock.json 57 | artifacts/ 58 | 59 | # StyleCop 60 | StyleCopReport.xml 61 | 62 | # Files built by Visual Studio 63 | *_i.c 64 | *_p.c 65 | *_h.h 66 | *.ilk 67 | *.meta 68 | *.obj 69 | *.iobj 70 | *.pch 71 | *.pdb 72 | *.ipdb 73 | *.pgc 74 | *.pgd 75 | *.rsp 76 | *.sbr 77 | *.tlb 78 | *.tli 79 | *.tlh 80 | *.tmp 81 | *.tmp_proj 82 | *_wpftmp.csproj 83 | *.log 84 | *.vspscc 85 | *.vssscc 86 | .builds 87 | *.pidb 88 | *.svclog 89 | *.scc 90 | 91 | # Chutzpah Test files 92 | _Chutzpah* 93 | 94 | # Visual C++ cache files 95 | ipch/ 96 | *.aps 97 | *.ncb 98 | *.opendb 99 | *.opensdf 100 | *.sdf 101 | *.cachefile 102 | *.VC.db 103 | *.VC.VC.opendb 104 | 105 | # Visual Studio profiler 106 | *.psess 107 | *.vsp 108 | *.vspx 109 | *.sap 110 | 111 | # Visual Studio Trace Files 112 | *.e2e 113 | 114 | # TFS 2012 Local Workspace 115 | $tf/ 116 | 117 | # Guidance Automation Toolkit 118 | *.gpState 119 
| 120 | # ReSharper is a .NET coding add-in 121 | _ReSharper*/ 122 | *.[Rr]e[Ss]harper 123 | *.DotSettings.user 124 | 125 | # JustCode is a .NET coding add-in 126 | .JustCode 127 | 128 | # TeamCity is a build add-in 129 | _TeamCity* 130 | 131 | # DotCover is a Code Coverage Tool 132 | *.dotCover 133 | 134 | # AxoCover is a Code Coverage Tool 135 | .axoCover/* 136 | !.axoCover/settings.json 137 | 138 | # Visual Studio code coverage results 139 | *.coverage 140 | *.coveragexml 141 | 142 | # NCrunch 143 | _NCrunch_* 144 | .*crunch*.local.xml 145 | nCrunchTemp_* 146 | 147 | # MightyMoose 148 | *.mm.* 149 | AutoTest.Net/ 150 | 151 | # Web workbench (sass) 152 | .sass-cache/ 153 | 154 | # Installshield output folder 155 | [Ee]xpress/ 156 | 157 | # DocProject is a documentation generator add-in 158 | DocProject/buildhelp/ 159 | DocProject/Help/*.HxT 160 | DocProject/Help/*.HxC 161 | DocProject/Help/*.hhc 162 | DocProject/Help/*.hhk 163 | DocProject/Help/*.hhp 164 | DocProject/Help/Html2 165 | DocProject/Help/html 166 | 167 | # Click-Once directory 168 | publish/ 169 | 170 | # Publish Web Output 171 | *.[Pp]ublish.xml 172 | *.azurePubxml 173 | # Note: Comment the next line if you want to checkin your web deploy settings, 174 | # but database connection strings (with potential passwords) will be unencrypted 175 | *.pubxml 176 | *.publishproj 177 | 178 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 179 | # checkin your Azure Web App publish settings, but sensitive information contained 180 | # in these scripts will be unencrypted 181 | PublishScripts/ 182 | 183 | # NuGet Packages 184 | *.nupkg 185 | # The packages folder can be ignored because of Package Restore 186 | **/[Pp]ackages/* 187 | # except build/, which is used as an MSBuild target. 
188 | !**/[Pp]ackages/build/ 189 | # Uncomment if necessary however generally it will be regenerated when needed 190 | #!**/[Pp]ackages/repositories.config 191 | # NuGet v3's project.json files produces more ignorable files 192 | *.nuget.props 193 | *.nuget.targets 194 | 195 | # Microsoft Azure Build Output 196 | csx/ 197 | *.build.csdef 198 | 199 | # Microsoft Azure Emulator 200 | ecf/ 201 | rcf/ 202 | 203 | # Windows Store app package directories and files 204 | AppPackages/ 205 | BundleArtifacts/ 206 | Package.StoreAssociation.xml 207 | _pkginfo.txt 208 | *.appx 209 | 210 | # Visual Studio cache files 211 | # files ending in .cache can be ignored 212 | *.[Cc]ache 213 | # but keep track of directories ending in .cache 214 | !*.[Cc]ache/ 215 | 216 | # Others 217 | ClientBin/ 218 | ~$* 219 | *~ 220 | *.dbmdl 221 | *.dbproj.schemaview 222 | *.jfm 223 | *.pfx 224 | *.publishsettings 225 | orleans.codegen.cs 226 | 227 | # Including strong name files can present a security risk 228 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 229 | #*.snk 230 | 231 | # Since there are multiple workflows, uncomment next line to ignore bower_components 232 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 233 | #bower_components/ 234 | # ASP.NET Core default setup: bower directory is configured as wwwroot/lib/ and bower restore is true 235 | **/wwwroot/lib/ 236 | 237 | # RIA/Silverlight projects 238 | Generated_Code/ 239 | 240 | # Backup & report files from converting an old project file 241 | # to a newer Visual Studio version. 
Backup files are not needed, 242 | # because we have git ;-) 243 | _UpgradeReport_Files/ 244 | Backup*/ 245 | UpgradeLog*.XML 246 | UpgradeLog*.htm 247 | ServiceFabricBackup/ 248 | *.rptproj.bak 249 | 250 | # SQL Server files 251 | *.mdf 252 | *.ldf 253 | *.ndf 254 | 255 | # Business Intelligence projects 256 | *.rdl.data 257 | *.bim.layout 258 | *.bim_*.settings 259 | *.rptproj.rsuser 260 | 261 | # Microsoft Fakes 262 | FakesAssemblies/ 263 | 264 | # GhostDoc plugin setting file 265 | *.GhostDoc.xml 266 | 267 | # Node.js Tools for Visual Studio 268 | .ntvs_analysis.dat 269 | node_modules/ 270 | 271 | # Visual Studio 6 build log 272 | *.plg 273 | 274 | # Visual Studio 6 workspace options file 275 | *.opt 276 | 277 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 278 | *.vbw 279 | 280 | # Visual Studio LightSwitch build output 281 | **/*.HTMLClient/GeneratedArtifacts 282 | **/*.DesktopClient/GeneratedArtifacts 283 | **/*.DesktopClient/ModelManifest.xml 284 | **/*.Server/GeneratedArtifacts 285 | **/*.Server/ModelManifest.xml 286 | _Pvt_Extensions 287 | 288 | # Paket dependency manager 289 | .paket/paket.exe 290 | paket-files/ 291 | 292 | # FAKE - F# Make 293 | .fake/ 294 | 295 | # JetBrains Rider 296 | .idea/ 297 | *.sln.iml 298 | 299 | # CodeRush personal settings 300 | .cr/personal 301 | 302 | # Python Tools for Visual Studio (PTVS) 303 | __pycache__/ 304 | *.pyc 305 | 306 | # Cake - Uncomment if you are using it 307 | # tools/** 308 | # !tools/packages.config 309 | 310 | # Tabs Studio 311 | *.tss 312 | 313 | # Telerik's JustMock configuration file 314 | *.jmconfig 315 | 316 | # BizTalk build output 317 | *.btp.cs 318 | *.btm.cs 319 | *.odx.cs 320 | *.xsd.cs 321 | 322 | # OpenCover UI analysis results 323 | OpenCover/ 324 | 325 | # Azure Stream Analytics local run output 326 | ASALocalRun/ 327 | 328 | # MSBuild Binary and Structured Log 329 | *.binlog 330 | 331 | # NVidia Nsight GPU debugger configuration file 332 | *.nvuser 
333 | 334 | # MFractors (Xamarin productivity tool) working folder 335 | .mfractor/ 336 | 337 | # Local History for Visual Studio 338 | .localhistory/ 339 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ##### 2.1.4: 2 | Added parameters `initial_capacity`, `initial_factor`. 3 | Updated minimum AviSynth+ version to r3688. 4 | 5 | ##### 2.1.3: 6 | Fixed bug that can cause vertical lines. 7 | Reduced used memory (thanks DTL2020 for the ideas). 8 | Fixed JincXXXResize calling. 9 | Changed the AviSynth+ API used from C++ to C. 10 | 11 | ##### 2.1.2: 12 | Set frame property `_ChromaLocation` only for 420, 422, 411 clips. 13 | 14 | ##### 2.1.1: 15 | Changed back the behavior of parameter `blur`. 16 | Set frame property `_ChromaLocation`. 17 | 18 | ##### 2.1.0: 19 | Added parameter cplace. 20 | Changed omp parallel execution to C++17 parallel execution (better speed). 21 | 22 | ##### 2.0.2: 23 | Fixed output for SIMD and threads > 1 24 | 25 | ##### 2.0.1: 26 | Used MSVC instead of Intel C++ for faster binaries. 27 | 28 | ##### 2.0.0: 29 | Added OpenMP support to main processing loops. (DTL2020) 30 | Added parameter 'threads'. 31 | 32 | ##### 1.2.0: 33 | AVX-512 code is not used as default when AVX-512 CPU instructions are available. 34 | Fixed AVX-512 output. 35 | Prevent 'nan' values for the float input (SIMD). 36 | Fixed JincXXXResize parameters 'quant_x' and 'quant_y' when called by name. 37 | 38 | ##### 1.1.0: 39 | Added AVX-512 code. 40 | 41 | ##### 1.0.1: 42 | Fixed 8..16-bit processing when C++ routine is used. 43 | Changed blur parameter. 44 | Registered as MT_MULTI_INSTANCE. 45 | 46 | ##### 1.0.0: 47 | Port of the VapourSynth plugin JincResize r7.1. 
# Build script for the JincResize AviSynth+ plugin (shared library).
cmake_minimum_required(VERSION 3.16)

# Single-config generators get a Release build when no build type was requested.
if (NOT CMAKE_GENERATOR MATCHES "Visual Studio")
    if (NOT CMAKE_BUILD_TYPE)
        set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
    endif()
endif()

project(JincResize LANGUAGES CXX)

add_library(JincResize SHARED)

target_sources(JincResize PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src/JincResize.h
    ${CMAKE_CURRENT_SOURCE_DIR}/src/JincResize.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/resize_plane_sse41.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/resize_plane_avx2.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/resize_plane_avx512.cpp
)

if (WIN32)
    target_sources(JincResize PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src/JincResize.rc)
endif()

if (UNIX)
    target_include_directories(JincResize PRIVATE
        /usr/local/include/avisynth
        /usr/local/include
    )
endif()

if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
    target_link_libraries(JincResize PRIVATE libmmds)
endif()

if (NOT CMAKE_GENERATOR MATCHES "Visual Studio")
    # build_type is lower-cased, so it has to be compared with a lower-case
    # literal; comparing with "Debug" could never match and DEBUG_BUILD was
    # therefore never defined.
    string(TOLOWER "${CMAKE_BUILD_TYPE}" build_type)
    if (build_type STREQUAL "debug")
        target_compile_definitions(JincResize PRIVATE DEBUG_BUILD)
    else()
        target_compile_definitions(JincResize PRIVATE RELEASE_BUILD)
    endif()

    if (NOT MSVC)
        # NOTE(review): this generator expression was damaged in the extracted
        # text ("$<$:-s>"); restored as the usual Release-only form - confirm
        # against the repository. "-s" is a link-time strip flag, so
        # target_link_options() may be the more effective place for it.
        target_compile_options(JincResize PRIVATE $<$<CONFIG:Release>:-s>)
    endif()

    message(STATUS "Build type - ${CMAKE_BUILD_TYPE}")
endif()

# Per-file instruction-set flags: only the SIMD translation units are built
# with the wider instruction sets.
if (MSVC)
    set_source_files_properties(src/resize_plane_avx2.cpp PROPERTIES COMPILE_OPTIONS "/arch:AVX2")
    set_source_files_properties(src/resize_plane_avx512.cpp PROPERTIES COMPILE_OPTIONS "/arch:AVX512")
    if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
        set_source_files_properties(src/resize_plane_sse41.cpp PROPERTIES COMPILE_OPTIONS "-mfpmath=sse;-msse4.1")
    endif()
else()
    set_source_files_properties(src/resize_plane_sse41.cpp PROPERTIES COMPILE_OPTIONS "-mfpmath=sse;-msse4.1")
    set_source_files_properties(src/resize_plane_avx2.cpp PROPERTIES COMPILE_OPTIONS "-mavx2;-mfma")
    set_source_files_properties(src/resize_plane_avx512.cpp PROPERTIES COMPILE_OPTIONS "-mavx512f;-mavx512bw;-mavx512dq;-mavx512vl;-mfma")
endif()

target_link_libraries(JincResize PRIVATE avisynth)

target_compile_features(JincResize PRIVATE cxx_std_17)

if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
    target_compile_options(JincResize PRIVATE "/fp:precise")
endif()

if (UNIX)
    find_package (Git)

    if (GIT_FOUND)
        # The latest tag becomes part of the library file name.
        execute_process (COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0
            OUTPUT_VARIABLE ver
            OUTPUT_STRIP_TRAILING_WHITESPACE
        )
        set_target_properties(JincResize PROPERTIES OUTPUT_NAME "jincresize.${ver}")
    else ()
        message (STATUS "GIT not found")
    endif ()

    include(GNUInstallDirs)

    INSTALL(TARGETS JincResize LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/avisynth")

    # uninstall target
    if(NOT TARGET uninstall)
        configure_file(
            "${CMAKE_CURRENT_SOURCE_DIR}/cmake_uninstall.cmake.in"
            "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
            IMMEDIATE @ONLY)

        add_custom_target(uninstall
            COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
    endif()
endif()
| 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | 3 | Jinc (EWA Lanczos) resampling plugin for AviSynth 2.6 / AviSynth+. 4 | 5 | This is [a port of the VapourSynth plugin JincResize](https://github.com/Kiyamou/VapourSynth-JincResize). 6 | 7 | SSE / AVX Intrinsics taken from [the other AviSynth plugin JincResize](https://github.com/AviSynth/jinc-resize). 8 | 9 | NOTE: The 32-bit version is not supported. If you still want to use it keep in mind that the OS memory limit can be easily hit. 
(#10) 10 | 11 | ### Requirements: 12 | 13 | - AviSynth+ r3688 or later ([1](https://github.com/AviSynth/AviSynthPlus/releases) / [2](https://forum.doom9.org/showthread.php?t=181351) / [3](https://gitlab.com/uvz/AviSynthPlus-Builds)) 14 | 15 | - Microsoft VisualC++ Redistributable Package 2022 (can be downloaded from [here](https://github.com/abbodi1406/vcredist/releases)) 16 | 17 | ### Usage: 18 | 19 | ``` 20 | JincResize (clip, int target_width, int target_height, float "src_left", float "src_top", float "src_width", float "src_height", int "quant_x", int "quant_y", int "tap", float "blur", string "cplace", int "threads", int "opt", int "initial_capacity", float "initial_factor") 21 | ``` 22 | 23 | ##### There are 4 additional functions: 24 | Jinc36Resize is an alias for JincResize(tap=3). 25 | Jinc64Resize is an alias for JincResize(tap=4). 26 | Jinc144Resize is an alias for JincResize(tap=6). 27 | Jinc256Resize is an alias for JincResize(tap=8). 28 | 29 | ``` 30 | Jinc36Resize / Jinc64Resize / Jinc144Resize / Jinc256Resize (clip, int target_width, int target_height, float "src_left", float "src_top", float "src_width", float "src_height", int "quant_x", int "quant_y", string "cplace", int "threads") 31 | ``` 32 | 33 | ### Parameters: 34 | 35 | - clip
36 | A clip to process. All planar formats are supported. 37 | 38 | - target_width
39 | The width of the output. 40 | 41 | - target_height
42 | The height of the output. 43 | 44 | - src_left
45 | Cropping of the left edge.
46 | Default: 0.0. 47 | 48 | - src_top
49 | Cropping of the top edge.
50 | Default: 0.0. 51 | 52 | - src_width
53 | If > 0.0 it sets the width of the clip before resizing.
54 | If <= 0.0 it sets the cropping of the right edge before resizing.
55 | Default: Source width. 56 | 57 | - src_height
58 | If > 0.0 it sets the height of the clip before resizing.
59 | If <= 0.0 it sets the cropping of the bottom edge before resizing.
60 | Default: Source height. 61 | 62 | - quant_x, quant_y
63 | Controls the sub-pixel quantization.
64 | Must be between 1 and 256.
65 | Default: 256. 66 | 67 | - tap (JincResize only)
68 | Corresponds to the different zero points of the Jinc function.
69 | Must be between 1 and 16.
70 | Default: 3. 71 | 72 | - blur (JincResize only)
73 | Blur processing; it can reduce side effects.
74 | To achieve blur, the value should be less than 1.0.
75 | Default: 1.0. 76 | 77 | - threads
78 | Whether to use maximum logical processors.
79 | 0: Maximum logical processors are used.
80 | 1: Only one thread is used.
81 | Default: 0. 82 | 83 | - cplace
84 | The location of the chroma samples.
85 | "MPEG1": Chroma samples are located on the center of each group of 4 pixels.
86 | "MPEG2": Chroma samples are located on the left pixel column of the group.
87 | "topleft": Chroma samples are located on the left pixel column and the first row of the group.
88 | Default: If frame properties are supported and frame property "_ChromaLocation" exists - "_ChromaLocation" value of the first frame is used. 89 | If frame properties aren't supported or there is no property "_ChromaLocation" - "MPEG2". 90 | 91 | - opt (JincResize only)
92 | Sets which cpu optimizations to use.
93 | -1: Auto-detect without AVX-512.
94 | 0: Use C++ code.
95 | 1: Use SSE4.1 code.
96 | 2: Use AVX2 code.
97 | 3: Use AVX-512 code.
98 | Default: -1. 99 | 100 | - initial_capacity (JincResize only)
101 | Initial memory allocation size.
102 | A smaller size forces more memory reallocations, which makes the initial startup slower but avoids excessive memory allocation.
103 | Must be greater than 0.
104 | Default: Max(target_width * target_height, src_width * src_height). 105 | 106 | - initial_factor (JincResize only)
107 | The initial factor used for the first memory reallocation.
108 | After the first memory reallocation, the factor is lowered for subsequent reallocations.
109 | `initial_factor=1` ensures that the next memory allocation is the smallest possible.
110 | Must be equal to or greater than 1.0.
111 | Default: 1.5. 112 | 113 | ### Building: 114 | 115 | ``` 116 | Requirements: 117 | - Git 118 | - C++17 compiler 119 | - CMake >= 3.16 120 | - Ninja 121 | ``` 122 | 123 | ``` 124 | git clone https://github.com/Asd-g/AviSynth-JincResize && \ 125 | cd AviSynth-JincResize 126 | cmake -B build -G Ninja 127 | ninja -C build 128 | ``` 129 | 130 | Example of building on Windows with MSVC: 131 | 132 | 1. Open x64 Native Tools Command Prompt for VS xxxx. 133 | 2. Type - `set LIB=%LIB%;path_to_avisynth.lib` 134 | 3. Navigate to the jincresize source folder. 135 | 4. Type - `cmake -B name_of_the_folder_containing_building_files -DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=path_to_avisynth_c.h` 136 | 137 | By default Visual Studio solution files will be created. 138 | 139 | Example of building on Windows with Mingw: 140 | 141 | 1. Open UCRT64/MINGW64 shell. 142 | 2. Type - `export LIBRARY_PATH=$LIBRARY_PATH:path_to_the_avs_lib` 143 | 3. Navigate to the jincresize source folder. 144 | 4. Type - `cmake -B name_of_the_folder_containing_building_files -DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=path_to_avisynth_c.h` 145 | -------------------------------------------------------------------------------- /cmake_uninstall.cmake.in: -------------------------------------------------------------------------------- 1 | if(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt") 2 | message(FATAL_ERROR "Cannot find install manifest: @CMAKE_BINARY_DIR@/install_manifest.txt") 3 | endif() 4 | 5 | file(READ "@CMAKE_BINARY_DIR@/install_manifest.txt" files) 6 | string(REGEX REPLACE "\n" ";" files "${files}") 7 | foreach(file ${files}) 8 | message(STATUS "Uninstalling $ENV{DESTDIR}${file}") 9 | if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 10 | exec_program( 11 | "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" 12 | OUTPUT_VARIABLE rm_out 13 | RETURN_VALUE rm_retval 14 | ) 15 | if(NOT "${rm_retval}" STREQUAL 0) 16 | message(FATAL_ERROR "Problem when removing 
$ENV{DESTDIR}${file}") 17 | endif() 18 | else(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 19 | message(STATUS "File $ENV{DESTDIR}${file} does not exist.") 20 | endif() 21 | endforeach() 22 | -------------------------------------------------------------------------------- /src/JincResize.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "JincResize.h" 5 | 6 | AVS_FORCEINLINE void* aligned_malloc(size_t size, size_t align) 7 | { 8 | void* result = [&]() 9 | { 10 | #ifdef _WIN32 11 | return _aligned_malloc(size, align); 12 | #else 13 | if (posix_memalign(&result, align, size)) 14 | return result = nullptr; 15 | else 16 | return result; 17 | #endif 18 | }(); 19 | 20 | return result; 21 | } 22 | 23 | AVS_FORCEINLINE void aligned_free(void* ptr) 24 | { 25 | #ifdef _WIN32 26 | _aligned_free(ptr); 27 | #else 28 | free(ptr); 29 | #endif 30 | } 31 | 32 | static AVS_FORCEINLINE unsigned portable_clz(size_t x) 33 | { 34 | #if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) 35 | unsigned long index; 36 | return (_BitScanReverse(&index, static_cast(x))) ? (31 - index) : 32; 37 | #elif defined(_WIN32) && defined(_M_ARM64) 38 | return static_cast(__clz(static_cast(x))); 39 | #else 40 | return (x == 0) ? 
32 : __builtin_clz(static_cast(x)); 41 | #endif 42 | } 43 | 44 | #ifndef M_PI // GCC seems to have it 45 | static constexpr double M_PI = 3.14159265358979323846; 46 | #endif 47 | 48 | // Taylor series coefficients of 2*BesselJ1(pi*x)/(pi*x) as (x^2) -> 0 49 | static constexpr double jinc_taylor_series[31] = 50 | { 51 | 1.0, 52 | -1.23370055013616982735431137, 53 | 0.507339015802096027273126733, 54 | -0.104317403816764804365258186, 55 | 0.0128696438477519721233840271, 56 | -0.00105848577966854543020422691, 57 | 6.21835470803998638484476598e-05, 58 | -2.73985272294670461142756204e-06, 59 | 9.38932725442064547796003405e-08, 60 | -2.57413737759717407304931036e-09, 61 | 5.77402672521402031756429343e-11, 62 | -1.07930605263598241754572977e-12, 63 | 1.70710316782347356046974552e-14, 64 | -2.31434518382749184406648762e-16, 65 | 2.71924659665997312120515390e-18, 66 | -2.79561335187943028518083529e-20, 67 | 2.53599244866299622352138464e-22, 68 | -2.04487273140961494085786452e-24, 69 | 1.47529860450204338866792475e-26, 70 | -9.57935105257523453155043307e-29, 71 | 5.62764317309979254140393917e-31, 72 | -3.00555258814860366342363867e-33, 73 | 1.46559362903641161989338221e-35, 74 | -6.55110024064596600335624426e-38, 75 | 2.69403199029404093412381643e-40, 76 | -1.02265499954159964097119923e-42, 77 | 3.59444454568084324694180635e-45, 78 | -1.17313973900539982313119019e-47, 79 | 3.56478606255557746426034301e-50, 80 | -1.01100655781438313239513538e-52, 81 | 2.68232117541264485328658605e-55 82 | }; 83 | 84 | static constexpr double jinc_zeros[16] = 85 | { 86 | 1.2196698912665045, 87 | 2.2331305943815286, 88 | 3.2383154841662362, 89 | 4.2410628637960699, 90 | 5.2427643768701817, 91 | 6.2439216898644877, 92 | 7.2447598687199570, 93 | 8.2453949139520427, 94 | 9.2458926849494673, 95 | 10.246293348754916, 96 | 11.246622794877883, 97 | 12.246898461138105, 98 | 13.247132522181061, 99 | 14.247333735806849, 100 | 15.247508563037300, 101 | 16.247661874700962 102 | }; 103 | 104 | // Modified 
// from boost package math/tools/`rational.hpp`
//
// (C) Copyright John Maddock 2006.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Evaluates the rational function P(z) / Q(z), where
//   P(z) = num[0]   + num[1]   * z + ... + num[count - 1]   * z^(count - 1)
//   Q(z) = denom[0] + denom[1] * z + ... + denom[count - 1] * z^(count - 1)
//
// num, denom: coefficient tables in ascending-power order; both must hold
//             at least `count` elements.
// z:          evaluation point.
// count:      number of coefficients in each table.
//
// Returns P(z) / Q(z). When `count` is not positive there is no polynomial to
// evaluate, so NaN is returned instead of reading outside the tables.
static double evaluate_rational(const double* num, const double* denom, double z, int count)
{
    // Guard against empty tables: indexing num[count - 1] would be out of bounds.
    if (count <= 0)
        return NAN;

    double numerator;
    double denominator;

    if (z <= 1.0)
    {
        // Plain Horner scheme, from the highest-order coefficient down.
        numerator = num[count - 1];
        denominator = denom[count - 1];
        for (int i = count - 2; i >= 0; --i)
        {
            numerator = numerator * z + num[i];
            denominator = denominator * z + denom[i];
        }
    }
    else
    {
        // For z > 1 run Horner in w = 1 / z over the coefficients in reverse
        // order. That yields P(z) / z^(count - 1) and Q(z) / z^(count - 1);
        // the common factor cancels in the final division, and the powers of
        // w stay below 1 instead of growing with z.
        const double w = 1.0 / z;
        numerator = num[0];
        denominator = denom[0];
        for (int i = 1; i < count; ++i)
        {
            numerator = numerator * w + num[i];
            denominator = denominator * w + denom[i];
        }
    }

    return numerator / denominator;
}

// Modified from boost package `BesselJ1.hpp`
//
// Copyright (c) 2006 Xiaogang Zhang
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0.
(See accompanying file 147 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 148 | static double jinc_sqr_boost_l(double x2) 149 | { 150 | constexpr double bPC[7] = 151 | { 152 | -4.4357578167941278571e+06, 153 | -9.9422465050776411957e+06, 154 | -6.6033732483649391093e+06, 155 | -1.5235293511811373833e+06, 156 | -1.0982405543459346727e+05, 157 | -1.6116166443246101165e+03, 158 | 0.0 159 | }; 160 | constexpr double bQC[7] = 161 | { 162 | -4.4357578167941278568e+06, 163 | -9.9341243899345856590e+06, 164 | -6.5853394797230870728e+06, 165 | -1.5118095066341608816e+06, 166 | -1.0726385991103820119e+05, 167 | -1.4550094401904961825e+03, 168 | 1.0 169 | }; 170 | constexpr double bPS[7] = 171 | { 172 | 3.3220913409857223519e+04, 173 | 8.5145160675335701966e+04, 174 | 6.6178836581270835179e+04, 175 | 1.8494262873223866797e+04, 176 | 1.7063754290207680021e+03, 177 | 3.5265133846636032186e+01, 178 | 0.0 179 | }; 180 | constexpr double bQS[7] = 181 | { 182 | 7.0871281941028743574e+05, 183 | 1.8194580422439972989e+06, 184 | 1.4194606696037208929e+06, 185 | 4.0029443582266975117e+05, 186 | 3.7890229745772202641e+04, 187 | 8.6383677696049909675e+02, 188 | 1.0 189 | }; 190 | 191 | const auto y2 = M_PI * M_PI * x2; 192 | const auto xp = sqrt(y2); 193 | const auto y2p = 64.0 / y2; 194 | const auto sx = sin(xp); 195 | const auto cx = cos(xp); 196 | 197 | return (sqrt(xp / M_PI) * 2.0 / y2) * (evaluate_rational(bPC, bQC, y2p, 7) * (sx - cx) + (8.0 / xp) * evaluate_rational(bPS, bQS, y2p, 7) * (sx + cx)); 198 | } 199 | 200 | // jinc(sqrt(x2)) 201 | static double jinc_sqr(double x2) 202 | { 203 | if (x2 < 1.49) // the 1-tap radius 204 | { 205 | double res = 0.0; 206 | for (auto j = 16; j > 0; --j) 207 | res = res * x2 + jinc_taylor_series[j - 1]; 208 | return res; 209 | } 210 | else if (x2 < 4.97) // the 2-tap radius 211 | { 212 | double res = 0.0; 213 | for (auto j = 21; j > 0; --j) 214 | res = res * x2 + jinc_taylor_series[j - 1]; 215 | return res; 216 | } 217 | 
else if (x2 < 10.49) // the 3-tap radius 218 | { 219 | double res = 0.0; 220 | for (auto j = 26; j > 0; --j) 221 | res = res * x2 + jinc_taylor_series[j - 1]; 222 | return res; 223 | } 224 | else if (x2 < 17.99) // the 4-tap radius 225 | { 226 | double res = 0.0; 227 | for (auto j = 31; j > 0; --j) 228 | res = res * x2 + jinc_taylor_series[j - 1]; 229 | return res; 230 | } 231 | else if (x2 < 52.57) // the 5~7-tap radius 232 | { 233 | const auto x = M_PI * sqrt(x2); 234 | return 2.0 * std::cyl_bessel_j(1, x) / x; 235 | } 236 | else if (x2 < 68.07) // the 8-tap radius // Modify from pull request #4 237 | { 238 | return jinc_sqr_boost_l(x2); 239 | } 240 | else // the 9~16-tap radius 241 | { 242 | const auto x = M_PI * sqrt(x2); 243 | return 2.0 * std::cyl_bessel_j(1, x) / x; 244 | } 245 | } 246 | 247 | static double sample_sqr(double (*filter)(double), double x2, double blur2, double radius2) 248 | { 249 | if (blur2 > 0.0) 250 | x2 /= blur2; 251 | 252 | if (x2 < radius2) 253 | return filter(x2); 254 | 255 | return 0.0; 256 | } 257 | 258 | constexpr double JINC_ZERO_SQR = 1.48759464366204680005356; 259 | 260 | Lut::Lut() 261 | { 262 | lut = new double[lut_size]; 263 | } 264 | 265 | void Lut::InitLut(int lut_size, double radius, double blur) 266 | { 267 | const auto radius2 = radius * radius; 268 | const auto blur2 = blur * blur; 269 | 270 | for (auto i = 0; i < lut_size; ++i) 271 | { 272 | const auto t2 = i / (lut_size - 1.0); 273 | lut[i] = sample_sqr(jinc_sqr, radius2 * t2, blur2, radius2) * sample_sqr(jinc_sqr, JINC_ZERO_SQR * t2, 1.0, radius2); 274 | } 275 | } 276 | 277 | float Lut::GetFactor(int index) 278 | { 279 | if (index >= lut_size) 280 | return 0.f; 281 | return static_cast(lut[index]); 282 | } 283 | 284 | constexpr double DOUBLE_ROUND_MAGIC_NUMBER = 6755399441055744.0; 285 | 286 | static void init_coeff_table(EWAPixelCoeff* out, int quantize_x, int quantize_y, 287 | int filter_size, int dst_width, int dst_height) 288 | { 289 | out->filter_size = 
filter_size; 290 | out->coeff_stride = (filter_size + 15) & ~15; 291 | 292 | // Allocate metadata 293 | out->meta = new EWAPixelCoeffMeta[static_cast(dst_width) * dst_height]; 294 | 295 | // Alocate factor map 296 | out->factor_map = new int[static_cast(quantize_x) * quantize_y]; 297 | 298 | // This will be reserved to exact size in coff generating procedure 299 | out->factor = nullptr; 300 | 301 | // Zeroed memory 302 | if (out->factor_map != nullptr) 303 | memset(out->factor_map, 0, static_cast(quantize_x) * quantize_y * sizeof(int)); 304 | 305 | memset(out->meta, 0, static_cast(dst_width) * dst_height * sizeof(EWAPixelCoeffMeta)); 306 | } 307 | 308 | static void delete_coeff_table(EWAPixelCoeff* out) 309 | { 310 | aligned_free(out->factor); 311 | delete[] out->meta; 312 | delete[] out->factor_map; 313 | } 314 | 315 | struct generate_coeff_params 316 | { 317 | Lut* func; 318 | EWAPixelCoeff* out; 319 | int quantize_x; 320 | int quantize_y; 321 | int samples; 322 | int src_width; 323 | int src_height; 324 | int dst_width; 325 | int dst_height; 326 | double radius; 327 | double crop_left; 328 | double crop_top; 329 | double crop_width; 330 | double crop_height; 331 | int initial_capacity; 332 | double initial_factor; 333 | }; 334 | 335 | /* Coefficient table generation */ 336 | static void generate_coeff_table_c(const generate_coeff_params& params) 337 | { 338 | Lut* func = params.func; 339 | EWAPixelCoeff* out = params.out; 340 | int quantize_x = params.quantize_x; 341 | int quantize_y = params.quantize_y; 342 | int samples = params.samples; 343 | int src_width = params.src_width; 344 | int src_height = params.src_height; 345 | int dst_width = params.dst_width; 346 | int dst_height = params.dst_height; 347 | double radius = params.radius; 348 | 349 | const double filter_step_x = min(static_cast(dst_width) / params.crop_width, 1.0); 350 | const double filter_step_y = min(static_cast(dst_height) / params.crop_height, 1.0); 351 | 352 | const float filter_support_x = 
static_cast(radius / filter_step_x); 353 | const float filter_support_y = static_cast(radius / filter_step_y); 354 | 355 | const float filter_support = max(filter_support_x, filter_support_y); 356 | const int filter_size = max(static_cast(ceil(filter_support_x * 2.0)), static_cast(ceil(filter_support_y * 2.0))); 357 | 358 | const float start_x = static_cast(params.crop_left + (params.crop_width / dst_width - 1.0) / 2.0); 359 | 360 | const float x_step = static_cast(params.crop_width / dst_width); 361 | const float y_step = static_cast(params.crop_height / dst_height); 362 | 363 | float xpos = start_x; 364 | float ypos = static_cast(params.crop_top + (params.crop_height - dst_height) / (dst_height * static_cast(2))); 365 | 366 | // Initialize EWAPixelCoeff data structure 367 | init_coeff_table(out, quantize_x, quantize_y, filter_size, dst_width, dst_height); 368 | 369 | size_t tmp_array_capacity = params.initial_capacity; 370 | float* tmp_array = static_cast(aligned_malloc(tmp_array_capacity * sizeof(float), 64)); 371 | if (!tmp_array) 372 | throw "JincResize: failed to allocate tmp_array."; 373 | size_t tmp_array_size = 0; 374 | int tmp_array_top = 0; 375 | unsigned base_clz = portable_clz(tmp_array_capacity); 376 | const double initial_growth_factor = params.initial_factor; 377 | const double radius2 = radius * radius; 378 | 379 | // Use to advance the coeff pointer 380 | const int coeff_per_pixel = out->coeff_stride * filter_size; 381 | 382 | for (int y = 0; y < dst_height; ++y) 383 | { 384 | for (int x = 0; x < dst_width; ++x) 385 | { 386 | bool is_border = false; 387 | 388 | EWAPixelCoeffMeta* meta = &out->meta[y * dst_width + x]; 389 | 390 | // Here, the window_*** variable specified a begin/size/end 391 | // of EWA window to process. 
392 | int window_end_x = static_cast(xpos + filter_support); 393 | int window_end_y = static_cast(ypos + filter_support); 394 | 395 | if (window_end_x >= src_width) 396 | { 397 | window_end_x = src_width - 1; 398 | is_border = true; 399 | } 400 | if (window_end_y >= src_height) 401 | { 402 | window_end_y = src_height - 1; 403 | is_border = true; 404 | } 405 | 406 | int window_begin_x = window_end_x - filter_size + 1; 407 | int window_begin_y = window_end_y - filter_size + 1; 408 | 409 | if (window_begin_x < 0) 410 | { 411 | window_begin_x = 0; 412 | is_border = true; 413 | } 414 | if (window_begin_y < 0) 415 | { 416 | window_begin_y = 0; 417 | is_border = true; 418 | } 419 | 420 | meta->start_x = window_begin_x; 421 | meta->start_y = window_begin_y; 422 | 423 | // Quantize xpos and ypos 424 | const int quantized_x_int = static_cast(xpos * quantize_x); 425 | const int quantized_y_int = static_cast(ypos * quantize_y); 426 | const int quantized_x_value = quantized_x_int % quantize_x; 427 | const int quantized_y_value = quantized_y_int % quantize_y; 428 | const float quantized_xpos = static_cast(quantized_x_int) / quantize_x; 429 | const float quantized_ypos = static_cast(quantized_y_int) / quantize_y; 430 | 431 | if (!is_border && out->factor_map[quantized_y_value * quantize_x + quantized_x_value] != 0) 432 | { 433 | // Not border pixel and already have coefficient calculated at this quantized position 434 | meta->coeff_meta = out->factor_map[quantized_y_value * quantize_x + quantized_x_value] - 1; 435 | } 436 | else 437 | { 438 | // then need computation 439 | float divider = 0.f; 440 | 441 | // This is the location of current target pixel in source pixel 442 | // Quantized 443 | //const float current_x = clamp(is_border ? xpos : quantized_xpos, 0.f, src_width - 1.f); 444 | //const float current_y = clamp(is_border ? 
ypos : quantized_ypos, 0.f, src_height - 1.f); 445 | 446 | if (!is_border) 447 | { 448 | // Change window position to quantized position 449 | window_begin_x = static_cast(quantized_xpos + filter_support) - filter_size + 1; 450 | window_begin_y = static_cast(quantized_ypos + filter_support) - filter_size + 1; 451 | } 452 | 453 | // Windowing positon 454 | int window_x = window_begin_x; 455 | int window_y = window_begin_y; 456 | 457 | // First loop calcuate coeff 458 | const size_t new_size = tmp_array_size + coeff_per_pixel; 459 | if (new_size > tmp_array_capacity) 460 | { 461 | size_t new_capacity = tmp_array_capacity * (1.0 + (initial_growth_factor - 1.0) 462 | * (1.0 - static_cast(max(0, static_cast(base_clz - portable_clz(tmp_array_capacity)))) / 32.0)); 463 | if (new_capacity < new_size) 464 | new_capacity = new_size; 465 | float* new_tmp = static_cast(aligned_malloc(new_capacity * sizeof(float), 64)); 466 | if (!new_tmp) 467 | { 468 | aligned_free(tmp_array); 469 | throw "JincResize: failed to allocate new_tmp."; 470 | } 471 | memcpy(new_tmp, tmp_array, tmp_array_size * sizeof(float)); 472 | aligned_free(tmp_array); 473 | tmp_array = new_tmp; 474 | tmp_array_capacity = new_capacity; 475 | } 476 | memset(tmp_array + tmp_array_size, 0, coeff_per_pixel * sizeof(float)); 477 | int curr_factor_ptr = tmp_array_top; 478 | tmp_array_size = new_size; 479 | 480 | for (int ly = 0; ly < filter_size; ++ly) 481 | { 482 | for (int lx = 0; lx < filter_size; ++lx) 483 | { 484 | // Euclidean distance to sampling pixel 485 | const double dx = (clamp(is_border ? xpos : quantized_xpos, 0.f, static_cast(src_width - 1)) - window_x) * filter_step_x; 486 | const double dy = (clamp(is_border ? 
ypos : quantized_ypos, 0.f, static_cast(src_height - 1)) - window_y) * filter_step_y; 487 | 488 | int index = static_cast(llround((samples - 1) * (dx * dx + dy * dy) / radius2 + DOUBLE_ROUND_MAGIC_NUMBER)); 489 | 490 | const float factor = func->GetFactor(index); 491 | 492 | tmp_array[curr_factor_ptr + static_cast(lx)] = factor; 493 | divider += factor; 494 | 495 | ++window_x; 496 | } 497 | 498 | curr_factor_ptr += out->coeff_stride; 499 | 500 | window_x = window_begin_x; 501 | ++window_y; 502 | } 503 | 504 | // Second loop to divide the coeff 505 | curr_factor_ptr = tmp_array_top; 506 | for (int ly = 0; ly < filter_size; ++ly) 507 | { 508 | for (int lx = 0; lx < filter_size; ++lx) 509 | { 510 | tmp_array[curr_factor_ptr + static_cast(lx)] /= divider; 511 | } 512 | 513 | curr_factor_ptr += out->coeff_stride; 514 | } 515 | 516 | // Save factor to table 517 | if (!is_border) 518 | out->factor_map[quantized_y_value * quantize_x + quantized_x_value] = tmp_array_top + 1; 519 | 520 | meta->coeff_meta = tmp_array_top; 521 | tmp_array_top += coeff_per_pixel; 522 | } 523 | 524 | xpos += x_step; 525 | } 526 | 527 | ypos += y_step; 528 | xpos = start_x; 529 | } 530 | 531 | // Copy from tmp_array to real array 532 | out->factor = tmp_array; 533 | } 534 | 535 | /* Planar resampling with coeff table */ 536 | template 537 | void JincResize::resize_plane_c(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi) 538 | { 539 | const int planes_y[4] = { AVS_PLANAR_Y, AVS_PLANAR_U, AVS_PLANAR_V, AVS_PLANAR_A }; 540 | const int planes_r[4] = { AVS_PLANAR_G, AVS_PLANAR_B, AVS_PLANAR_R, AVS_PLANAR_A }; 541 | const int* current_planes = (avs_is_rgb(vi)) ? 
planes_r : planes_y; 542 | for (int i = 0; i < planecount; ++i) 543 | { 544 | const int plane = current_planes[i]; 545 | 546 | const int src_stride = avs_get_pitch_p(src, plane) / sizeof(T); 547 | const int dst_stride = avs_get_pitch_p(dst, plane) / sizeof(T); 548 | const int dst_width = avs_get_row_size_p(dst, plane) / sizeof(T); 549 | const int dst_height = avs_get_height_p(dst, plane); 550 | const T* srcp = reinterpret_cast(avs_get_read_ptr_p(src, plane)); 551 | 552 | EWAPixelCoeff* out = [&]() 553 | { 554 | if constexpr (subsampled) 555 | return (i) ? (i == 3) ? JincResize::out[0] : JincResize::out[1] : JincResize::out[0]; 556 | else 557 | return JincResize::out[0]; 558 | }(); 559 | 560 | auto loop = [&](int y) 561 | { 562 | T* __restrict dstp = reinterpret_cast(avs_get_write_ptr_p(dst, plane)) + static_cast(y) * dst_stride; 563 | 564 | for (int x = 0; x < dst_width; ++x) 565 | { 566 | EWAPixelCoeffMeta* meta = out->meta + static_cast(y) * dst_width + x; 567 | const T* src_ptr = srcp + meta->start_y * static_cast(src_stride) + meta->start_x; 568 | const float* coeff_ptr = out->factor + meta->coeff_meta; 569 | 570 | float result = 0.f; 571 | 572 | for (int ly = 0; ly < out->filter_size; ++ly) 573 | { 574 | for (int lx = 0; lx < out->filter_size; ++lx) 575 | result += src_ptr[lx] * coeff_ptr[lx]; 576 | 577 | coeff_ptr += out->coeff_stride; 578 | src_ptr += src_stride; 579 | } 580 | 581 | if constexpr (std::is_integral_v) 582 | dstp[x] = static_cast(lrintf(clamp(result, 0.f, peak))); 583 | else 584 | dstp[x] = result; 585 | 586 | } 587 | }; 588 | 589 | if constexpr (thr) 590 | { 591 | for (intptr_t i = 0; i < dst_height; ++i) 592 | loop(i); 593 | } 594 | else 595 | { 596 | std::vector l(dst_height); 597 | std::iota(std::begin(l), std::end(l), 0); 598 | std::for_each(std::execution::par, std::begin(l), std::end(l), loop); 599 | } 600 | } 601 | } 602 | 603 | static AVS_VideoFrame* AVSC_CC JincResize_GetFrame(AVS_FilterInfo* fi, int n) 604 | { 605 | JincResize* d = 
reinterpret_cast(fi->user_data); 606 | AVS_ScriptEnvironment* env = fi->env; 607 | AVS_VideoInfo* vi = &fi->vi; 608 | 609 | AVS_VideoFrame* src = avs_get_frame(fi->child, n); 610 | if (!src) 611 | return nullptr; 612 | 613 | AVS_VideoFrame* dst = avs_new_video_frame_p(env, vi, src); 614 | 615 | (d->*d->process_frame)(src, dst, vi); 616 | 617 | if ((avs_is_420(vi) || avs_is_422(vi) || avs_is_yv411(vi))) 618 | { 619 | if (d->cplace == "mpeg2") 620 | avs_prop_set_int(env, avs_get_frame_props_rw(env, dst), "_ChromaLocation", 0, 0); 621 | else if (d->cplace == "mpeg1") 622 | avs_prop_set_int(env, avs_get_frame_props_rw(env, dst), "_ChromaLocation", 1, 0); 623 | else 624 | avs_prop_set_int(env, avs_get_frame_props_rw(env, dst), "_ChromaLocation", 2, 0); 625 | } 626 | 627 | avs_release_video_frame(src); 628 | 629 | return dst; 630 | } 631 | 632 | static void AVSC_CC free_JincResize(AVS_FilterInfo* fi) 633 | { 634 | JincResize* d = reinterpret_cast(fi->user_data); 635 | std::vector* out = &d->out; 636 | 637 | for (int i = 0; i < static_cast(out->size()); ++i) 638 | { 639 | delete_coeff_table((*out)[i]); 640 | delete (*out)[i]; 641 | } 642 | 643 | delete[] d->init_lut->lut; 644 | delete d->init_lut; 645 | 646 | delete d; 647 | } 648 | 649 | static int AVSC_CC set_cache_hints_JincResize(AVS_FilterInfo* fi, int cachehints, int frame_range) 650 | { 651 | return cachehints == AVS_CACHE_GET_MTMODE ? 
2 : 0; 652 | } 653 | 654 | static AVS_Value AVSC_CC Create_JincResize(AVS_ScriptEnvironment* env, AVS_Value args, void* param) 655 | { 656 | enum 657 | { 658 | Clip, 659 | Target_width, 660 | Target_height, 661 | Src_left, 662 | Src_top, 663 | Src_width, 664 | Src_height, 665 | Quant_x, 666 | Quant_y, 667 | Tap, 668 | Blur, 669 | Cplace, 670 | Threads, 671 | Opt, 672 | Initial_capacity, 673 | Initial_factor 674 | }; 675 | 676 | JincResize* d = reinterpret_cast(new JincResize()); 677 | 678 | AVS_FilterInfo* fi; 679 | AVS_Clip* clip = avs_new_c_filter(env, &fi, avs_array_elt(args, Clip), 1); 680 | AVS_VideoInfo* vi = &fi->vi; 681 | 682 | const auto set_error = [&](AVS_Clip* clip, const char* msg) 683 | { 684 | avs_release_clip(clip); 685 | 686 | return avs_new_value_error(msg); 687 | }; 688 | 689 | if (!avs_check_version(env, 9)) 690 | { 691 | if (avs_check_version(env, 10)) 692 | { 693 | if (avs_get_env_property(env, AVS_AEP_INTERFACE_BUGFIX) < 2) 694 | return set_error(clip, "JincResize: AviSynth+ version must be r3688 or later."); 695 | } 696 | } 697 | else 698 | return set_error(clip, "JincResize: AviSynth+ version must be r3688 or later."); 699 | 700 | if (!avs_is_planar(vi)) 701 | return set_error(clip, "JincResize: clip must be in planar format."); 702 | 703 | const int tap = avs_defined(avs_array_elt(args, Tap)) ? avs_as_int(avs_array_elt(args, Tap)) : 3; 704 | if (tap < 1 || tap > 16) 705 | return set_error(clip, "JincResize: tap must be between 1..16."); 706 | 707 | const int quant_x = avs_defined(avs_array_elt(args, Quant_x)) ? avs_as_int(avs_array_elt(args, Quant_x)) : 256; 708 | if (quant_x < 1 || quant_x > 256) 709 | return set_error(clip, "JincResize: quant_x must be between 1..256."); 710 | 711 | const int quant_y = avs_defined(avs_array_elt(args, Quant_y)) ? 
avs_as_int(avs_array_elt(args, Quant_y)) : 256; 712 | if (quant_y < 1 || quant_y > 256) 713 | return set_error(clip, "JincResize: quant_y must be between 1..256."); 714 | 715 | std::string cplace = avs_defined(avs_array_elt(args, Cplace)) ? avs_as_string(avs_array_elt(args, Cplace)) : ""; 716 | 717 | if (!cplace.empty()) 718 | { 719 | for (auto& c : cplace) 720 | c = tolower(c); 721 | 722 | if (cplace != "mpeg2" && cplace != "mpeg1" && cplace != "topleft") 723 | return set_error(clip, "JincResize: cplace must be MPEG2, MPEG1 or topleft."); 724 | } 725 | else 726 | { 727 | AVS_VideoFrame* frame0 = avs_get_frame(clip, 0); 728 | const AVS_Map* props = avs_get_frame_props_ro(env, frame0); 729 | 730 | if (avs_prop_get_type(env, props, "_ChromaLocation") == 'i') 731 | { 732 | switch (avs_prop_get_int(env, props, "_ChromaLocation", 0, nullptr)) 733 | { 734 | case 0: cplace = "mpeg2"; break; 735 | case 1: cplace = "mpeg1"; break; 736 | case 2: cplace = "topleft"; break; 737 | default: return set_error(clip, "JincResize: invalid _ChromaLocation"); break; 738 | } 739 | } 740 | else 741 | cplace = "mpeg2"; 742 | } 743 | 744 | if (cplace == "topleft" && !avs_is_420(vi)) 745 | return set_error(clip, "JincResize: topleft must be used only for 4:2:0 chroma subsampling."); 746 | 747 | const int opt = avs_defined(avs_array_elt(args, Opt)) ? avs_as_int(avs_array_elt(args, Opt)) : -1; 748 | const int cpu_flags = avs_get_cpu_flags(env); 749 | if (opt > 3) 750 | return set_error(clip, "JincResize: opt higher than 3 is not allowed."); 751 | if (opt == 3 && !(cpu_flags & AVS_CPUF_AVX512F)) 752 | return set_error(clip, "JincResize: opt=3 requires AVX-512F."); 753 | if (opt == 2 && !(cpu_flags & AVS_CPUF_AVX2)) 754 | return set_error(clip, "JincResize: opt=2 requires AVX2."); 755 | if (opt == 1 && !(cpu_flags & AVS_CPUF_SSE4_1)) 756 | return set_error(clip, "JincResize: opt=1 requires SSE4.1."); 757 | 758 | const int threads = avs_defined(avs_array_elt(args, Threads)) ? 
avs_as_int(avs_array_elt(args, Threads)) : 0; 759 | if (threads < 0 || threads > 1) 760 | return set_error(clip, "JincResize: threads must be either 0 or 1."); 761 | 762 | double crop_left = avs_defined(avs_array_elt(args, Src_left)) ? avs_as_float(avs_array_elt(args, Src_left)) : 0.0; 763 | double crop_width = avs_defined(avs_array_elt(args, Src_width)) ? avs_as_float(avs_array_elt(args, Src_width)) : static_cast(vi->width); 764 | if (crop_width <= 0.0) 765 | crop_width = vi->width - crop_left + crop_width; 766 | 767 | double crop_top = avs_defined(avs_array_elt(args, Src_top)) ? avs_as_float(avs_array_elt(args, Src_top)) : 0.0; 768 | double crop_height = avs_defined(avs_array_elt(args, Src_height)) ? avs_as_float(avs_array_elt(args, Src_height)) : static_cast(vi->height); 769 | if (crop_height <= 0.0) 770 | crop_height = vi->height - crop_top + crop_height; 771 | 772 | double blur = avs_defined(avs_array_elt(args, Blur)) ? avs_as_float(avs_array_elt(args, Blur)) : 0.0; 773 | if (!blur) 774 | blur = 1.0; 775 | 776 | const int target_width = avs_as_int(avs_array_elt(args, Target_width)); 777 | const int target_height = avs_as_int(avs_array_elt(args, Target_height)); 778 | 779 | const double initial_factor = avs_defined(avs_array_elt(args, Initial_factor)) ? avs_as_float(avs_array_elt(args, Initial_factor)) : 1.50; 780 | if (initial_factor < 1.0) 781 | return set_error(clip, "JincResize: initial_factor must be eqaul to or greater than 1.0."); 782 | 783 | const int src_width = vi->width; 784 | const int src_height = vi->height; 785 | 786 | const int initial_capacity = avs_defined(avs_array_elt(args, Initial_capacity)) ? 
avs_as_int(avs_array_elt(args, Initial_capacity)) 787 | : max(target_width * target_height, src_width * src_height); 788 | if (initial_capacity <= 0) 789 | return set_error(clip, "JincResize: initial_capacity must be greater than 0."); 790 | 791 | vi->width = target_width; 792 | vi->height = target_height; 793 | d->peak = static_cast((1 << avs_bits_per_component(vi)) - 1); 794 | const double radius = jinc_zeros[tap - 1]; 795 | constexpr int samples = 1024; // should be a multiple of 4 796 | d->init_lut = new Lut(); 797 | d->init_lut->InitLut(samples, radius, blur); 798 | d->planecount = avs_num_components(vi); 799 | bool subsampled = false; 800 | std::vector* out = &d->out; 801 | out->emplace_back(new EWAPixelCoeff()); 802 | generate_coeff_params params = 803 | { 804 | d->init_lut, 805 | d->out[0], 806 | quant_x, 807 | quant_y, 808 | samples, 809 | src_width, 810 | src_height, 811 | target_width, 812 | target_height, 813 | radius, 814 | crop_left, 815 | crop_top, 816 | crop_width, 817 | crop_height, 818 | initial_capacity, 819 | initial_factor 820 | }; 821 | 822 | try 823 | { 824 | if (d->planecount > 1) 825 | { 826 | if (avs_is_444(vi) || avs_is_rgb(vi)) 827 | generate_coeff_table_c(params); 828 | else 829 | { 830 | out->emplace_back(new EWAPixelCoeff()); 831 | 832 | subsampled = true; 833 | const int sub_w = avs_get_plane_width_subsampling(vi, AVS_PLANAR_U); 834 | const int sub_h = avs_get_plane_height_subsampling(vi, AVS_PLANAR_U); 835 | const double div_w = 1 << sub_w; 836 | const double div_h = 1 << sub_h; 837 | 838 | const double crop_left_uv = (cplace == "mpeg2" || cplace == "topleft") ? 839 | (0.5 * (1.0 - static_cast(src_width) / target_width) + crop_left) / div_w : crop_left / div_w; 840 | const double crop_top_uv = (cplace == "topleft") ? 
841 | (0.5 * (1.0 - static_cast(src_height) / target_height) + crop_top) / div_h : crop_top / div_h; 842 | 843 | generate_coeff_table_c(params); 844 | params = { 845 | d->init_lut, 846 | (*out)[1], 847 | quant_x, 848 | quant_y, 849 | samples, 850 | src_width >> sub_w, 851 | src_height >> sub_h, 852 | target_width >> sub_w, 853 | target_height >> sub_h, 854 | radius, 855 | crop_left_uv, 856 | crop_top_uv, 857 | crop_width / div_w, 858 | crop_height / div_h, 859 | initial_capacity / (static_cast(div_w) * static_cast(div_h)), 860 | initial_factor 861 | }; 862 | generate_coeff_table_c(params); 863 | } 864 | } 865 | else 866 | generate_coeff_table_c(params); 867 | } 868 | catch (const std::exception& e) 869 | { 870 | std::vector* out = &d->out; 871 | for (int i = 0; i < static_cast(d->out.size()); ++i) 872 | { 873 | delete_coeff_table((*out)[i]); 874 | delete (*out)[i]; 875 | } 876 | 877 | delete[] d->init_lut->lut; 878 | delete d->init_lut; 879 | 880 | return set_error(clip, e.what()); 881 | } 882 | catch (const char* e) 883 | { 884 | std::vector* out = &d->out; 885 | for (int i = 0; i < static_cast(d->out.size()); ++i) 886 | { 887 | delete_coeff_table((*out)[i]); 888 | delete (*out)[i]; 889 | } 890 | 891 | delete[] d->init_lut->lut; 892 | delete d->init_lut; 893 | 894 | return set_error(clip, e); 895 | } 896 | 897 | const bool avx512 = (opt == 3); 898 | const bool avx2 = (!!(cpu_flags & AVS_CPUF_AVX2) && opt < 0) || opt == 2; 899 | const bool sse41 = (!!(cpu_flags & AVS_CPUF_SSE4_1) && opt < 0) || opt == 1; 900 | 901 | if (threads) 902 | { 903 | switch (avs_component_size(vi)) 904 | { 905 | case 1: 906 | if (avx512) 907 | d->process_frame = (subsampled) ? &JincResize::resize_plane_avx512 : &JincResize::resize_plane_avx512; 908 | else if (avx2) 909 | d->process_frame = (subsampled) ? &JincResize::resize_plane_avx2 : &JincResize::resize_plane_avx2; 910 | else if (sse41) 911 | d->process_frame = (subsampled) ? 
&JincResize::resize_plane_sse41 : &JincResize::resize_plane_sse41; 912 | else 913 | d->process_frame = (subsampled) ? &JincResize::resize_plane_c : &JincResize::resize_plane_c; 914 | break; 915 | case 2: 916 | if (avx512) 917 | d->process_frame = (subsampled) ? &JincResize::resize_plane_avx512 : &JincResize::resize_plane_avx512; 918 | else if (avx2) 919 | d->process_frame = (subsampled) ? &JincResize::resize_plane_avx2 : &JincResize::resize_plane_avx2; 920 | else if (sse41) 921 | d->process_frame = (subsampled) ? &JincResize::resize_plane_sse41 : &JincResize::resize_plane_sse41; 922 | else 923 | d->process_frame = (subsampled) ? &JincResize::resize_plane_c : &JincResize::resize_plane_c; 924 | break; 925 | default: 926 | if (avx512) 927 | d->process_frame = (subsampled) ? &JincResize::resize_plane_avx512 : &JincResize::resize_plane_avx512; 928 | else if (avx2) 929 | d->process_frame = (subsampled) ? &JincResize::resize_plane_avx2 : &JincResize::resize_plane_avx2; 930 | else if (sse41) 931 | d->process_frame = (subsampled) ? &JincResize::resize_plane_sse41 : &JincResize::resize_plane_sse41; 932 | else 933 | d->process_frame = (subsampled) ? &JincResize::resize_plane_c : &JincResize::resize_plane_c; 934 | break; 935 | } 936 | } 937 | else 938 | { 939 | switch (avs_component_size(vi)) 940 | { 941 | case 1: 942 | if (avx512) 943 | d->process_frame = (subsampled) ? &JincResize::resize_plane_avx512 : &JincResize::resize_plane_avx512; 944 | else if (avx2) 945 | d->process_frame = (subsampled) ? &JincResize::resize_plane_avx2 : &JincResize::resize_plane_avx2; 946 | else if (sse41) 947 | d->process_frame = (subsampled) ? &JincResize::resize_plane_sse41 : &JincResize::resize_plane_sse41; 948 | else 949 | d->process_frame = (subsampled) ? &JincResize::resize_plane_c : &JincResize::resize_plane_c; 950 | break; 951 | case 2: 952 | if (avx512) 953 | d->process_frame = (subsampled) ? 
&JincResize::resize_plane_avx512 : &JincResize::resize_plane_avx512; 954 | else if (avx2) 955 | d->process_frame = (subsampled) ? &JincResize::resize_plane_avx2 : &JincResize::resize_plane_avx2; 956 | else if (sse41) 957 | d->process_frame = (subsampled) ? &JincResize::resize_plane_sse41 : &JincResize::resize_plane_sse41; 958 | else 959 | d->process_frame = (subsampled) ? &JincResize::resize_plane_c : &JincResize::resize_plane_c; 960 | break; 961 | default: 962 | if (avx512) 963 | d->process_frame = (subsampled) ? &JincResize::resize_plane_avx512 : &JincResize::resize_plane_avx512; 964 | else if (avx2) 965 | d->process_frame = (subsampled) ? &JincResize::resize_plane_avx2 : &JincResize::resize_plane_avx2; 966 | else if (sse41) 967 | d->process_frame = (subsampled) ? &JincResize::resize_plane_sse41 : &JincResize::resize_plane_sse41; 968 | else 969 | d->process_frame = (subsampled) ? &JincResize::resize_plane_c : &JincResize::resize_plane_c; 970 | break; 971 | } 972 | } 973 | 974 | AVS_Value v = avs_new_value_clip(clip); 975 | 976 | fi->user_data = reinterpret_cast(d); 977 | fi->get_frame = JincResize_GetFrame; 978 | fi->set_cache_hints = set_cache_hints_JincResize; 979 | fi->free_filter = free_JincResize; 980 | 981 | avs_release_clip(clip); 982 | 983 | return v; 984 | } 985 | 986 | class Arguments 987 | { 988 | AVS_Value m_args[12]; 989 | const char* m_arg_names[12]; 990 | int m_idx; 991 | 992 | public: 993 | Arguments() : m_args{}, m_arg_names{}, m_idx{} {} 994 | 995 | void add(AVS_Value arg, const char* arg_name = nullptr) 996 | { 997 | m_args[m_idx] = arg; 998 | m_arg_names[m_idx] = arg_name; 999 | ++m_idx; 1000 | } 1001 | 1002 | AVS_Value args() { return avs_new_value_array(m_args, m_idx); }; 1003 | 1004 | const char** arg_names() { return m_arg_names; }; 1005 | }; 1006 | 1007 | static void resizer(const AVS_Value& args, Arguments* out_args, int src_left_idx = 3) 1008 | { 1009 | out_args->add(avs_array_elt(args, 0)); 1010 | out_args->add(avs_array_elt(args, 1)); 
1011 | out_args->add(avs_array_elt(args, 2)); 1012 | 1013 | if (avs_defined(avs_array_elt(args, src_left_idx + 0))) 1014 | out_args->add(avs_array_elt(args, src_left_idx + 0), "src_left"); 1015 | if (avs_defined(avs_array_elt(args, src_left_idx + 1))) 1016 | out_args->add(avs_array_elt(args, src_left_idx + 1), "src_top"); 1017 | if (avs_defined(avs_array_elt(args, src_left_idx + 2))) 1018 | out_args->add(avs_array_elt(args, src_left_idx + 2), "src_width"); 1019 | if (avs_defined(avs_array_elt(args, src_left_idx + 3))) 1020 | out_args->add(avs_array_elt(args, src_left_idx + 3), "src_height"); 1021 | if (avs_defined(avs_array_elt(args, src_left_idx + 4))) 1022 | out_args->add(avs_array_elt(args, src_left_idx + 4), "quant_x"); 1023 | if (avs_defined(avs_array_elt(args, src_left_idx + 5))) 1024 | out_args->add(avs_array_elt(args, src_left_idx + 5), "quant_y"); 1025 | if (avs_defined(avs_array_elt(args, src_left_idx + 6))) 1026 | out_args->add(avs_array_elt(args, src_left_idx + 6), "cplace"); 1027 | if (avs_defined(avs_array_elt(args, src_left_idx + 7))) 1028 | out_args->add(avs_array_elt(args, src_left_idx + 7), "threads"); 1029 | } 1030 | 1031 | template 1032 | static AVS_Value AVSC_CC resizer_jincresize(AVS_ScriptEnvironment* env, AVS_Value args, void* param) 1033 | { 1034 | Arguments mapped_args; 1035 | 1036 | resizer(args, &mapped_args); 1037 | mapped_args.add(avs_new_value_int(taps), "tap"); 1038 | 1039 | return avs_invoke(env, "JincResize", mapped_args.args(), mapped_args.arg_names()); 1040 | } 1041 | 1042 | const char* AVSC_CC avisynth_c_plugin_init(AVS_ScriptEnvironment* env) 1043 | { 1044 | avs_add_function(env, "JincResize", 1045 | "c" 1046 | "i" 1047 | "i" 1048 | "[src_left]f" 1049 | "[src_top]f" 1050 | "[src_width]f" 1051 | "[src_height]f" 1052 | "[quant_x]i" 1053 | "[quant_y]i" 1054 | "[tap]i" 1055 | "[blur]f" 1056 | "[cplace]s" 1057 | "[threads]i" 1058 | "[opt]i" 1059 | "[initial_capacity]i" 1060 | "[initial_factor]f", Create_JincResize, 0); 1061 | 
avs_add_function(env, "Jinc36Resize",
           // Fixed-tap wrappers. Their optional parameters sit at indices 3..10 in the order
           // resizer() expects with its default src_left_idx of 3. The number in each name equals
           // (2 * taps)^2 for the tap count bound below (3 -> 36, 4 -> 64, 6 -> 144, 8 -> 256).
1062 |         "c"
1063 |         "i"
1064 |         "i"
1065 |         "[src_left]f"
1066 |         "[src_top]f"
1067 |         "[src_width]f"
1068 |         "[src_height]f"
1069 |         "[quant_x]i"
1070 |         "[quant_y]i"
1071 |         "[cplace]s"
1072 |         "[threads]i", resizer_jincresize<3>, 0);
1073 |     avs_add_function(env, "Jinc64Resize",
1074 |         "c"
1075 |         "i"
1076 |         "i"
1077 |         "[src_left]f"
1078 |         "[src_top]f"
1079 |         "[src_width]f"
1080 |         "[src_height]f"
1081 |         "[quant_x]i"
1082 |         "[quant_y]i"
1083 |         "[cplace]s"
1084 |         "[threads]i", resizer_jincresize<4>, 0);
1085 |     avs_add_function(env, "Jinc144Resize",
1086 |         "c"
1087 |         "i"
1088 |         "i"
1089 |         "[src_left]f"
1090 |         "[src_top]f"
1091 |         "[src_width]f"
1092 |         "[src_height]f"
1093 |         "[quant_x]i"
1094 |         "[quant_y]i"
1095 |         "[cplace]s"
1096 |         "[threads]i", resizer_jincresize<6>, 0);
1097 |     avs_add_function(env, "Jinc256Resize",
1098 |         "c"
1099 |         "i"
1100 |         "i"
1101 |         "[src_left]f"
1102 |         "[src_top]f"
1103 |         "[src_width]f"
1104 |         "[src_height]f"
1105 |         "[quant_x]i"
1106 |         "[quant_y]i"
1107 |         "[cplace]s"
1108 |         "[threads]i", resizer_jincresize<8>, 0);
1109 |
1110 |     return "JincResize";
1111 | }
1112 |
--------------------------------------------------------------------------------
/src/JincResize.h:
--------------------------------------------------------------------------------
1 | #ifndef __JINCRESIZE_H__
2 | #define __JINCRESIZE_H__
3 |
    // NOTE(review): the three header names below are missing from this text; the declarations
    // in this header use std::string and std::vector.
4 | #include
5 | #include
6 | #include
7 |
8 | #include "avisynth_c.h"
9 | #include "avs/minmax.h"
10 |
     // Per-destination-pixel lookup record used by the resize_plane_* kernels.
11 | struct EWAPixelCoeffMeta
12 | {
13 |     int start_x;    // column of the first source sample of the filter window
14 |     int start_y;    // row of the first source sample of the filter window
15 |     int coeff_meta; // offset (in floats) of this pixel's coefficients inside EWAPixelCoeff::factor
16 | };
17 |
     // Coefficient table for one plane geometry.
18 | struct EWAPixelCoeff
19 | {
20 |     float* factor;           // coefficient storage, addressed as factor + meta->coeff_meta
21 |     EWAPixelCoeffMeta* meta; // one record per destination pixel, indexed y * dst_width + x by the kernels
22 |     int* factor_map;         // not referenced by the kernels visible in this chunk
23 |     int filter_size;         // the kernels iterate filter_size rows by filter_size columns per pixel
24 |     int coeff_stride;        // floats to advance between consecutive coefficient rows
25 | };
26 |
     // Lookup table of filter values; only declared here, the member functions are defined elsewhere.
27 | class Lut
28 | {
29 |     int lut_size = 1024;
30 |
31 | public:
32 |     Lut();
33 |     void InitLut(int lut_size, double radius, double blur);
34 |     float GetFactor(int index);
35 |
36 |     double* lut; // raw pointer to the table data; who allocates and frees it is not visible in this chunk
37 | };
38 |
     // Filter state shared by all resize_plane_* implementations.
39 | struct JincResize
40 | {
41 |
std::string cplace;
42 |     Lut* init_lut;
         // Coefficient tables; the kernels read out[0] and, for subsampled formats, out[1].
         // NOTE(review): the element type is missing from this text; the kernels assign
         // out[n] to an EWAPixelCoeff*.
43 |     std::vector out;
44 |     int planecount; // number of planes each kernel iterates over
45 |     float peak;
46 |
         // One member-function template per instruction set; all share the signature
         // (source frame, destination frame, video info).
         // NOTE(review): the template parameter lists are missing from this text; the
         // definitions visible in this chunk use the names T, thr and subsampled.
47 |     template
48 |     void resize_plane_c(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
49 |     template
50 |     void resize_plane_sse41(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
51 |     template
52 |     void resize_plane_avx2(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
53 |     template
54 |     void resize_plane_avx512(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
55 |
         // Pointer to the member function chosen when the filter is created.
56 |     void(JincResize::* process_frame)(AVS_VideoFrame*, AVS_VideoFrame*, AVS_VideoInfo*);
57 | };
58 |
59 | #endif
60 |
--------------------------------------------------------------------------------
/src/JincResize.rc:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | 1 VERSIONINFO
4 | FILEVERSION 2,1,4,0
5 | PRODUCTVERSION 2,1,4,0
6 | FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
7 | FILETYPE VFT_DLL
8 | BEGIN
9 |     BLOCK "StringFileInfo"
10 |     BEGIN
11 |         BLOCK "040904E4"
12 |         BEGIN
13 |             VALUE "Comments", "Jinc (EWA Lanczos) resampling filter."
14 |             VALUE "FileDescription", "JincResize for AviSynth+"
15 |             VALUE "FileVersion", "2.1.4"
16 |             VALUE "InternalName", "JincResize"
17 |             VALUE "OriginalFilename", "JincResize.dll"
18 |             VALUE "ProductName", "JincResize"
19 |             VALUE "ProductVersion", "2.1.4"
20 |         END
21 |     END
22 |     BLOCK "VarFileInfo"
23 |     BEGIN
24 |         VALUE "Translation", 0x409, 1252
25 |     END
26 | END
27 |
--------------------------------------------------------------------------------
/src/resize_plane_avx2.cpp:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "JincResize.h"
4 |
5 | #if !defined(__AVX2__)
6 | #error "AVX2 option needed"
7 | #endif
8 |
    // AVX2 kernel: for every plane, computes each destination pixel as the weighted sum of a
    // filter_size x filter_size window of source samples, eight taps per vector step.
    // NOTE(review): the template parameter list is missing from this text; the body uses
    // T (sample type), thr (selects serial vs. parallel row processing) and subsampled.
9 | template
10 | void JincResize::resize_plane_avx2(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi)
11 | {
         // Plane identifiers in processing order for non-RGB and RGB clips respectively.
12 |     const int planes_y[4] = { AVS_PLANAR_Y, AVS_PLANAR_U, AVS_PLANAR_V, AVS_PLANAR_A };
13 |     const int planes_r[4] = { AVS_PLANAR_G, AVS_PLANAR_B, AVS_PLANAR_R, AVS_PLANAR_A };
14 |     const int* current_planes = (avs_is_rgb(vi)) ? planes_r : planes_y;
15 |     for (int i = 0; i < planecount; ++i)
16 |     {
17 |         const int plane = current_planes[i];
18 |
             // Pitches and row size are divided by sizeof(T), so these are counts of samples, not bytes.
19 |         const int src_stride = avs_get_pitch_p(src, plane) / sizeof(T);
20 |         const int dst_stride = avs_get_pitch_p(dst, plane) / sizeof(T);
21 |         const int dst_width = avs_get_row_size_p(dst, plane) / sizeof(T);
22 |         const int dst_height = avs_get_height_p(dst, plane);
23 |         const T* srcp = reinterpret_cast(avs_get_read_ptr_p(src, plane));
             // Lower bound applied to float source samples: -0.5 for every plane after the first
             // of a non-RGB clip, 0 otherwise.
             // NOTE(review): this also yields -0.5 for plane index 3, which the table selection
             // below treats like plane 0 - confirm that is intended.
24 |         const __m256 min_val = (i && !avs_is_rgb(vi)) ? _mm256_set1_ps(-0.5f) : _mm256_setzero_ps();
25 |
             // Coefficient table for this plane: with `subsampled`, planes 1 and 2 use out[1]
             // and planes 0 and 3 use out[0]; otherwise every plane uses out[0].
26 |         EWAPixelCoeff* out = [&]()
27 |         {
28 |             if constexpr (subsampled)
29 |                 return (i) ? (i == 3) ?
JincResize::out[0] : JincResize::out[1] : JincResize::out[0]; 30 | else 31 | return JincResize::out[0]; 32 | }(); 33 | 34 | auto loop = [&](int y) 35 | { 36 | T* __restrict dstp = reinterpret_cast(avs_get_write_ptr_p(dst, plane)) + static_cast(y) * dst_stride; 37 | 38 | for (int x = 0; x < dst_width; ++x) 39 | { 40 | EWAPixelCoeffMeta* meta = out->meta + static_cast(y) * dst_width + x; 41 | const T* src_ptr = srcp + (meta->start_y * static_cast(src_stride)) + meta->start_x; 42 | const float* coeff_ptr = out->factor + meta->coeff_meta; 43 | __m256 result = _mm256_setzero_ps(); 44 | 45 | if constexpr (std::is_same_v) 46 | { 47 | for (int ly = 0; ly < out->filter_size; ++ly) 48 | { 49 | for (int lx = 0; lx < out->filter_size; lx += 8) 50 | { 51 | const __m256 src_ps = _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(_mm_loadu_si128(const_cast<__m128i*>(reinterpret_cast(src_ptr + lx))))); 52 | const __m256 coeff = _mm256_load_ps(coeff_ptr + lx); 53 | result = _mm256_fmadd_ps(src_ps, coeff, result); 54 | } 55 | 56 | coeff_ptr += out->coeff_stride; 57 | src_ptr += src_stride; 58 | } 59 | 60 | __m128 hsum = _mm_add_ps(_mm256_castps256_ps128(result), _mm256_extractf128_ps(result, 1)); 61 | hsum = _mm_hadd_ps(_mm_hadd_ps(hsum, hsum), _mm_hadd_ps(hsum, hsum)); 62 | dstp[x] = _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packus_epi32(_mm_cvtps_epi32(hsum), _mm_setzero_si128()), _mm_setzero_si128())); 63 | } 64 | else if constexpr (std::is_same_v) 65 | { 66 | for (int ly = 0; ly < out->filter_size; ++ly) 67 | { 68 | for (int lx = 0; lx < out->filter_size; lx += 8) 69 | { 70 | const __m256 src_ps = _mm256_cvtepi32_ps(_mm256_cvtepu16_epi32(_mm_loadu_si128(const_cast<__m128i*>(reinterpret_cast(src_ptr + lx))))); 71 | const __m256 coeff = _mm256_load_ps(coeff_ptr + lx); 72 | result = _mm256_fmadd_ps(src_ps, coeff, result); 73 | } 74 | 75 | coeff_ptr += out->coeff_stride; 76 | src_ptr += src_stride; 77 | } 78 | 79 | __m128 hsum = _mm_add_ps(_mm256_castps256_ps128(result), 
_mm256_extractf128_ps(result, 1));
80 |                     hsum = _mm_hadd_ps(_mm_hadd_ps(hsum, hsum), _mm_hadd_ps(hsum, hsum));
                         // Narrowing saturates to the unsigned 16-bit range.
                         // NOTE(review): the `peak` member is not applied here - confirm whether formats
                         // with fewer than 16 significant bits need an explicit clamp.
81 |                     dstp[x] = _mm_cvtsi128_si32(_mm_packus_epi32(_mm_cvtps_epi32(hsum), _mm_setzero_si128()));
82 |                 }
                     // Remaining sample type (float): samples are clamped from below to min_val before weighting.
83 |                 else
84 |                 {
85 |                     for (int ly = 0; ly < out->filter_size; ++ly)
86 |                     {
87 |                         for (int lx = 0; lx < out->filter_size; lx += 8)
88 |                         {
89 |                             const __m256 src_ps = _mm256_max_ps(_mm256_loadu_ps(src_ptr + lx), min_val);
90 |                             const __m256 coeff = _mm256_load_ps(coeff_ptr + lx);
91 |                             result = _mm256_fmadd_ps(src_ps, coeff, result);
92 |                         }
93 |
94 |                         coeff_ptr += out->coeff_stride;
95 |                         src_ptr += src_stride;
96 |                     }
97 |
                         // Horizontal sum of all eight lanes; no conversion or clamping of the result.
98 |                     __m128 hsum = _mm_add_ps(_mm256_castps256_ps128(result), _mm256_extractf128_ps(result, 1));
99 |                     dstp[x] = _mm_cvtss_f32(_mm_hadd_ps(_mm_hadd_ps(hsum, hsum), _mm_hadd_ps(hsum, hsum)));
100 |                 }
101 |             }
102 |         };
103 |
              // `thr` true: rows are processed one after another on the calling thread.
              // NOTE(review): this inner `i` shadows the plane index and is narrowed from intptr_t
              // to the lambda's int parameter.
104 |         if constexpr (thr)
105 |         {
106 |             for (intptr_t i = 0; i < dst_height; ++i)
107 |                 loop(i);
108 |         }
              // Otherwise the row indices 0..dst_height-1 are materialised and handed to the
              // parallel std::for_each; each row writes only its own destination row.
109 |         else
110 |         {
111 |             std::vector l(dst_height);
112 |             std::iota(std::begin(l), std::end(l), 0);
113 |             std::for_each(std::execution::par, std::begin(l), std::end(l), loop);
114 |         }
115 |     }
116 | }
117 |
       // Explicit instantiations (twelve in total, in four groups of three).
       // NOTE(review): their template arguments are missing from this text.
118 | template void JincResize::resize_plane_avx2(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
119 | template void JincResize::resize_plane_avx2(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
120 | template void JincResize::resize_plane_avx2(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
121 |
122 | template void JincResize::resize_plane_avx2(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
123 | template void JincResize::resize_plane_avx2(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
124 | template void JincResize::resize_plane_avx2(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
125 |
126 | template void JincResize::resize_plane_avx2(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
127 | template void
JincResize::resize_plane_avx2(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
128 | template void JincResize::resize_plane_avx2(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
129 |
130 | template void JincResize::resize_plane_avx2(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
131 | template void JincResize::resize_plane_avx2(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
132 | template void JincResize::resize_plane_avx2(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
133 |
--------------------------------------------------------------------------------
/src/resize_plane_avx512.cpp:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "JincResize.h"
4 |
    // Refuse to build without AVX-512F unless the Intel compiler is in use.
5 | #if !defined(__AVX512F__ ) && !defined(__INTEL_COMPILER)
6 | #error "AVX512 option needed"
7 | #endif
8 |
    // AVX-512 kernel: same per-plane, per-row, per-pixel structure as the other resize_plane_*
    // implementations, processing sixteen taps per vector step.
    // NOTE(review): the template parameter list is missing from this text; the body uses
    // T (sample type), thr (selects serial vs. parallel row processing) and subsampled.
9 | template
10 | void JincResize::resize_plane_avx512(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi)
11 | {
         // Plane identifiers in processing order for non-RGB and RGB clips respectively.
12 |     const int planes_y[4] = { AVS_PLANAR_Y, AVS_PLANAR_U, AVS_PLANAR_V, AVS_PLANAR_A };
13 |     const int planes_r[4] = { AVS_PLANAR_G, AVS_PLANAR_B, AVS_PLANAR_R, AVS_PLANAR_A };
14 |     const int* current_planes = (avs_is_rgb(vi)) ? planes_r : planes_y;
15 |     for (int i = 0; i < planecount; ++i)
16 |     {
17 |         const int plane = current_planes[i];
18 |
             // Pitches and row size are divided by sizeof(T), so these are counts of samples, not bytes.
19 |         const int src_stride = avs_get_pitch_p(src, plane) / sizeof(T);
20 |         const int dst_stride = avs_get_pitch_p(dst, plane) / sizeof(T);
21 |         const int dst_width = avs_get_row_size_p(dst, plane) / sizeof(T);
22 |         const int dst_height = avs_get_height_p(dst, plane);
23 |         const T* srcp = reinterpret_cast(avs_get_read_ptr_p(src, plane));
             // Lower bound applied to float source samples: -0.5 for every plane after the first
             // of a non-RGB clip, 0 otherwise.
             // NOTE(review): this also yields -0.5 for plane index 3, which the table selection
             // below treats like plane 0 - confirm that is intended.
24 |         const __m512 min_val = (i && !avs_is_rgb(vi)) ? _mm512_set1_ps(-0.5f) : _mm512_setzero_ps();
25 |
             // Coefficient table for this plane: with `subsampled`, planes 1 and 2 use out[1]
             // and planes 0 and 3 use out[0]; otherwise every plane uses out[0].
26 |         EWAPixelCoeff* out = [&]()
27 |         {
28 |             if constexpr (subsampled)
29 |                 return (i) ? (i == 3) ?
JincResize::out[0] : JincResize::out[1] : JincResize::out[0];
30 |             else
31 |                 return JincResize::out[0];
32 |         }();
33 |
             // Computes one destination row `y`. Captures everything by reference, so it must not
             // outlive the enclosing plane iteration.
34 |         auto loop = [&](int y)
35 |         {
36 |             T* __restrict dstp = reinterpret_cast(avs_get_write_ptr_p(dst, plane)) + static_cast(y) * dst_stride;
37 |
38 |             for (int x = 0; x < dst_width; ++x)
39 |             {
                     // Per-pixel record: top-left source sample of the window and the offset of its coefficients.
40 |                 EWAPixelCoeffMeta* meta = out->meta + static_cast(y) * dst_width + x;
41 |                 const T* src_ptr = srcp + (meta->start_y * static_cast(src_stride)) + meta->start_x;
42 |                 const float* coeff_ptr = out->factor + meta->coeff_meta;
43 |                 __m512 result = _mm512_setzero_ps();
44 |
                     // 8-bit samples: sixteen bytes are loaded and all sixteen are widened.
45 |                 if constexpr (std::is_same_v)
46 |                 {
47 |                     for (int ly = 0; ly < out->filter_size; ++ly)
48 |                     {
                             // Sixteen taps per step. NOTE(review): when filter_size is not a multiple of 16
                             // the last step covers taps beyond filter_size; presumably the coefficient rows
                             // are padded accordingly - confirm where the tables are built.
49 |                         for (int lx = 0; lx < out->filter_size; lx += 16)
50 |                         {
51 |                             const __m512 src_ps = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm_loadu_si128(reinterpret_cast(src_ptr + lx))));
                                 // Aligned load: coefficient rows must be 64-byte aligned.
52 |                             const __m512 coeff = _mm512_load_ps(coeff_ptr + lx);
53 |                             result = _mm512_fmadd_ps(src_ps, coeff, result);
54 |                         }
55 |
56 |                         coeff_ptr += out->coeff_stride;
57 |                         src_ptr += src_stride;
58 |                     }
59 |
                         // Fold 512 -> 256 -> 128 bits, then two rounds of horizontal adds leave the total in every lane.
60 |                     const __m256 lo_hi_256 = _mm256_add_ps(_mm512_castps512_ps256(result), _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(result), 1)));
61 |                     __m128 hsum = _mm_add_ps(_mm256_castps256_ps128(lo_hi_256), _mm256_extractf128_ps(lo_hi_256, 1));
62 |                     hsum = _mm_hadd_ps(_mm_hadd_ps(hsum, hsum), _mm_hadd_ps(hsum, hsum));
                         // Convert to integer and narrow with unsigned saturation; the low byte is stored.
63 |                     dstp[x] = _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packus_epi32(_mm_cvtps_epi32(hsum), _mm_setzero_si128()), _mm_setzero_si128()));
64 |                 }
                     // 16-bit samples: thirty-two bytes are loaded and all sixteen samples are widened.
65 |                 else if constexpr (std::is_same_v)
66 |                 {
67 |                     for (int ly = 0; ly < out->filter_size; ++ly)
68 |                     {
69 |                         for (int lx = 0; lx < out->filter_size; lx += 16)
70 |                         {
71 |                             const __m512 src_ps = _mm512_cvtepi32_ps(_mm512_cvtepu16_epi32(_mm256_loadu_si256(reinterpret_cast(src_ptr + lx))));
72 |                             const __m512 coeff = _mm512_load_ps(coeff_ptr + lx);
73 |                             result = _mm512_fmadd_ps(src_ps, coeff, result);
74 |                         }
75 |
76 |                         coeff_ptr += out->coeff_stride;
77 |                         src_ptr
+= src_stride;
78 |                     }
79 |
80 |                     const __m256 lo_hi_256 = _mm256_add_ps(_mm512_castps512_ps256(result), _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(result), 1)));
81 |                     __m128 hsum = _mm_add_ps(_mm256_castps256_ps128(lo_hi_256), _mm256_extractf128_ps(lo_hi_256, 1));
82 |                     hsum = _mm_hadd_ps(_mm_hadd_ps(hsum, hsum), _mm_hadd_ps(hsum, hsum));
                         // Narrowing saturates to the unsigned 16-bit range.
                         // NOTE(review): the `peak` member is not applied here - confirm whether formats
                         // with fewer than 16 significant bits need an explicit clamp.
83 |                     dstp[x] = _mm_cvtsi128_si32(_mm_packus_epi32(_mm_cvtps_epi32(hsum), _mm_setzero_si128()));
84 |                 }
                     // Remaining sample type (float): samples are clamped from below to min_val before weighting.
85 |                 else
86 |                 {
87 |                     for (int ly = 0; ly < out->filter_size; ++ly)
88 |                     {
89 |                         for (int lx = 0; lx < out->filter_size; lx += 16)
90 |                         {
91 |                             const __m512 src_ps = _mm512_max_ps(_mm512_loadu_ps(src_ptr + lx), min_val);
92 |                             const __m512 coeff = _mm512_load_ps(coeff_ptr + lx);
93 |                             result = _mm512_fmadd_ps(src_ps, coeff, result);
94 |                         }
95 |
96 |                         coeff_ptr += out->coeff_stride;
97 |                         src_ptr += src_stride;
98 |                     }
99 |
                          // Horizontal sum of all sixteen lanes; no conversion or clamping of the result.
100 |                     const __m256 lo_hi_256 = _mm256_add_ps(_mm512_castps512_ps256(result), _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(result), 1)));
101 |                     __m128 hsum = _mm_add_ps(_mm256_castps256_ps128(lo_hi_256), _mm256_extractf128_ps(lo_hi_256, 1));
102 |                     dstp[x] = _mm_cvtss_f32(_mm_hadd_ps(_mm_hadd_ps(hsum, hsum), _mm_hadd_ps(hsum, hsum)));
103 |                 }
104 |             }
105 |         };
106 |
              // `thr` true: rows are processed one after another on the calling thread.
              // NOTE(review): this inner `i` shadows the plane index and is narrowed from intptr_t
              // to the lambda's int parameter.
107 |         if constexpr (thr)
108 |         {
109 |             for (intptr_t i = 0; i < dst_height; ++i)
110 |                 loop(i);
111 |         }
              // Otherwise the row indices 0..dst_height-1 are materialised and handed to the
              // parallel std::for_each; each row writes only its own destination row.
112 |         else
113 |         {
114 |             std::vector l(dst_height);
115 |             std::iota(std::begin(l), std::end(l), 0);
116 |             std::for_each(std::execution::par, std::begin(l), std::end(l), loop);
117 |         }
118 |     }
119 | }
120 |
       // Explicit instantiations (twelve in total, in four groups of three).
       // NOTE(review): their template arguments are missing from this text.
121 | template void JincResize::resize_plane_avx512(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
122 | template void JincResize::resize_plane_avx512(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
123 | template void JincResize::resize_plane_avx512(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
124 |
125 | template void JincResize::resize_plane_avx512(AVS_VideoFrame* src,
AVS_VideoFrame* dst, AVS_VideoInfo* vi);
126 | template void JincResize::resize_plane_avx512(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
127 | template void JincResize::resize_plane_avx512(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
128 |
129 | template void JincResize::resize_plane_avx512(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
130 | template void JincResize::resize_plane_avx512(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
131 | template void JincResize::resize_plane_avx512(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
132 |
133 | template void JincResize::resize_plane_avx512(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
134 | template void JincResize::resize_plane_avx512(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
135 | template void JincResize::resize_plane_avx512(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
136 |
--------------------------------------------------------------------------------
/src/resize_plane_sse41.cpp:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "JincResize.h"
4 |
    // SSE4.1 kernel: same per-plane, per-row, per-pixel structure as the other resize_plane_*
    // implementations, processing four taps per vector step. Unlike the AVX2 and AVX-512
    // files, this one has no compile-time check for the required instruction set.
    // NOTE(review): the template parameter list is missing from this text; the body uses
    // T (sample type), thr (selects serial vs. parallel row processing) and subsampled.
5 | template
6 | void JincResize::resize_plane_sse41(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi)
7 | {
        // Plane identifiers in processing order for non-RGB and RGB clips respectively.
8 |     const int planes_y[4] = { AVS_PLANAR_Y, AVS_PLANAR_U, AVS_PLANAR_V, AVS_PLANAR_A };
9 |     const int planes_r[4] = { AVS_PLANAR_G, AVS_PLANAR_B, AVS_PLANAR_R, AVS_PLANAR_A };
10 |     const int* current_planes = (avs_is_rgb(vi)) ?
planes_r : planes_y;
11 |     for (int i = 0; i < planecount; ++i)
12 |     {
13 |         const int plane = current_planes[i];
14 |
             // Pitches and row size are divided by sizeof(T), so these are counts of samples, not bytes.
15 |         const int src_stride = avs_get_pitch_p(src, plane) / sizeof(T);
16 |         const int dst_stride = avs_get_pitch_p(dst, plane) / sizeof(T);
17 |         const int dst_width = avs_get_row_size_p(dst, plane) / sizeof(T);
18 |         const int dst_height = avs_get_height_p(dst, plane);
19 |         const T* srcp = reinterpret_cast(avs_get_read_ptr_p(src, plane));
             // Lower bound applied to float source samples: -0.5 for every plane after the first
             // of a non-RGB clip, 0 otherwise.
             // NOTE(review): this also yields -0.5 for plane index 3, which the table selection
             // below treats like plane 0 - confirm that is intended.
20 |         const __m128 min_val = (i && !avs_is_rgb(vi)) ? _mm_set_ps1(-0.5f) : _mm_setzero_ps();
21 |
             // Coefficient table for this plane: with `subsampled`, planes 1 and 2 use out[1]
             // and planes 0 and 3 use out[0]; otherwise every plane uses out[0].
22 |         EWAPixelCoeff* out = [&]()
23 |         {
24 |             if constexpr (subsampled)
25 |                 return (i) ? (i == 3) ? JincResize::out[0] : JincResize::out[1] : JincResize::out[0];
26 |             else
27 |                 return JincResize::out[0];
28 |         }();
29 |
             // Computes one destination row `y`. Captures everything by reference, so it must not
             // outlive the enclosing plane iteration.
30 |         auto loop = [&](int y)
31 |         {
32 |             T* __restrict dstp = reinterpret_cast(avs_get_write_ptr_p(dst, plane)) + static_cast(y) * dst_stride;
33 |
34 |             for (int x = 0; x < dst_width; ++x)
35 |             {
                     // Per-pixel record: top-left source sample of the window and the offset of its coefficients.
36 |                 EWAPixelCoeffMeta* meta = out->meta + static_cast(y) * dst_width + x;
37 |                 const T* src_ptr = srcp + (meta->start_y * static_cast(src_stride)) + meta->start_x;
38 |                 const float* coeff_ptr = out->factor + meta->coeff_meta;
39 |                 __m128 result = _mm_setzero_ps();
40 |
                     // 8-bit samples.
41 |                 if constexpr (std::is_same_v)
42 |                 {
43 |                     for (int ly = 0; ly < out->filter_size; ++ly)
44 |                     {
                             // Four taps per step. NOTE(review): when filter_size is not a multiple of 4 the
                             // last step covers taps beyond filter_size; presumably the coefficient rows are
                             // padded accordingly - confirm where the tables are built.
45 |                         for (int lx = 0; lx < out->filter_size; lx += 4)
46 |                         {
                                 // Reads four samples at once by dereferencing the sample pointer through a cast.
                                 // NOTE(review): the cast's target type is missing from this text (presumably a
                                 // 32-bit integer pointer); src_ptr + lx is not guaranteed to be 4-byte aligned,
                                 // so a memcpy-style load would be the strictly conforming way to do this.
47 |                             const __m128 src_ps = _mm_cvtepi32_ps(_mm_cvtepu8_epi32(_mm_cvtsi32_si128(*(reinterpret_cast(src_ptr + lx)))));
                                 // Aligned load: coefficient rows must be 16-byte aligned.
48 |                             const __m128 coeff = _mm_load_ps(coeff_ptr + lx);
49 |                             result = _mm_add_ps(result, _mm_mul_ps(src_ps, coeff));
50 |                         }
51 |
52 |                         coeff_ptr += out->coeff_stride;
53 |                         src_ptr += src_stride;
54 |                     }
55 |
                         // Two rounds of horizontal adds leave the total in every lane.
56 |                     const __m128 hsum = _mm_hadd_ps(_mm_hadd_ps(result, result), _mm_hadd_ps(result, result));
                         // Convert to integer and narrow with unsigned saturation; the low byte is stored.
57 |                     dstp[x] = _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packus_epi32(_mm_cvtps_epi32(hsum), _mm_setzero_si128()), _mm_setzero_si128()));
58 |                 }
                     // 16-bit samples.
59 |                 else if constexpr (std::is_same_v)
60
| { 61 | for (int ly = 0; ly < out->filter_size; ++ly) 62 | { 63 | for (int lx = 0; lx < out->filter_size; lx += 4) 64 | { 65 | const __m128 src_ps = _mm_cvtepi32_ps(_mm_cvtepu16_epi32(_mm_loadu_si128(reinterpret_cast(src_ptr + lx)))); 66 | const __m128 coeff = _mm_load_ps(coeff_ptr + lx); 67 | result = _mm_add_ps(result, _mm_mul_ps(src_ps, coeff)); 68 | } 69 | 70 | coeff_ptr += out->coeff_stride; 71 | src_ptr += src_stride; 72 | } 73 | 74 | const __m128 hsum = _mm_hadd_ps(_mm_hadd_ps(result, result), _mm_hadd_ps(result, result)); 75 | dstp[x] = _mm_cvtsi128_si32(_mm_packus_epi32(_mm_cvtps_epi32(hsum), _mm_setzero_si128())); 76 | } 77 | else 78 | { 79 | for (int ly = 0; ly < out->filter_size; ++ly) 80 | { 81 | for (int lx = 0; lx < out->filter_size; lx += 4) 82 | { 83 | const __m128 src_ps = _mm_max_ps(_mm_loadu_ps(src_ptr + lx), min_val); 84 | const __m128 coeff = _mm_load_ps(coeff_ptr + lx); 85 | result = _mm_add_ps(result, _mm_mul_ps(src_ps, coeff)); 86 | } 87 | 88 | coeff_ptr += out->coeff_stride; 89 | src_ptr += src_stride; 90 | } 91 | 92 | dstp[x] = _mm_cvtss_f32(_mm_hadd_ps(_mm_hadd_ps(result, result), _mm_hadd_ps(result, result))); 93 | } 94 | } 95 | }; 96 | 97 | if constexpr (thr) 98 | { 99 | for (intptr_t i = 0; i < dst_height; ++i) 100 | loop(i); 101 | } 102 | else 103 | { 104 | std::vector l(dst_height); 105 | std::iota(std::begin(l), std::end(l), 0); 106 | std::for_each(std::execution::par, std::begin(l), std::end(l), loop); 107 | } 108 | } 109 | } 110 | 111 | template void JincResize::resize_plane_sse41(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi); 112 | template void JincResize::resize_plane_sse41(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi); 113 | template void JincResize::resize_plane_sse41(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi); 114 | 115 | template void JincResize::resize_plane_sse41(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi); 116 | template void 
JincResize::resize_plane_sse41(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
117 | template void JincResize::resize_plane_sse41(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
118 |
       // Remaining explicit instantiations of the SSE4.1 kernel.
       // NOTE(review): their template arguments are missing from this text.
119 | template void JincResize::resize_plane_sse41(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
120 | template void JincResize::resize_plane_sse41(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
121 | template void JincResize::resize_plane_sse41(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
122 |
123 | template void JincResize::resize_plane_sse41(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
124 | template void JincResize::resize_plane_sse41(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
125 | template void JincResize::resize_plane_sse41(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
126 |
--------------------------------------------------------------------------------