├── .gitattributes
├── .gitignore
├── CHANGELOG.md
├── CMakeLists.txt
├── LICENSE
├── README.md
├── cmake_uninstall.cmake.in
└── src
    ├── JincResize.cpp
    ├── JincResize.h
    ├── JincResize.rc
    ├── resize_plane_avx2.cpp
    ├── resize_plane_avx512.cpp
    └── resize_plane_sse41.cpp


/.gitattributes:
--------------------------------------------------------------------------------
 1 | #sources
 2 | *.c text
 3 | *.cc text
 4 | *.cxx text
 5 | *.cpp text
 6 | *.c++ text
 7 | *.hpp text
 8 | *.h text
 9 | *.h++ text
10 | *.hh text
11 | 
12 | # Compiled Object files
13 | *.slo binary
14 | *.lo binary
15 | *.o binary
16 | *.obj binary
17 | 
18 | # Precompiled Headers
19 | *.gch binary
20 | *.pch binary
21 | 
22 | # Compiled Dynamic libraries
23 | *.so binary
24 | *.dylib binary
25 | *.dll binary
26 | 
27 | # Compiled Static libraries
28 | *.lai binary
29 | *.la binary
30 | *.a binary
31 | *.lib binary
32 | 
33 | # Executables
34 | *.exe binary
35 | *.out binary
36 | *.app binary
37 | ###############################################################################
38 | # Set default behavior to automatically normalize line endings.
39 | ###############################################################################
40 | * text=auto
41 | 
42 | ###############################################################################
43 | # Set the merge driver for project and solution files
44 | #
45 | # Merging from the command prompt will add diff markers to the files if there
46 | # are conflicts (Merging from VS is not affected by the settings below, in VS
47 | # the diff markers are never inserted). Diff markers may cause the following 
48 | # file extensions to fail to load in VS. An alternative would be to treat
49 | # these files as binary and thus will always conflict and require user
50 | # intervention with every merge. To do so, just comment the entries below and
51 | # uncomment the group further below
52 | ###############################################################################
53 | 
54 | #*.sln        text eol=crlf
55 | #*.csproj     text eol=crlf
56 | #*.vbproj     text eol=crlf
57 | #*.vcxproj    text eol=crlf
58 | #*.vcproj     text eol=crlf
59 | #*.dbproj     text eol=crlf
60 | #*.fsproj     text eol=crlf
61 | #*.lsproj     text eol=crlf
62 | #*.wixproj    text eol=crlf
63 | #*.modelproj  text eol=crlf
64 | #*.sqlproj    text eol=crlf
65 | #*.wmaproj    text eol=crlf
66 | 
67 | #*.xproj      text eol=crlf
68 | #*.props      text eol=crlf
69 | #*.filters    text eol=crlf
70 | #*.vcxitems   text eol=crlf
71 | 
72 | 
73 | *.sln       merge=binary
74 | *.csproj    merge=binary
75 | *.vbproj    merge=binary
76 | *.vcxproj   merge=binary
77 | *.vcproj    merge=binary
78 | *.dbproj    merge=binary
79 | *.fsproj    merge=binary
80 | *.lsproj    merge=binary
81 | *.wixproj   merge=binary
82 | *.modelproj merge=binary
83 | *.sqlproj   merge=binary
84 | *.wwaproj   merge=binary
85 | 
86 | *.xproj     merge=binary
87 | *.props     merge=binary
88 | *.filters   merge=binary
89 | *.vcxitems  merge=binary
90 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | ## Ignore Visual Studio temporary files, build results, and
  2 | ## files generated by popular Visual Studio add-ons.
  3 | ##
  4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
  5 | 
  6 | # User-specific files
  7 | *.rsuser
  8 | *.suo
  9 | *.user
 10 | *.userosscache
 11 | *.sln.docstates
 12 | 
 13 | # User-specific files (MonoDevelop/Xamarin Studio)
 14 | *.userprefs
 15 | 
 16 | # Build results
 17 | [Dd]ebug/
 18 | [Dd]ebugPublic/
 19 | [Rr]elease/
 20 | [Rr]eleases/
 21 | x64/
 22 | x86/
 23 | [Aa][Rr][Mm]/
 24 | [Aa][Rr][Mm]64/
 25 | bld/
 26 | [Bb]in/
 27 | [Oo]bj/
 28 | [Ll]og/
 29 | 
 30 | # Visual Studio 2015/2017 cache/options directory
 31 | .vs/
 32 | # Uncomment if you have tasks that create the project's static files in wwwroot
 33 | #wwwroot/
 34 | 
 35 | # Visual Studio 2017 auto generated files
 36 | Generated\ Files/
 37 | 
 38 | # MSTest test Results
 39 | [Tt]est[Rr]esult*/
 40 | [Bb]uild[Ll]og.*
 41 | 
 42 | # NUNIT
 43 | *.VisualState.xml
 44 | TestResult.xml
 45 | 
 46 | # Build Results of an ATL Project
 47 | [Dd]ebugPS/
 48 | [Rr]eleasePS/
 49 | dlldata.c
 50 | 
 51 | # Benchmark Results
 52 | BenchmarkDotNet.Artifacts/
 53 | 
 54 | # .NET Core
 55 | project.lock.json
 56 | project.fragment.lock.json
 57 | artifacts/
 58 | 
 59 | # StyleCop
 60 | StyleCopReport.xml
 61 | 
 62 | # Files built by Visual Studio
 63 | *_i.c
 64 | *_p.c
 65 | *_h.h
 66 | *.ilk
 67 | *.meta
 68 | *.obj
 69 | *.iobj
 70 | *.pch
 71 | *.pdb
 72 | *.ipdb
 73 | *.pgc
 74 | *.pgd
 75 | *.rsp
 76 | *.sbr
 77 | *.tlb
 78 | *.tli
 79 | *.tlh
 80 | *.tmp
 81 | *.tmp_proj
 82 | *_wpftmp.csproj
 83 | *.log
 84 | *.vspscc
 85 | *.vssscc
 86 | .builds
 87 | *.pidb
 88 | *.svclog
 89 | *.scc
 90 | 
 91 | # Chutzpah Test files
 92 | _Chutzpah*
 93 | 
 94 | # Visual C++ cache files
 95 | ipch/
 96 | *.aps
 97 | *.ncb
 98 | *.opendb
 99 | *.opensdf
100 | *.sdf
101 | *.cachefile
102 | *.VC.db
103 | *.VC.VC.opendb
104 | 
105 | # Visual Studio profiler
106 | *.psess
107 | *.vsp
108 | *.vspx
109 | *.sap
110 | 
111 | # Visual Studio Trace Files
112 | *.e2e
113 | 
114 | # TFS 2012 Local Workspace
115 | $tf/
116 | 
117 | # Guidance Automation Toolkit
118 | *.gpState
119 | 
120 | # ReSharper is a .NET coding add-in
121 | _ReSharper*/
122 | *.[Rr]e[Ss]harper
123 | *.DotSettings.user
124 | 
125 | # JustCode is a .NET coding add-in
126 | .JustCode
127 | 
128 | # TeamCity is a build add-in
129 | _TeamCity*
130 | 
131 | # DotCover is a Code Coverage Tool
132 | *.dotCover
133 | 
134 | # AxoCover is a Code Coverage Tool
135 | .axoCover/*
136 | !.axoCover/settings.json
137 | 
138 | # Visual Studio code coverage results
139 | *.coverage
140 | *.coveragexml
141 | 
142 | # NCrunch
143 | _NCrunch_*
144 | .*crunch*.local.xml
145 | nCrunchTemp_*
146 | 
147 | # MightyMoose
148 | *.mm.*
149 | AutoTest.Net/
150 | 
151 | # Web workbench (sass)
152 | .sass-cache/
153 | 
154 | # Installshield output folder
155 | [Ee]xpress/
156 | 
157 | # DocProject is a documentation generator add-in
158 | DocProject/buildhelp/
159 | DocProject/Help/*.HxT
160 | DocProject/Help/*.HxC
161 | DocProject/Help/*.hhc
162 | DocProject/Help/*.hhk
163 | DocProject/Help/*.hhp
164 | DocProject/Help/Html2
165 | DocProject/Help/html
166 | 
167 | # Click-Once directory
168 | publish/
169 | 
170 | # Publish Web Output
171 | *.[Pp]ublish.xml
172 | *.azurePubxml
173 | # Note: Comment the next line if you want to checkin your web deploy settings,
174 | # but database connection strings (with potential passwords) will be unencrypted
175 | *.pubxml
176 | *.publishproj
177 | 
178 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
179 | # checkin your Azure Web App publish settings, but sensitive information contained
180 | # in these scripts will be unencrypted
181 | PublishScripts/
182 | 
183 | # NuGet Packages
184 | *.nupkg
185 | # The packages folder can be ignored because of Package Restore
186 | **/[Pp]ackages/*
187 | # except build/, which is used as an MSBuild target.
188 | !**/[Pp]ackages/build/
189 | # Uncomment if necessary however generally it will be regenerated when needed
190 | #!**/[Pp]ackages/repositories.config
191 | # NuGet v3's project.json files produces more ignorable files
192 | *.nuget.props
193 | *.nuget.targets
194 | 
195 | # Microsoft Azure Build Output
196 | csx/
197 | *.build.csdef
198 | 
199 | # Microsoft Azure Emulator
200 | ecf/
201 | rcf/
202 | 
203 | # Windows Store app package directories and files
204 | AppPackages/
205 | BundleArtifacts/
206 | Package.StoreAssociation.xml
207 | _pkginfo.txt
208 | *.appx
209 | 
210 | # Visual Studio cache files
211 | # files ending in .cache can be ignored
212 | *.[Cc]ache
213 | # but keep track of directories ending in .cache
214 | !*.[Cc]ache/
215 | 
216 | # Others
217 | ClientBin/
218 | ~$*
219 | *~
220 | *.dbmdl
221 | *.dbproj.schemaview
222 | *.jfm
223 | *.pfx
224 | *.publishsettings
225 | orleans.codegen.cs
226 | 
227 | # Including strong name files can present a security risk
228 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
229 | #*.snk
230 | 
231 | # Since there are multiple workflows, uncomment next line to ignore bower_components
232 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
233 | #bower_components/
234 | # ASP.NET Core default setup: bower directory is configured as wwwroot/lib/ and bower restore is true
235 | **/wwwroot/lib/
236 | 
237 | # RIA/Silverlight projects
238 | Generated_Code/
239 | 
240 | # Backup & report files from converting an old project file
241 | # to a newer Visual Studio version. Backup files are not needed,
242 | # because we have git ;-)
243 | _UpgradeReport_Files/
244 | Backup*/
245 | UpgradeLog*.XML
246 | UpgradeLog*.htm
247 | ServiceFabricBackup/
248 | *.rptproj.bak
249 | 
250 | # SQL Server files
251 | *.mdf
252 | *.ldf
253 | *.ndf
254 | 
255 | # Business Intelligence projects
256 | *.rdl.data
257 | *.bim.layout
258 | *.bim_*.settings
259 | *.rptproj.rsuser
260 | 
261 | # Microsoft Fakes
262 | FakesAssemblies/
263 | 
264 | # GhostDoc plugin setting file
265 | *.GhostDoc.xml
266 | 
267 | # Node.js Tools for Visual Studio
268 | .ntvs_analysis.dat
269 | node_modules/
270 | 
271 | # Visual Studio 6 build log
272 | *.plg
273 | 
274 | # Visual Studio 6 workspace options file
275 | *.opt
276 | 
277 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
278 | *.vbw
279 | 
280 | # Visual Studio LightSwitch build output
281 | **/*.HTMLClient/GeneratedArtifacts
282 | **/*.DesktopClient/GeneratedArtifacts
283 | **/*.DesktopClient/ModelManifest.xml
284 | **/*.Server/GeneratedArtifacts
285 | **/*.Server/ModelManifest.xml
286 | _Pvt_Extensions
287 | 
288 | # Paket dependency manager
289 | .paket/paket.exe
290 | paket-files/
291 | 
292 | # FAKE - F# Make
293 | .fake/
294 | 
295 | # JetBrains Rider
296 | .idea/
297 | *.sln.iml
298 | 
299 | # CodeRush personal settings
300 | .cr/personal
301 | 
302 | # Python Tools for Visual Studio (PTVS)
303 | __pycache__/
304 | *.pyc
305 | 
306 | # Cake - Uncomment if you are using it
307 | # tools/**
308 | # !tools/packages.config
309 | 
310 | # Tabs Studio
311 | *.tss
312 | 
313 | # Telerik's JustMock configuration file
314 | *.jmconfig
315 | 
316 | # BizTalk build output
317 | *.btp.cs
318 | *.btm.cs
319 | *.odx.cs
320 | *.xsd.cs
321 | 
322 | # OpenCover UI analysis results
323 | OpenCover/
324 | 
325 | # Azure Stream Analytics local run output
326 | ASALocalRun/
327 | 
328 | # MSBuild Binary and Structured Log
329 | *.binlog
330 | 
331 | # NVidia Nsight GPU debugger configuration file
332 | *.nvuser
333 | 
334 | # MFractors (Xamarin productivity tool) working folder
335 | .mfractor/
336 | 
337 | # Local History for Visual Studio
338 | .localhistory/
339 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | ##### 2.1.4:
 2 |     Added parameters `initial_capacity`, `initial_factor`.
 3 |     Updated minimum AviSynth+ version to r3688.
 4 | 
 5 | ##### 2.1.3:
 6 |     Fixed bug that can cause vertical lines.
 7 |     Reduced used memory (thanks DTL2020 for the ideas).
 8 |     Fixed JincXXXResize calling.
 9 |     Changed the AviSynth+ API in use from C++ to C.
10 | 
11 | ##### 2.1.2:
12 |     Set frame property `_ChromaLocation` only for 420, 422, 411 clips.
13 | 
14 | ##### 2.1.1:
15 |     Changed back the behavior of parameter `blur`.
16 |     Set frame property `_ChromaLocation`.
17 | 
18 | ##### 2.1.0:
19 |     Added parameter cplace.
20 |     Changed omp parallel execution to C++17 parallel execution (better speed).
21 | 
22 | ##### 2.0.2:
23 |     Fixed output for SIMD and threads > 1
24 | 
25 | ##### 2.0.1:
26 |     Used MSVC instead of Intel C++ for faster binaries.
27 | 
28 | ##### 2.0.0:
29 |     Added OpenMP support to main processing loops. (DTL2020)
30 |     Added parameter 'threads'.
31 | 
32 | ##### 1.2.0:
33 |     AVX-512 code is not used as default when AVX-512 CPU instructions are available.
34 |     Fixed AVX-512 output.
35 |     Prevent 'nan' values for the float input (SIMD).
36 |     Fixed JincXXXResize parameters 'quant_x' and 'quant_y' when called by name.
37 | 
38 | ##### 1.1.0:
39 |     Added AVX-512 code.
40 | 
41 | ##### 1.0.1:
42 |     Fixed 8..16-bit processing when C++ routine is used.
43 |     Changed blur parameter.
44 |     Registered as MT_MULTI_INSTANCE.
45 | 
46 | ##### 1.0.0:
47 |     Port of the VapourSynth plugin JincResize r7.1.
48 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.16)
 2 | 
 3 | if (NOT CMAKE_GENERATOR MATCHES "Visual Studio")
 4 |     if (NOT CMAKE_BUILD_TYPE)
 5 |         set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
 6 |     endif()
 7 | endif()
 8 | 
 9 | project(JincResize LANGUAGES CXX)
10 | 
11 | add_library(JincResize SHARED)
12 | 
13 | target_sources(JincResize PRIVATE
14 |     ${CMAKE_CURRENT_SOURCE_DIR}/src/JincResize.h
15 |     ${CMAKE_CURRENT_SOURCE_DIR}/src/JincResize.cpp
16 |     ${CMAKE_CURRENT_SOURCE_DIR}/src/resize_plane_sse41.cpp
17 |     ${CMAKE_CURRENT_SOURCE_DIR}/src/resize_plane_avx2.cpp
18 |     ${CMAKE_CURRENT_SOURCE_DIR}/src/resize_plane_avx512.cpp
19 | )
20 | 
21 | if (WIN32)
22 |     target_sources(JincResize PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src/JincResize.rc)
23 | endif()
24 | 
25 | if (UNIX)
26 |     target_include_directories(JincResize PRIVATE
27 |         /usr/local/include/avisynth
28 |         /usr/local/include
29 |     )
30 | endif()
31 | 
32 | if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
33 |     target_link_libraries(JincResize PRIVATE libmmds)
34 | endif()
35 | 
if (NOT CMAKE_GENERATOR MATCHES "Visual Studio")
    # Single-config generators: choose the DEBUG_BUILD / RELEASE_BUILD define from
    # CMAKE_BUILD_TYPE. The value is lower-cased first, so it has to be compared
    # against a lower-case literal (comparing against "Debug" can never match).
    string(TOLOWER "${CMAKE_BUILD_TYPE}" build_type)
    if (build_type STREQUAL "debug")
        target_compile_definitions(JincResize PRIVATE DEBUG_BUILD)
    else()
        # Every non-debug build type is treated as a release build.
        target_compile_definitions(JincResize PRIVATE RELEASE_BUILD)
    endif()

    if (NOT MSVC)
        # NOTE(review): -s (strip symbols) is normally a link-time flag; confirm whether
        # this was meant to be a target_link_options() entry.
        target_compile_options(JincResize PRIVATE $<$<CONFIG:Release>:-s>)
    endif()

    message(STATUS "Build type - ${CMAKE_BUILD_TYPE}")
endif()
50 | 
51 | if (MSVC)
52 |     set_source_files_properties(src/resize_plane_avx2.cpp PROPERTIES COMPILE_OPTIONS "/arch:AVX2")
53 |     set_source_files_properties(src/resize_plane_avx512.cpp PROPERTIES COMPILE_OPTIONS "/arch:AVX512")
54 |     if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
55 |         set_source_files_properties(src/resize_plane_sse41.cpp PROPERTIES COMPILE_OPTIONS "-mfpmath=sse;-msse4.1")
56 |     endif()
57 | else()
58 |     set_source_files_properties(src/resize_plane_sse41.cpp PROPERTIES COMPILE_OPTIONS "-mfpmath=sse;-msse4.1")
59 |     set_source_files_properties(src/resize_plane_avx2.cpp PROPERTIES COMPILE_OPTIONS "-mavx2;-mfma")
60 |     set_source_files_properties(src/resize_plane_avx512.cpp PROPERTIES COMPILE_OPTIONS "-mavx512f;-mavx512bw;-mavx512dq;-mavx512vl;-mfma")
61 | endif()
62 | 
63 | target_link_libraries(JincResize PRIVATE avisynth)
64 | 
65 | target_compile_features(JincResize PRIVATE cxx_std_17)
66 | 
67 | if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
68 |     target_compile_options(JincResize PRIVATE "/fp:precise")
69 | endif()
70 | 
71 | if (UNIX)
72 |     find_package (Git)
73 | 
74 |     if (GIT_FOUND)
75 |         execute_process (COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0
76 |             OUTPUT_VARIABLE ver
77 |             OUTPUT_STRIP_TRAILING_WHITESPACE
78 |         )
79 |         set_target_properties(JincResize PROPERTIES OUTPUT_NAME "jincresize.${ver}")
80 |     else ()
81 |         message (STATUS "GIT not found")
82 |     endif ()
83 | 
84 |     include(GNUInstallDirs)
85 | 
86 |     INSTALL(TARGETS JincResize LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/avisynth")
87 | 
88 |     # uninstall target
89 |     if(NOT TARGET uninstall)
90 |     configure_file(
91 |         "${CMAKE_CURRENT_SOURCE_DIR}/cmake_uninstall.cmake.in"
92 |         "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
93 |         IMMEDIATE @ONLY)
94 | 
95 |     add_custom_target(uninstall
96 |         COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
97 |     endif()
98 | endif()
99 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Lypheo
 4 | Copyright (c) 2019-2020 Kiyamou
 5 | Copyright (c) 2020 luglio
 6 | Copyright (c) 2020-2025 Asd-g
 7 | Copyright (c) 2020 DTL2020
 8 | 
 9 | Permission is hereby granted, free of charge, to any person obtaining a copy
10 | of this software and associated documentation files (the "Software"), to deal
11 | in the Software without restriction, including without limitation the rights
12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 | copies of the Software, and to permit persons to whom the Software is
14 | furnished to do so, subject to the following conditions:
15 | 
16 | The above copyright notice and this permission notice shall be included in all
17 | copies or substantial portions of the Software.
18 | 
19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## Description
  2 | 
  3 | Jinc (EWA Lanczos) resampling plugin for AviSynth 2.6 / AviSynth+.
  4 | 
  5 | This is [a port of the VapourSynth plugin JincResize](https://github.com/Kiyamou/VapourSynth-JincResize).
  6 | 
  7 | SSE / AVX Intrinsics taken from [the other AviSynth plugin JincResize](https://github.com/AviSynth/jinc-resize).
  8 | 
  9 | NOTE: The 32-bit version is not supported. If you still want to use it keep in mind that the OS memory limit can be easily hit. (#10)
 10 | 
 11 | ### Requirements:
 12 | 
 13 | - AviSynth+ r3688 or later ([1](https://github.com/AviSynth/AviSynthPlus/releases) / [2](https://forum.doom9.org/showthread.php?t=181351) / [3](https://gitlab.com/uvz/AviSynthPlus-Builds))
 14 | 
 15 | - Microsoft VisualC++ Redistributable Package 2022 (can be downloaded from [here](https://github.com/abbodi1406/vcredist/releases))
 16 | 
 17 | ### Usage:
 18 | 
 19 | ```
 20 | JincResize (clip, int target_width, int target_height, float "src_left", float "src_top", float "src_width", float "src_height", int "quant_x", int "quant_y", int "tap", float "blur", string "cplace", int "threads", int "opt", int "initial_capacity", float "initial_factor")
 21 | ```
 22 | 
 23 | ##### There are 4 additional functions:
 24 |     Jinc36Resize is an alias for JincResize(tap=3).
 25 |     Jinc64Resize is an alias for JincResize(tap=4).
 26 |     Jinc144Resize is an alias for JincResize(tap=6).
 27 |     Jinc256Resize is an alias for JincResize(tap=8).
 28 | 
 29 | ```
 30 | Jinc36Resize / Jinc64Resize / Jinc144Resize / Jinc256Resize (clip, int target_width, int target_height, float "src_left", float "src_top", float "src_width", float "src_height", int "quant_x", int "quant_y", string "cplace", int "threads")
 31 | ```
 32 | 
 33 | ### Parameters:
 34 | 
 35 | - clip<br>
 36 |     A clip to process. All planar formats are supported.
 37 | 
 38 | - target_width<br>
 39 |     The width of the output.
 40 | 
 41 | - target_height<br>
 42 |     The height of the output.
 43 | 
 44 | - src_left<br>
 45 |     Cropping of the left edge.<br>
 46 |     Default: 0.0.
 47 | 
 48 | - src_top<br>
 49 |     Cropping of the top edge.<br>
 50 |     Default: 0.0.
 51 | 
 52 | - src_width<br>
 53 |     If > 0.0 it sets the width of the clip before resizing.<br>
 54 |     If <= 0.0 it sets the cropping of the right edges before resizing.<br>
 55 |     Default: Source width.
 56 | 
 57 | - src_height<br>
 58 |     If > 0.0 it sets the height of the clip before resizing.<br>
 59 |     If <= 0.0 it sets the cropping of the bottom edges before resizing.<br>
 60 |     Default: Source height.
 61 | 
 62 | - quant_x, quant_y<br>
 63 |     Controls the sub-pixel quantization.<br>
 64 |     Must be between 1 and 256.<br>
 65 |     Default: 256.
 66 | 
 67 | - tap (JincResize only)<br>
 68 |     Corresponding to different zero points of Jinc function.<br>
 69 |     Must be between 1 and 16.<br>
 70 |     Default: 3.
 71 | 
 72 | - blur (JincResize only)<br>
 73 |     Blur processing, it can reduce side effects.<br>
 74 |     To achieve blur, the value should be less than 1.0.<br>
 75 |     Default: 1.0.
 76 | 
 77 | - threads<br>
 78 |     Whether to use maximum logical processors.<br>
 79 |     0: Maximum logical processors are used.<br>
 80 |     1: Only one thread is used.<br>
 81 |     Default: 0.
 82 | 
 83 | - cplace<br>
 84 |     The location of the chroma samples.<br>
 85 |     "MPEG1": Chroma samples are located on the center of each group of 4 pixels.<br>
 86 |     "MPEG2": Chroma samples are located on the left pixel column of the group.<br>
 87 |     "topleft": Chroma samples are located on the left pixel column and the first row of the group.<br>
 88 |     Default: If frame properties are supported and frame property "_ChromaLocation" exists - "_ChromaLocation" value of the first frame is used.
 89 |     If frame properties aren't supported or there is no property "_ChromaLocation" - "MPEG2".
 90 | 
 91 | - opt (JincResize only)<br>
 92 |     Sets which cpu optimizations to use.<br>
 93 |     -1: Auto-detect without AVX-512.<br>
 94 |     0: Use C++ code.<br>
 95 |     1: Use SSE4.1 code.<br>
 96 |     2: Use AVX2 code.<br>
 97 |     3: Use AVX-512 code.<br>
 98 |     Default: -1.
 99 | 
100 | - initial_capacity (JincResize only)<br>
101 |     Initial memory allocation size.<br>
102 |     A smaller size forces more memory reallocations, which makes the initial startup slower but avoids excessive memory allocation.<br>
103 |     Must be greater than 0.<br>
104 |     Default: Max(target_width * target_height, src_width * src_height).
105 | 
106 | - initial_factor (JincResize only)<br>
107 |     The initial factor used for the first memory reallocation.<br>
108 |     After the first memory reallocation the factor starts to lower for the next reallocations.<br>
109 |     `initial_factor=1` ensures that the next memory allocation is the minimal possible.<br>
110 |     Must be equal to or greater than 1.0.<br>
111 |     Default: 1.5.
112 | 
113 | ### Building:
114 | 
115 | ```
116 | Requirements:
117 | - Git
118 | - C++17 compiler
119 | - CMake >= 3.16
120 | - Ninja
121 | ```
122 | 
123 | ```
124 | git clone https://github.com/Asd-g/AviSynth-JincResize && \
125 | cd AviSynth-JincResize
126 | cmake -B build -G Ninja
127 | ninja -C build
128 | ```
129 | 
130 | Example of building on Windows with MSVC:
131 | 
132 | 1. Open x64 Native Tools Command Prompt for VS xxxx.
133 | 2. Type - `set LIB=%LIB%;path_to_avisynth.lib`
134 | 3. Navigate to the jincresize source folder.
135 | 4. Type - `cmake -B name_of_the_folder_containing_building_files -DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=path_to_avisynth_c.h`
136 | 
137 | By default Visual Studio solution files will be created.
138 | 
139 | Example of building on Windows with Mingw:
140 | 
141 | 1. Open UCRT64/MINGW64 shell.
142 | 2. Type - `export LIBRARY_PATH=$LIBRARY_PATH:path_to_the_avs_lib`
143 | 3. Navigate to the jincresize source folder.
144 | 4. Type - `cmake -B name_of_the_folder_containing_building_files -DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=path_to_avisynth_c.h`
145 | 


--------------------------------------------------------------------------------
/cmake_uninstall.cmake.in:
--------------------------------------------------------------------------------
 1 | if(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt")
 2 |   message(FATAL_ERROR "Cannot find install manifest: @CMAKE_BINARY_DIR@/install_manifest.txt")
 3 | endif()
 4 | 
 5 | file(READ "@CMAKE_BINARY_DIR@/install_manifest.txt" files)
 6 | string(REGEX REPLACE "\n" ";" files "${files}")
 7 | foreach(file ${files})
 8 |   message(STATUS "Uninstalling $ENV{DESTDIR}${file}")
 9 |   if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
10 |     exec_program(
11 |       "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\""
12 |       OUTPUT_VARIABLE rm_out
13 |       RETURN_VALUE rm_retval
14 |       )
15 |     if(NOT "${rm_retval}" STREQUAL 0)
16 |       message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}")
17 |     endif()
18 |   else(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
19 |     message(STATUS "File $ENV{DESTDIR}${file} does not exist.")
20 |   endif()
21 | endforeach()
22 | 


--------------------------------------------------------------------------------
/src/JincResize.cpp:
--------------------------------------------------------------------------------
   1 | #include <cmath>
   2 | #include <cstring>
   3 | 
   4 | #include "JincResize.h"
   5 | 
   6 | AVS_FORCEINLINE void* aligned_malloc(size_t size, size_t align)
   7 | {
   8 |     void* result = [&]()
   9 |     {
  10 | #ifdef _WIN32
  11 |         return _aligned_malloc(size, align);
  12 | #else
  13 |         if (posix_memalign(&result, align, size))
  14 |             return result = nullptr;
  15 |         else
  16 |             return result;
  17 | #endif
  18 |     }();
  19 | 
  20 |     return result;
  21 | }
  22 | 
  23 | AVS_FORCEINLINE void aligned_free(void* ptr)
  24 | {
  25 | #ifdef _WIN32
  26 |     _aligned_free(ptr);
  27 | #else
  28 |     free(ptr);
  29 | #endif
  30 | }
  31 | 
  32 | static AVS_FORCEINLINE unsigned portable_clz(size_t x)
  33 | {
  34 | #if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
  35 |     unsigned long index;
  36 |     return (_BitScanReverse(&index, static_cast<unsigned long>(x))) ? (31 - index) : 32;
  37 | #elif defined(_WIN32) && defined(_M_ARM64)
  38 |     return static_cast<unsigned>(__clz(static_cast<unsigned long>(x)));
  39 | #else
  40 |     return (x == 0) ? 32 : __builtin_clz(static_cast<unsigned>(x));
  41 | #endif
  42 | }
  43 | 
// Fallback definition of pi, used only when no M_PI macro is already defined at this point.
#ifndef M_PI // GCC seems to have it
static constexpr double M_PI = 3.14159265358979323846;
#endif
  47 | 
// Taylor series coefficients of 2*BesselJ1(pi*x)/(pi*x) as (x^2) -> 0
//
// The series is in powers of x^2: entry k is the coefficient of (x^2)^k, e.g.
// entry 0 is 1 and entry 1 is -pi^2/8. It is meant to be evaluated with Horner's
// scheme on x^2, starting from the highest coefficient that is used.
static constexpr double jinc_taylor_series[31] =
{
    1.0,
    -1.23370055013616982735431137,
    0.507339015802096027273126733,
    -0.104317403816764804365258186,
    0.0128696438477519721233840271,
    -0.00105848577966854543020422691,
    6.21835470803998638484476598e-05,
    -2.73985272294670461142756204e-06,
    9.38932725442064547796003405e-08,
    -2.57413737759717407304931036e-09,
    5.77402672521402031756429343e-11,
    -1.07930605263598241754572977e-12,
    1.70710316782347356046974552e-14,
    -2.31434518382749184406648762e-16,
    2.71924659665997312120515390e-18,
    -2.79561335187943028518083529e-20,
    2.53599244866299622352138464e-22,
    -2.04487273140961494085786452e-24,
    1.47529860450204338866792475e-26,
    -9.57935105257523453155043307e-29,
    5.62764317309979254140393917e-31,
    -3.00555258814860366342363867e-33,
    1.46559362903641161989338221e-35,
    -6.55110024064596600335624426e-38,
    2.69403199029404093412381643e-40,
    -1.02265499954159964097119923e-42,
    3.59444454568084324694180635e-45,
    -1.17313973900539982313119019e-47,
    3.56478606255557746426034301e-50,
    -1.01100655781438313239513538e-52,
    2.68232117541264485328658605e-55
};
  83 | 
// The first 16 positive zeros of 2*BesselJ1(pi*x)/(pi*x), in ascending order.
// Each entry is a zero of BesselJ1 divided by pi (the first is ~3.8317/pi).
// NOTE(review): presumably indexed by `tap - 1` (tap is documented as 1..16) -
// the code that reads this table is not visible here; confirm.
static constexpr double jinc_zeros[16] =
{
    1.2196698912665045,
    2.2331305943815286,
    3.2383154841662362,
    4.2410628637960699,
    5.2427643768701817,
    6.2439216898644877,
    7.2447598687199570,
    8.2453949139520427,
    9.2458926849494673,
    10.246293348754916,
    11.246622794877883,
    12.246898461138105,
    13.247132522181061,
    14.247333735806849,
    15.247508563037300,
    16.247661874700962
};
 103 | 
 104 | //  Modified from boost package math/tools/`rational.hpp`
 105 | //
 106 | //  (C) Copyright John Maddock 2006.
 107 | //  Use, modification and distribution are subject to the
 108 | //  Boost Software License, Version 1.0. (See accompanying file
 109 | //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 110 | static double evaluate_rational(const double* num, const double* denom, double z, int count)
 111 | {
 112 |     double s1, s2;
 113 |     if (z <= 1.0)
 114 |     {
 115 |         s1 = num[count - 1];
 116 |         s2 = denom[count - 1];
 117 |         for (auto i = count - 2; i >= 0; --i)
 118 |         {
 119 |             s1 *= z;
 120 |             s2 *= z;
 121 |             s1 += num[i];
 122 |             s2 += denom[i];
 123 |         }
 124 |     }
 125 |     else
 126 |     {
 127 |         z = 1.0f / z;
 128 |         s1 = num[0];
 129 |         s2 = denom[0];
 130 |         for (auto i = 1; i < count; ++i)
 131 |         {
 132 |             s1 *= z;
 133 |             s2 *= z;
 134 |             s1 += num[i];
 135 |             s2 += denom[i];
 136 |         }
 137 |     }
 138 | 
 139 |     return s1 / s2;
 140 | }
 141 | 
//  Modified from boost package `BesselJ1.hpp`
//
//  Copyright (c) 2006 Xiaogang Zhang
//  Use, modification and distribution are subject to the
//  Boost Software License, Version 1.0. (See accompanying file
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
//  Computes 2*J1(y)/y for y = pi*sqrt(x2), taking the squared argument x2.
//  J1 is approximated as sqrt(1/(pi*y)) * (rc*(sin y - cos y) + (8/y)*rs*(sin y + cos y)),
//  where rc and rs are rational functions of (8/y)^2 built from the tables below.
//  NOTE(review): presumably intended for large arguments only (x2 == 0 divides by
//  zero) - confirm the range against the caller.
static double jinc_sqr_boost_l(double x2)
{
    // Numerator / denominator coefficients of the rational factor applied to (sin - cos).
    constexpr double bPC[7] =
    {
        -4.4357578167941278571e+06,
        -9.9422465050776411957e+06,
        -6.6033732483649391093e+06,
        -1.5235293511811373833e+06,
        -1.0982405543459346727e+05,
        -1.6116166443246101165e+03,
        0.0
    };
    constexpr double bQC[7] =
    {
        -4.4357578167941278568e+06,
        -9.9341243899345856590e+06,
        -6.5853394797230870728e+06,
        -1.5118095066341608816e+06,
        -1.0726385991103820119e+05,
        -1.4550094401904961825e+03,
        1.0
    };
    // Numerator / denominator coefficients of the rational factor applied to (sin + cos).
    constexpr double bPS[7] =
    {
        3.3220913409857223519e+04,
        8.5145160675335701966e+04,
        6.6178836581270835179e+04,
        1.8494262873223866797e+04,
        1.7063754290207680021e+03,
        3.5265133846636032186e+01,
        0.0
    };
    constexpr double bQS[7] =
    {
        7.0871281941028743574e+05,
        1.8194580422439972989e+06,
        1.4194606696037208929e+06,
        4.0029443582266975117e+05,
        3.7890229745772202641e+04,
        8.6383677696049909675e+02,
        1.0
    };

    const auto y2 = M_PI * M_PI * x2; // y^2, with y = pi * sqrt(x2)
    const auto xp = sqrt(y2);         // y
    const auto y2p = 64.0 / y2;       // (8 / y)^2, the argument of the rational functions
    const auto sx = sin(xp);
    const auto cx = cos(xp);

    // sqrt(y / pi) * 2 / y^2 equals (2 / y) * 1 / sqrt(pi * y): the J1 prefactor combined with the 2/y of 2*J1(y)/y.
    return (sqrt(xp / M_PI) * 2.0 / y2) * (evaluate_rational(bPC, bQC, y2p, 7) * (sx - cx) + (8.0 / xp) * evaluate_rational(bPS, bQS, y2p, 7) * (sx + cx));
}
 199 | 
 200 | // jinc(sqrt(x2))
 201 | static double jinc_sqr(double x2)
 202 | {
 203 |     if (x2 < 1.49)        // the 1-tap radius
 204 |     {
 205 |         double res = 0.0;
 206 |         for (auto j = 16; j > 0; --j)
 207 |             res = res * x2 + jinc_taylor_series[j - 1];
 208 |         return res;
 209 |     }
 210 |     else if (x2 < 4.97)   // the 2-tap radius
 211 |     {
 212 |         double res = 0.0;
 213 |         for (auto j = 21; j > 0; --j)
 214 |             res = res * x2 + jinc_taylor_series[j - 1];
 215 |         return res;
 216 |     }
 217 |     else if (x2 < 10.49)  // the 3-tap radius
 218 |     {
 219 |         double res = 0.0;
 220 |         for (auto j = 26; j > 0; --j)
 221 |             res = res * x2 + jinc_taylor_series[j - 1];
 222 |         return res;
 223 |     }
 224 |     else if (x2 < 17.99)  // the 4-tap radius
 225 |     {
 226 |         double res = 0.0;
 227 |         for (auto j = 31; j > 0; --j)
 228 |             res = res * x2 + jinc_taylor_series[j - 1];
 229 |         return res;
 230 |     }
 231 |     else if (x2 < 52.57)  // the 5~7-tap radius
 232 |     {
 233 |         const auto x = M_PI * sqrt(x2);
 234 |         return 2.0 * std::cyl_bessel_j(1, x) / x;
 235 |     }
 236 |     else if (x2 < 68.07)  // the 8-tap radius // Modify from pull request #4
 237 |     {
 238 |         return jinc_sqr_boost_l(x2);
 239 |     }
 240 |     else                  // the 9~16-tap radius
 241 |     {
 242 |         const auto x = M_PI * sqrt(x2);
 243 |         return 2.0 * std::cyl_bessel_j(1, x) / x;
 244 |     }
 245 | }
 246 | 
 247 | static double sample_sqr(double (*filter)(double), double x2, double blur2, double radius2)
 248 | {
 249 |     if (blur2 > 0.0)
 250 |         x2 /= blur2;
 251 | 
 252 |     if (x2 < radius2)
 253 |         return filter(x2);
 254 | 
 255 |     return 0.0;
 256 | }
 257 | 
// Scale applied to the normalized position when Lut::InitLut samples its second jinc factor.
// NOTE(review): judging by the name and value (~1.2197^2) this is the square of the
// first zero of jinc — confirm against jinc_zeros.
constexpr double JINC_ZERO_SQR = 1.48759464366204680005356;
 259 | 
// Allocates storage for lut_size doubles. The values are left uninitialized until
// InitLut() fills them. The array is not released here; free_JincResize frees it
// with delete[].
Lut::Lut()
{
    lut = new double[lut_size];
}
 264 | 
 265 | void Lut::InitLut(int lut_size, double radius, double blur)
 266 | {
 267 |     const auto radius2 = radius * radius;
 268 |     const auto blur2 = blur * blur;
 269 | 
 270 |     for (auto i = 0; i < lut_size; ++i)
 271 |     {
 272 |         const auto t2 = i / (lut_size - 1.0);
 273 |         lut[i] = sample_sqr(jinc_sqr, radius2 * t2, blur2, radius2) * sample_sqr(jinc_sqr, JINC_ZERO_SQR * t2, 1.0, radius2);
 274 |     }
 275 | }
 276 | 
 277 | float Lut::GetFactor(int index)
 278 | {
 279 |     if (index >= lut_size)
 280 |         return 0.f;
 281 |     return static_cast<float>(lut[index]);
 282 | }
 283 | 
// Equals 1.5 * 2^52. generate_coeff_table_c adds it to the LUT position before
// calling llround() and then narrows the result to int.
// NOTE(review): this looks like the usual "add a large constant" double-rounding
// trick — confirm the intent before changing the index computation.
constexpr double DOUBLE_ROUND_MAGIC_NUMBER = 6755399441055744.0;
 285 | 
 286 | static void init_coeff_table(EWAPixelCoeff* out, int quantize_x, int quantize_y,
 287 |     int filter_size, int dst_width, int dst_height)
 288 | {
 289 |     out->filter_size = filter_size;
 290 |     out->coeff_stride = (filter_size + 15) & ~15;
 291 | 
 292 |     // Allocate metadata
 293 |     out->meta = new EWAPixelCoeffMeta[static_cast<int64_t>(dst_width) * dst_height];
 294 | 
 295 |     // Alocate factor map
 296 |     out->factor_map = new int[static_cast<int64_t>(quantize_x) * quantize_y];
 297 | 
 298 |     // This will be reserved to exact size in coff generating procedure
 299 |     out->factor = nullptr;
 300 | 
 301 |     // Zeroed memory
 302 |     if (out->factor_map != nullptr)
 303 |         memset(out->factor_map, 0, static_cast<int64_t>(quantize_x) * quantize_y * sizeof(int));
 304 | 
 305 |     memset(out->meta, 0, static_cast<int64_t>(dst_width) * dst_height * sizeof(EWAPixelCoeffMeta));
 306 | }
 307 | 
 308 | static void delete_coeff_table(EWAPixelCoeff* out)
 309 | {
 310 |     aligned_free(out->factor);
 311 |     delete[] out->meta;
 312 |     delete[] out->factor_map;
 313 | }
 314 | 
// Argument bundle for generate_coeff_table_c().
// Create_JincResize fills it with positional aggregate initialization, so the
// member order below must not change.
struct generate_coeff_params
{
    Lut* func;               // lookup table queried through GetFactor() for every tap
    EWAPixelCoeff* out;      // coefficient table that gets initialized and filled
    int quantize_x;          // number of quantization steps for the horizontal sample position
    int quantize_y;          // number of quantization steps for the vertical sample position
    int samples;             // sample count used to scale squared distances into LUT indices
    int src_width;           // source plane width, used to clamp the sampling window
    int src_height;          // source plane height, used to clamp the sampling window
    int dst_width;           // destination plane width
    int dst_height;          // destination plane height
    double radius;           // filter radius
    double crop_left;        // left edge of the source region
    double crop_top;         // top edge of the source region
    double crop_width;       // width of the source region
    double crop_height;      // height of the source region
    int initial_capacity;    // initial capacity, in floats, of the temporary coefficient buffer
    double initial_factor;   // initial growth factor used when that buffer must be enlarged
};
 334 | 
/* Coefficient table generation
 *
 * Builds the coefficient table for one plane. For every destination pixel it records
 * the top-left corner of its source window (meta->start_x / meta->start_y) and the
 * offset of its block of normalized weights inside out->factor (meta->coeff_meta).
 * Each block holds filter_size rows of out->coeff_stride floats.
 *
 * Pixels whose window fits inside the source reuse a block computed earlier for the
 * same quantized sub-pixel position (tracked in out->factor_map). Pixels whose window
 * had to be clamped ("border" pixels) always get a block of their own.
 *
 * Throws a const char* message when a temporary buffer cannot be allocated; the
 * new[] expressions inside init_coeff_table may additionally throw.
 */
static void generate_coeff_table_c(const generate_coeff_params& params)
{
    Lut* func = params.func;
    EWAPixelCoeff* out = params.out;
    int quantize_x = params.quantize_x;
    int quantize_y = params.quantize_y;
    int samples = params.samples;
    int src_width = params.src_width;
    int src_height = params.src_height;
    int dst_width = params.dst_width;
    int dst_height = params.dst_height;
    double radius = params.radius;

    // Scale applied to source-space distances: dst / crop when shrinking, capped at 1.0 otherwise.
    const double filter_step_x = min(static_cast<double>(dst_width) / params.crop_width, 1.0);
    const double filter_step_y = min(static_cast<double>(dst_height) / params.crop_height, 1.0);

    // Filter support measured in source pixels.
    const float filter_support_x = static_cast<float>(radius / filter_step_x);
    const float filter_support_y = static_cast<float>(radius / filter_step_y);

    // A single square window (filter_size x filter_size) sized for the larger support is used for both axes.
    const float filter_support = max(filter_support_x, filter_support_y);
    const int filter_size = max(static_cast<int>(ceil(filter_support_x * 2.0)), static_cast<int>(ceil(filter_support_y * 2.0)));

    // Source-space position of the first destination column.
    const float start_x = static_cast<float>(params.crop_left + (params.crop_width / dst_width - 1.0) / 2.0);

    // Source-space distance between neighbouring destination pixels.
    const float x_step = static_cast<float>(params.crop_width / dst_width);
    const float y_step = static_cast<float>(params.crop_height / dst_height);

    float xpos = start_x;
    // Same formula as start_x, written with a common denominator.
    float ypos = static_cast<float>(params.crop_top + (params.crop_height - dst_height) / (dst_height * static_cast<int64_t>(2)));

    // Initialize EWAPixelCoeff data structure
    init_coeff_table(out, quantize_x, quantize_y, filter_size, dst_width, dst_height);

    // Growable buffer that collects every coefficient block; it becomes out->factor at the end.
    size_t tmp_array_capacity = params.initial_capacity;
    float* tmp_array = static_cast<float*>(aligned_malloc(tmp_array_capacity * sizeof(float), 64));
    if (!tmp_array)
        throw "JincResize: failed to allocate tmp_array.";
    size_t tmp_array_size = 0;   // floats in use
    int tmp_array_top = 0;       // offset where the next block starts
    unsigned base_clz = portable_clz(tmp_array_capacity);
    const double initial_growth_factor = params.initial_factor;
    const double radius2 = radius * radius;

    // Number of floats in one coefficient block; used to advance the coeff pointer
    const int coeff_per_pixel = out->coeff_stride * filter_size;

    for (int y = 0; y < dst_height; ++y)
    {
        for (int x = 0; x < dst_width; ++x)
        {
            bool is_border = false;

            EWAPixelCoeffMeta* meta = &out->meta[y * dst_width + x];

            // Here, the window_*** variables specify the begin/size/end
            // of the EWA window to process.
            int window_end_x = static_cast<int>(xpos + filter_support);
            int window_end_y = static_cast<int>(ypos + filter_support);

            // Clamp the window to the source plane; any clamping marks the pixel as a border pixel.
            if (window_end_x >= src_width)
            {
                window_end_x = src_width - 1;
                is_border = true;
            }
            if (window_end_y >= src_height)
            {
                window_end_y = src_height - 1;
                is_border = true;
            }

            int window_begin_x = window_end_x - filter_size + 1;
            int window_begin_y = window_end_y - filter_size + 1;

            if (window_begin_x < 0)
            {
                window_begin_x = 0;
                is_border = true;
            }
            if (window_begin_y < 0)
            {
                window_begin_y = 0;
                is_border = true;
            }

            meta->start_x = window_begin_x;
            meta->start_y = window_begin_y;

            // Quantize xpos and ypos
            const int quantized_x_int = static_cast<int>(xpos * quantize_x);
            const int quantized_y_int = static_cast<int>(ypos * quantize_y);
            const int quantized_x_value = quantized_x_int % quantize_x;
            const int quantized_y_value = quantized_y_int % quantize_y;
            const float quantized_xpos = static_cast<float>(quantized_x_int) / quantize_x;
            const float quantized_ypos = static_cast<float>(quantized_y_int) / quantize_y;

            // factor_map stores (block offset + 1); 0 means no block exists yet for that quantized position.
            if (!is_border && out->factor_map[quantized_y_value * quantize_x + quantized_x_value] != 0)
            {
                // Not a border pixel and a coefficient block already exists for this quantized position
                meta->coeff_meta = out->factor_map[quantized_y_value * quantize_x + quantized_x_value] - 1;
            }
            else
            {
                // A new coefficient block has to be computed
                float divider = 0.f;

                // This is the location of current target pixel in source pixel
                // Quantized
                //const float current_x = clamp(is_border ? xpos : quantized_xpos, 0.f, src_width - 1.f);
                //const float current_y = clamp(is_border ? ypos : quantized_ypos, 0.f, src_height - 1.f);

                if (!is_border)
                {
                    // Change window position to quantized position
                    window_begin_x = static_cast<int>(quantized_xpos + filter_support) - filter_size + 1;
                    window_begin_y = static_cast<int>(quantized_ypos + filter_support) - filter_size + 1;
                }

                // Windowing position
                int window_x = window_begin_x;
                int window_y = window_begin_y;

                // Make room for one more block before the first loop calculates the coefficients
                const size_t new_size = tmp_array_size + coeff_per_pixel;
                if (new_size > tmp_array_capacity)
                {
                    // The growth multiplier starts at initial_growth_factor and moves toward 1.0
                    // (in 1/32 steps) as portable_clz(capacity) drops below its initial value.
                    size_t new_capacity = tmp_array_capacity * (1.0 + (initial_growth_factor - 1.0)
                        * (1.0 - static_cast<double>(max(0, static_cast<int>(base_clz - portable_clz(tmp_array_capacity)))) / 32.0));
                    // Never allocate less than this block needs.
                    if (new_capacity < new_size)
                        new_capacity = new_size;
                    float* new_tmp = static_cast<float*>(aligned_malloc(new_capacity * sizeof(float), 64));
                    if (!new_tmp)
                    {
                        aligned_free(tmp_array);
                        throw "JincResize: failed to allocate new_tmp.";
                    }
                    memcpy(new_tmp, tmp_array, tmp_array_size * sizeof(float));
                    aligned_free(tmp_array);
                    tmp_array = new_tmp;
                    tmp_array_capacity = new_capacity;
                }
                // Zero the whole block so the padding between filter_size and coeff_stride stays 0.
                memset(tmp_array + tmp_array_size, 0, coeff_per_pixel * sizeof(float));
                int curr_factor_ptr = tmp_array_top;
                tmp_array_size = new_size;

                for (int ly = 0; ly < filter_size; ++ly)
                {
                    for (int lx = 0; lx < filter_size; ++lx)
                    {
                        // Distance to the sampling pixel along each axis, scaled by the filter step
                        const double dx = (clamp(is_border ? xpos : quantized_xpos, 0.f, static_cast<float>(src_width - 1)) - window_x) * filter_step_x;
                        const double dy = (clamp(is_border ? ypos : quantized_ypos, 0.f, static_cast<float>(src_height - 1)) - window_y) * filter_step_y;

                        // Map the squared distance (relative to radius^2) onto the LUT;
                        // GetFactor returns 0 for indices at or beyond the table size.
                        int index = static_cast<int>(llround((samples - 1) * (dx * dx + dy * dy) / radius2 + DOUBLE_ROUND_MAGIC_NUMBER));

                        const float factor = func->GetFactor(index);

                        tmp_array[curr_factor_ptr + static_cast<int64_t>(lx)] = factor;
                        divider += factor;

                        ++window_x;
                    }

                    curr_factor_ptr += out->coeff_stride;

                    window_x = window_begin_x;
                    ++window_y;
                }

                // Second loop divides every coefficient by their sum
                curr_factor_ptr = tmp_array_top;
                for (int ly = 0; ly < filter_size; ++ly)
                {
                    for (int lx = 0; lx < filter_size; ++lx)
                    {
                        tmp_array[curr_factor_ptr + static_cast<int64_t>(lx)] /= divider;
                    }

                    curr_factor_ptr += out->coeff_stride;
                }

                // Remember the block for this quantized position (offset + 1, see above)
                if (!is_border)
                    out->factor_map[quantized_y_value * quantize_x + quantized_x_value] = tmp_array_top + 1;

                meta->coeff_meta = tmp_array_top;
                tmp_array_top += coeff_per_pixel;
            }

            xpos += x_step;
        }

        ypos += y_step;
        xpos = start_x;
    }

    // Hand the buffer over to the table (no copy is made); delete_coeff_table releases it
    out->factor = tmp_array;
}
 534 | 
 535 | /* Planar resampling with coeff table */
 536 | template<typename T, int thr, int subsampled>
 537 | void JincResize::resize_plane_c(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi)
 538 | {
 539 |     const int planes_y[4] = { AVS_PLANAR_Y, AVS_PLANAR_U, AVS_PLANAR_V, AVS_PLANAR_A };
 540 |     const int planes_r[4] = { AVS_PLANAR_G, AVS_PLANAR_B, AVS_PLANAR_R, AVS_PLANAR_A };
 541 |     const int* current_planes = (avs_is_rgb(vi)) ? planes_r : planes_y;
 542 |     for (int i = 0; i < planecount; ++i)
 543 |     {
 544 |         const int plane = current_planes[i];
 545 | 
 546 |         const int src_stride = avs_get_pitch_p(src, plane) / sizeof(T);
 547 |         const int dst_stride = avs_get_pitch_p(dst, plane) / sizeof(T);
 548 |         const int dst_width = avs_get_row_size_p(dst, plane) / sizeof(T);
 549 |         const int dst_height = avs_get_height_p(dst, plane);
 550 |         const T* srcp = reinterpret_cast<const T*>(avs_get_read_ptr_p(src, plane));
 551 | 
 552 |         EWAPixelCoeff* out = [&]()
 553 |         {
 554 |             if constexpr (subsampled)
 555 |                 return (i) ? (i == 3) ? JincResize::out[0] : JincResize::out[1] : JincResize::out[0];
 556 |             else
 557 |                 return JincResize::out[0];
 558 |         }();
 559 | 
 560 |         auto loop = [&](int y)
 561 |         {
 562 |             T* __restrict dstp = reinterpret_cast<T*>(avs_get_write_ptr_p(dst, plane)) + static_cast<int64_t>(y) * dst_stride;
 563 | 
 564 |             for (int x = 0; x < dst_width; ++x)
 565 |             {
 566 |                 EWAPixelCoeffMeta* meta = out->meta + static_cast<int64_t>(y) * dst_width + x;
 567 |                 const T* src_ptr = srcp + meta->start_y * static_cast<int64_t>(src_stride) + meta->start_x;
 568 |                 const float* coeff_ptr = out->factor + meta->coeff_meta;
 569 | 
 570 |                 float result = 0.f;
 571 | 
 572 |                 for (int ly = 0; ly < out->filter_size; ++ly)
 573 |                 {
 574 |                     for (int lx = 0; lx < out->filter_size; ++lx)
 575 |                         result += src_ptr[lx] * coeff_ptr[lx];
 576 | 
 577 |                     coeff_ptr += out->coeff_stride;
 578 |                     src_ptr += src_stride;
 579 |                 }
 580 | 
 581 |                 if constexpr (std::is_integral_v<T>)
 582 |                     dstp[x] = static_cast<T>(lrintf(clamp(result, 0.f, peak)));
 583 |                 else
 584 |                     dstp[x] = result;
 585 | 
 586 |             }
 587 |         };
 588 | 
 589 |         if constexpr (thr)
 590 |         {
 591 |             for (intptr_t i = 0; i < dst_height; ++i)
 592 |                 loop(i);
 593 |         }
 594 |         else
 595 |         {
 596 |             std::vector<int> l(dst_height);
 597 |             std::iota(std::begin(l), std::end(l), 0);
 598 |             std::for_each(std::execution::par, std::begin(l), std::end(l), loop);
 599 |         }
 600 |     }
 601 | }
 602 | 
 603 | static AVS_VideoFrame* AVSC_CC JincResize_GetFrame(AVS_FilterInfo* fi, int n)
 604 | {
 605 |     JincResize* d = reinterpret_cast<JincResize*>(fi->user_data);
 606 |     AVS_ScriptEnvironment* env = fi->env;
 607 |     AVS_VideoInfo* vi = &fi->vi;
 608 | 
 609 |     AVS_VideoFrame* src = avs_get_frame(fi->child, n);
 610 |     if (!src)
 611 |         return nullptr;
 612 | 
 613 |     AVS_VideoFrame* dst = avs_new_video_frame_p(env, vi, src);
 614 | 
 615 |     (d->*d->process_frame)(src, dst, vi);
 616 | 
 617 |     if ((avs_is_420(vi) || avs_is_422(vi) || avs_is_yv411(vi)))
 618 |     {
 619 |         if (d->cplace == "mpeg2")
 620 |             avs_prop_set_int(env, avs_get_frame_props_rw(env, dst), "_ChromaLocation", 0, 0);
 621 |         else if (d->cplace == "mpeg1")
 622 |             avs_prop_set_int(env, avs_get_frame_props_rw(env, dst), "_ChromaLocation", 1, 0);
 623 |         else
 624 |             avs_prop_set_int(env, avs_get_frame_props_rw(env, dst), "_ChromaLocation", 2, 0);
 625 |     }
 626 | 
 627 |     avs_release_video_frame(src);
 628 | 
 629 |     return dst;
 630 | }
 631 | 
 632 | static void AVSC_CC free_JincResize(AVS_FilterInfo* fi)
 633 | {
 634 |     JincResize* d = reinterpret_cast<JincResize*>(fi->user_data);
 635 |     std::vector<EWAPixelCoeff*>* out = &d->out;
 636 | 
 637 |     for (int i = 0; i < static_cast<int>(out->size()); ++i)
 638 |     {
 639 |         delete_coeff_table((*out)[i]);
 640 |         delete (*out)[i];
 641 |     }
 642 | 
 643 |     delete[] d->init_lut->lut;
 644 |     delete d->init_lut;
 645 | 
 646 |     delete d;
 647 | }
 648 | 
 649 | static int AVSC_CC set_cache_hints_JincResize(AVS_FilterInfo* fi, int cachehints, int frame_range)
 650 | {
 651 |     return cachehints == AVS_CACHE_GET_MTMODE ? 2 : 0;
 652 | }
 653 | 
 654 | static AVS_Value AVSC_CC Create_JincResize(AVS_ScriptEnvironment* env, AVS_Value args, void* param)
 655 | {
 656 |     enum
 657 |     {
 658 |         Clip,
 659 |         Target_width,
 660 |         Target_height,
 661 |         Src_left,
 662 |         Src_top,
 663 |         Src_width,
 664 |         Src_height,
 665 |         Quant_x,
 666 |         Quant_y,
 667 |         Tap,
 668 |         Blur,
 669 |         Cplace,
 670 |         Threads,
 671 |         Opt,
 672 |         Initial_capacity,
 673 |         Initial_factor
 674 |     };
 675 | 
 676 |     JincResize* d = reinterpret_cast<JincResize*>(new JincResize());
 677 | 
 678 |     AVS_FilterInfo* fi;
 679 |     AVS_Clip* clip = avs_new_c_filter(env, &fi, avs_array_elt(args, Clip), 1);
 680 |     AVS_VideoInfo* vi = &fi->vi;
 681 | 
 682 |     const auto set_error = [&](AVS_Clip* clip, const char* msg)
 683 |     {
 684 |         avs_release_clip(clip);
 685 | 
 686 |         return avs_new_value_error(msg);
 687 |     };
 688 | 
 689 |     if (!avs_check_version(env, 9))
 690 |     {
 691 |         if (avs_check_version(env, 10))
 692 |         {
 693 |             if (avs_get_env_property(env, AVS_AEP_INTERFACE_BUGFIX) < 2)
 694 |                 return set_error(clip, "JincResize: AviSynth+ version must be r3688 or later.");
 695 |         }
 696 |     }
 697 |     else
 698 |         return set_error(clip, "JincResize: AviSynth+ version must be r3688 or later.");
 699 | 
 700 |     if (!avs_is_planar(vi))
 701 |         return set_error(clip, "JincResize: clip must be in planar format.");
 702 | 
 703 |     const int tap = avs_defined(avs_array_elt(args, Tap)) ? avs_as_int(avs_array_elt(args, Tap)) : 3;
 704 |     if (tap < 1 || tap > 16)
 705 |         return set_error(clip, "JincResize: tap must be between 1..16.");
 706 | 
 707 |     const int quant_x = avs_defined(avs_array_elt(args, Quant_x)) ? avs_as_int(avs_array_elt(args, Quant_x)) : 256;
 708 |     if (quant_x < 1 || quant_x > 256)
 709 |         return set_error(clip, "JincResize: quant_x must be between 1..256.");
 710 | 
 711 |     const int quant_y = avs_defined(avs_array_elt(args, Quant_y)) ? avs_as_int(avs_array_elt(args, Quant_y)) : 256;
 712 |     if (quant_y < 1 || quant_y > 256)
 713 |         return set_error(clip, "JincResize: quant_y must be between 1..256.");
 714 | 
 715 |     std::string cplace = avs_defined(avs_array_elt(args, Cplace)) ? avs_as_string(avs_array_elt(args, Cplace)) : "";
 716 | 
 717 |     if (!cplace.empty())
 718 |     {
 719 |         for (auto& c : cplace)
 720 |             c = tolower(c);
 721 | 
 722 |         if (cplace != "mpeg2" && cplace != "mpeg1" && cplace != "topleft")
 723 |             return set_error(clip, "JincResize: cplace must be MPEG2, MPEG1 or topleft.");
 724 |     }
 725 |     else
 726 |     {
 727 |         AVS_VideoFrame* frame0 = avs_get_frame(clip, 0);
 728 |         const AVS_Map* props = avs_get_frame_props_ro(env, frame0);
 729 | 
 730 |         if (avs_prop_get_type(env, props, "_ChromaLocation") == 'i')
 731 |         {
 732 |             switch (avs_prop_get_int(env, props, "_ChromaLocation", 0, nullptr))
 733 |             {
 734 |                 case 0: cplace = "mpeg2"; break;
 735 |                 case 1: cplace = "mpeg1"; break;
 736 |                 case 2: cplace = "topleft"; break;
 737 |                 default: return set_error(clip, "JincResize: invalid _ChromaLocation"); break;
 738 |             }
 739 |         }
 740 |         else
 741 |             cplace = "mpeg2";
 742 |     }
 743 | 
 744 |     if (cplace == "topleft" && !avs_is_420(vi))
 745 |         return set_error(clip, "JincResize: topleft must be used only for 4:2:0 chroma subsampling.");
 746 | 
 747 |     const int opt = avs_defined(avs_array_elt(args, Opt)) ? avs_as_int(avs_array_elt(args, Opt)) : -1;
 748 |     const int cpu_flags = avs_get_cpu_flags(env);
 749 |     if (opt > 3)
 750 |         return set_error(clip, "JincResize: opt higher than 3 is not allowed.");
 751 |     if (opt == 3 && !(cpu_flags & AVS_CPUF_AVX512F))
 752 |         return set_error(clip, "JincResize: opt=3 requires AVX-512F.");
 753 |     if (opt == 2 && !(cpu_flags & AVS_CPUF_AVX2))
 754 |         return set_error(clip, "JincResize: opt=2 requires AVX2.");
 755 |     if (opt == 1 && !(cpu_flags & AVS_CPUF_SSE4_1))
 756 |         return set_error(clip, "JincResize: opt=1 requires SSE4.1.");
 757 | 
 758 |     const int threads = avs_defined(avs_array_elt(args, Threads)) ? avs_as_int(avs_array_elt(args, Threads)) : 0;
 759 |     if (threads < 0 || threads > 1)
 760 |         return set_error(clip, "JincResize: threads must be either 0 or 1.");
 761 | 
 762 |     double crop_left = avs_defined(avs_array_elt(args, Src_left)) ? avs_as_float(avs_array_elt(args, Src_left)) : 0.0;
 763 |     double crop_width = avs_defined(avs_array_elt(args, Src_width)) ? avs_as_float(avs_array_elt(args, Src_width)) : static_cast<double>(vi->width);
 764 |     if (crop_width <= 0.0)
 765 |         crop_width = vi->width - crop_left + crop_width;
 766 | 
 767 |     double crop_top = avs_defined(avs_array_elt(args, Src_top)) ? avs_as_float(avs_array_elt(args, Src_top)) : 0.0;
 768 |     double crop_height = avs_defined(avs_array_elt(args, Src_height)) ? avs_as_float(avs_array_elt(args, Src_height)) : static_cast<double>(vi->height);
 769 |     if (crop_height <= 0.0)
 770 |         crop_height = vi->height - crop_top + crop_height;
 771 | 
 772 |     double blur = avs_defined(avs_array_elt(args, Blur)) ? avs_as_float(avs_array_elt(args, Blur)) : 0.0;
 773 |     if (!blur)
 774 |         blur = 1.0;
 775 | 
 776 |     const int target_width = avs_as_int(avs_array_elt(args, Target_width));
 777 |     const int target_height = avs_as_int(avs_array_elt(args, Target_height));
 778 | 
 779 |     const double initial_factor = avs_defined(avs_array_elt(args, Initial_factor)) ? avs_as_float(avs_array_elt(args, Initial_factor)) : 1.50;
 780 |     if (initial_factor < 1.0)
 781 |         return set_error(clip, "JincResize: initial_factor must be eqaul to or greater than 1.0.");
 782 | 
 783 |     const int src_width = vi->width;
 784 |     const int src_height = vi->height;
 785 | 
 786 |     const int initial_capacity = avs_defined(avs_array_elt(args, Initial_capacity)) ? avs_as_int(avs_array_elt(args, Initial_capacity))
 787 |         : max(target_width * target_height, src_width * src_height);
 788 |     if (initial_capacity <= 0)
 789 |         return set_error(clip, "JincResize: initial_capacity must be greater than 0.");
 790 | 
 791 |     vi->width = target_width;
 792 |     vi->height = target_height;
 793 |     d->peak = static_cast<float>((1 << avs_bits_per_component(vi)) - 1);
 794 |     const double radius = jinc_zeros[tap - 1];
 795 |     constexpr int samples = 1024;  // should be a multiple of 4
 796 |     d->init_lut = new Lut();
 797 |     d->init_lut->InitLut(samples, radius, blur);
 798 |     d->planecount = avs_num_components(vi);
 799 |     bool subsampled = false;
 800 |     std::vector<EWAPixelCoeff*>* out = &d->out;
 801 |     out->emplace_back(new EWAPixelCoeff());
 802 |     generate_coeff_params params =
 803 |     {
 804 |         d->init_lut,
 805 |         d->out[0],
 806 |         quant_x,
 807 |         quant_y,
 808 |         samples,
 809 |         src_width,
 810 |         src_height,
 811 |         target_width,
 812 |         target_height,
 813 |         radius,
 814 |         crop_left,
 815 |         crop_top,
 816 |         crop_width,
 817 |         crop_height,
 818 |         initial_capacity,
 819 |         initial_factor
 820 |     };
 821 | 
 822 |     try
 823 |     {
 824 |         if (d->planecount > 1)
 825 |         {
 826 |             if (avs_is_444(vi) || avs_is_rgb(vi))
 827 |                 generate_coeff_table_c(params);
 828 |             else
 829 |             {
 830 |                 out->emplace_back(new EWAPixelCoeff());
 831 | 
 832 |                 subsampled = true;
 833 |                 const int sub_w = avs_get_plane_width_subsampling(vi, AVS_PLANAR_U);
 834 |                 const int sub_h = avs_get_plane_height_subsampling(vi, AVS_PLANAR_U);
 835 |                 const double div_w = 1 << sub_w;
 836 |                 const double div_h = 1 << sub_h;
 837 | 
 838 |                 const double crop_left_uv = (cplace == "mpeg2" || cplace == "topleft") ?
 839 |                     (0.5 * (1.0 - static_cast<double>(src_width) / target_width) + crop_left) / div_w : crop_left / div_w;
 840 |                 const double crop_top_uv = (cplace == "topleft") ?
 841 |                     (0.5 * (1.0 - static_cast<double>(src_height) / target_height) + crop_top) / div_h : crop_top / div_h;
 842 | 
 843 |                 generate_coeff_table_c(params);
 844 |                 params = {
 845 |                     d->init_lut,
 846 |                     (*out)[1],
 847 |                     quant_x,
 848 |                     quant_y,
 849 |                     samples,
 850 |                     src_width >> sub_w,
 851 |                     src_height >> sub_h,
 852 |                     target_width >> sub_w,
 853 |                     target_height >> sub_h,
 854 |                     radius,
 855 |                     crop_left_uv,
 856 |                     crop_top_uv,
 857 |                     crop_width / div_w,
 858 |                     crop_height / div_h,
 859 |                     initial_capacity / (static_cast<int>(div_w) * static_cast<int>(div_h)),
 860 |                     initial_factor
 861 |                 };
 862 |                 generate_coeff_table_c(params);
 863 |             }
 864 |         }
 865 |         else
 866 |             generate_coeff_table_c(params);
 867 |     }
 868 |     catch (const std::exception& e)
 869 |     {
 870 |         std::vector<EWAPixelCoeff*>* out = &d->out;
 871 |         for (int i = 0; i < static_cast<int>(d->out.size()); ++i)
 872 |         {
 873 |             delete_coeff_table((*out)[i]);
 874 |             delete (*out)[i];
 875 |         }
 876 | 
 877 |         delete[] d->init_lut->lut;
 878 |         delete d->init_lut;
 879 | 
 880 |         return set_error(clip, e.what());
 881 |     }
 882 |     catch (const char* e)
 883 |     {
 884 |         std::vector<EWAPixelCoeff*>* out = &d->out;
 885 |         for (int i = 0; i < static_cast<int>(d->out.size()); ++i)
 886 |         {
 887 |             delete_coeff_table((*out)[i]);
 888 |             delete (*out)[i];
 889 |         }
 890 | 
 891 |         delete[] d->init_lut->lut;
 892 |         delete d->init_lut;
 893 | 
 894 |         return set_error(clip, e);
 895 |     }
 896 | 
 897 |     const bool avx512 = (opt == 3);
 898 |     const bool avx2 = (!!(cpu_flags & AVS_CPUF_AVX2) && opt < 0) || opt == 2;
 899 |     const bool sse41 = (!!(cpu_flags & AVS_CPUF_SSE4_1) && opt < 0) || opt == 1;
 900 | 
 901 |     if (threads)
 902 |     {
 903 |         switch (avs_component_size(vi))
 904 |         {
 905 |             case 1:
 906 |                 if (avx512)
 907 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_avx512<uint8_t, 1, 1> : &JincResize::resize_plane_avx512<uint8_t, 1, 0>;
 908 |                 else if (avx2)
 909 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_avx2<uint8_t, 1, 1> : &JincResize::resize_plane_avx2<uint8_t, 1, 0>;
 910 |                 else if (sse41)
 911 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_sse41<uint8_t, 1, 1> : &JincResize::resize_plane_sse41<uint8_t, 1, 0>;
 912 |                 else
 913 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_c<uint8_t, 1, 1> : &JincResize::resize_plane_c<uint8_t, 1, 0>;
 914 |                 break;
 915 |             case 2:
 916 |                 if (avx512)
 917 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_avx512<uint16_t, 1, 1> : &JincResize::resize_plane_avx512<uint16_t, 1, 0>;
 918 |                 else if (avx2)
 919 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_avx2<uint16_t, 1, 1> : &JincResize::resize_plane_avx2<uint16_t, 1, 0>;
 920 |                 else if (sse41)
 921 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_sse41<uint16_t, 1, 1> : &JincResize::resize_plane_sse41<uint16_t, 1, 0>;
 922 |                 else
 923 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_c<uint16_t, 1, 1> : &JincResize::resize_plane_c<uint16_t, 1, 0>;
 924 |                 break;
 925 |             default:
 926 |                 if (avx512)
 927 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_avx512<float, 1, 1> : &JincResize::resize_plane_avx512<float, 1, 0>;
 928 |                 else if (avx2)
 929 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_avx2<float, 1, 1> : &JincResize::resize_plane_avx2<float, 1, 0>;
 930 |                 else if (sse41)
 931 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_sse41<float, 1, 1> : &JincResize::resize_plane_sse41<float, 1, 0>;
 932 |                 else
 933 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_c<float, 1, 1> : &JincResize::resize_plane_c<float, 1, 0>;
 934 |                 break;
 935 |         }
 936 |     }
 937 |     else
 938 |     {
 939 |         switch (avs_component_size(vi))
 940 |         {
 941 |             case 1:
 942 |                 if (avx512)
 943 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_avx512<uint8_t, 0, 1> : &JincResize::resize_plane_avx512<uint8_t, 0, 0>;
 944 |                 else if (avx2)
 945 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_avx2<uint8_t, 0, 1> : &JincResize::resize_plane_avx2<uint8_t, 0, 0>;
 946 |                 else if (sse41)
 947 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_sse41<uint8_t, 0, 1> : &JincResize::resize_plane_sse41<uint8_t, 0, 0>;
 948 |                 else
 949 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_c<uint8_t, 0, 1> : &JincResize::resize_plane_c<uint8_t, 0, 0>;
 950 |                 break;
 951 |             case 2:
 952 |                 if (avx512)
 953 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_avx512<uint16_t, 0, 1> : &JincResize::resize_plane_avx512<uint16_t, 0, 0>;
 954 |                 else if (avx2)
 955 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_avx2<uint16_t, 0, 1> : &JincResize::resize_plane_avx2<uint16_t, 0, 0>;
 956 |                 else if (sse41)
 957 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_sse41<uint16_t, 0, 1> : &JincResize::resize_plane_sse41<uint16_t, 0, 0>;
 958 |                 else
 959 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_c<uint16_t, 0, 1> : &JincResize::resize_plane_c<uint16_t, 0, 0>;
 960 |                 break;
 961 |             default:
 962 |                 if (avx512)
 963 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_avx512<float, 0, 1> : &JincResize::resize_plane_avx512<float, 0, 0>;
 964 |                 else if (avx2)
 965 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_avx2<float, 0, 1> : &JincResize::resize_plane_avx2<float, 0, 0>;
 966 |                 else if (sse41)
 967 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_sse41<float, 0, 1> : &JincResize::resize_plane_sse41<float, 0, 0>;
 968 |                 else
 969 |                     d->process_frame = (subsampled) ? &JincResize::resize_plane_c<float, 0, 1> : &JincResize::resize_plane_c<float, 0, 0>;
 970 |                 break;
 971 |         }
 972 |     }
 973 | 
 974 |     AVS_Value v = avs_new_value_clip(clip);
 975 | 
 976 |     fi->user_data = reinterpret_cast<void*>(d);
 977 |     fi->get_frame = JincResize_GetFrame;
 978 |     fi->set_cache_hints = set_cache_hints_JincResize;
 979 |     fi->free_filter = free_JincResize;
 980 | 
 981 |     avs_release_clip(clip);
 982 | 
 983 |     return v;
 984 | }
 985 | 
 986 | class Arguments
 987 | {
 988 |     AVS_Value m_args[12];
 989 |     const char* m_arg_names[12];
 990 |     int m_idx;
 991 | 
 992 | public:
 993 |     Arguments() : m_args{}, m_arg_names{}, m_idx{} {}
 994 | 
 995 |     void add(AVS_Value arg, const char* arg_name = nullptr)
 996 |     {
 997 |         m_args[m_idx] = arg;
 998 |         m_arg_names[m_idx] = arg_name;
 999 |         ++m_idx;
1000 |     }
1001 | 
1002 |     AVS_Value args() { return avs_new_value_array(m_args, m_idx); };
1003 | 
1004 |     const char** arg_names() { return m_arg_names; };
1005 | };
1006 | 
1007 | static void resizer(const AVS_Value& args, Arguments* out_args, int src_left_idx = 3)
1008 | {
1009 |     out_args->add(avs_array_elt(args, 0));
1010 |     out_args->add(avs_array_elt(args, 1));
1011 |     out_args->add(avs_array_elt(args, 2));
1012 | 
1013 |     if (avs_defined(avs_array_elt(args, src_left_idx + 0)))
1014 |         out_args->add(avs_array_elt(args, src_left_idx + 0), "src_left");
1015 |     if (avs_defined(avs_array_elt(args, src_left_idx + 1)))
1016 |         out_args->add(avs_array_elt(args, src_left_idx + 1), "src_top");
1017 |     if (avs_defined(avs_array_elt(args, src_left_idx + 2)))
1018 |         out_args->add(avs_array_elt(args, src_left_idx + 2), "src_width");
1019 |     if (avs_defined(avs_array_elt(args, src_left_idx + 3)))
1020 |         out_args->add(avs_array_elt(args, src_left_idx + 3), "src_height");
1021 |     if (avs_defined(avs_array_elt(args, src_left_idx + 4)))
1022 |         out_args->add(avs_array_elt(args, src_left_idx + 4), "quant_x");
1023 |     if (avs_defined(avs_array_elt(args, src_left_idx + 5)))
1024 |         out_args->add(avs_array_elt(args, src_left_idx + 5), "quant_y");
1025 |     if (avs_defined(avs_array_elt(args, src_left_idx + 6)))
1026 |         out_args->add(avs_array_elt(args, src_left_idx + 6), "cplace");
1027 |     if (avs_defined(avs_array_elt(args, src_left_idx + 7)))
1028 |         out_args->add(avs_array_elt(args, src_left_idx + 7), "threads");
1029 | }
1030 | 
1031 | template <int taps>
1032 | static AVS_Value AVSC_CC resizer_jincresize(AVS_ScriptEnvironment* env, AVS_Value args, void* param)
1033 | {
1034 |     Arguments mapped_args;
1035 | 
1036 |     resizer(args, &mapped_args);
1037 |     mapped_args.add(avs_new_value_int(taps), "tap");
1038 | 
1039 |     return avs_invoke(env, "JincResize", mapped_args.args(), mapped_args.arg_names());
1040 | }
1041 | 
1042 | const char* AVSC_CC avisynth_c_plugin_init(AVS_ScriptEnvironment* env)
1043 | {
1044 |     avs_add_function(env, "JincResize",
1045 |         "c"
1046 |         "i"
1047 |         "i"
1048 |         "[src_left]f"
1049 |         "[src_top]f"
1050 |         "[src_width]f"
1051 |         "[src_height]f"
1052 |         "[quant_x]i"
1053 |         "[quant_y]i"
1054 |         "[tap]i"
1055 |         "[blur]f"
1056 |         "[cplace]s"
1057 |         "[threads]i"
1058 |         "[opt]i"
1059 |         "[initial_capacity]i"
1060 |         "[initial_factor]f", Create_JincResize, 0);
1061 |     avs_add_function(env, "Jinc36Resize",
1062 |         "c"
1063 |         "i"
1064 |         "i"
1065 |         "[src_left]f"
1066 |         "[src_top]f"
1067 |         "[src_width]f"
1068 |         "[src_height]f"
1069 |         "[quant_x]i"
1070 |         "[quant_y]i"
1071 |         "[cplace]s"
1072 |         "[threads]i", resizer_jincresize<3>, 0);
1073 |     avs_add_function(env, "Jinc64Resize",
1074 |         "c"
1075 |         "i"
1076 |         "i"
1077 |         "[src_left]f"
1078 |         "[src_top]f"
1079 |         "[src_width]f"
1080 |         "[src_height]f"
1081 |         "[quant_x]i"
1082 |         "[quant_y]i"
1083 |         "[cplace]s"
1084 |         "[threads]i", resizer_jincresize<4>, 0);
1085 |     avs_add_function(env, "Jinc144Resize",
1086 |         "c"
1087 |         "i"
1088 |         "i"
1089 |         "[src_left]f"
1090 |         "[src_top]f"
1091 |         "[src_width]f"
1092 |         "[src_height]f"
1093 |         "[quant_x]i"
1094 |         "[quant_y]i"
1095 |         "[cplace]s"
1096 |         "[threads]i", resizer_jincresize<6>, 0);
1097 |     avs_add_function(env, "Jinc256Resize",
1098 |         "c"
1099 |         "i"
1100 |         "i"
1101 |         "[src_left]f"
1102 |         "[src_top]f"
1103 |         "[src_width]f"
1104 |         "[src_height]f"
1105 |         "[quant_x]i"
1106 |         "[quant_y]i"
1107 |         "[cplace]s"
1108 |         "[threads]i", resizer_jincresize<8>, 0);
1109 | 
1110 |     return "JincResize";
1111 | }
1112 | 


--------------------------------------------------------------------------------
/src/JincResize.h:
--------------------------------------------------------------------------------
 1 | #ifndef __JINCRESIZE_H__
 2 | #define __JINCRESIZE_H__
 3 | 
 4 | #include <execution>
 5 | #include <string>
 6 | #include <vector>
 7 | 
 8 | #include "avisynth_c.h"
 9 | #include "avs/minmax.h"
10 | 
// Per-destination-pixel lookup record consumed by the resize_plane_* kernels.
struct EWAPixelCoeffMeta
{
    int start_x;    // column of the first source sample read for this pixel
    int start_y;    // row of the first source sample read for this pixel
    int coeff_meta; // offset, in floats, from EWAPixelCoeff::factor to this pixel's first coefficient
};
17 | 
// Coefficient table for one plane geometry, shared by all resize_plane_* kernels.
struct EWAPixelCoeff
{
    float* factor;           // coefficient storage; the SIMD kernels read it with aligned loads
    EWAPixelCoeffMeta* meta; // one record per destination pixel, indexed as y * dst_width + x
    int* factor_map;         // not referenced by the SSE4.1/AVX2/AVX-512 kernels; NOTE(review): purpose not visible here
    int filter_size;         // loop bound for both the row and the column walk over the taps
    int coeff_stride;        // floats to advance `factor` by when moving to the next tap row
};
26 | 
// Lookup table of precomputed kernel values.
// Only the declaration lives here; the member definitions are in another
// translation unit, so the notes below are limited to what callers can see.
class Lut
{
    int lut_size = 1024; // entry count; NOTE(review): presumably replaced by InitLut()'s argument - confirm

public:
    Lut();
    // Fills the table for the given size, radius and blur.
    // NOTE(review): allocation of `lut` is presumed to happen here or in the constructor - confirm.
    void InitLut(int lut_size, double radius, double blur);
    // Returns the table value for `index` as a float.
    float GetFactor(int index);

    double* lut; // raw table storage; the filter factory's error path releases it with delete[]
};
38 | 
39 | struct JincResize
40 | {
41 |     std::string cplace;
42 |     Lut* init_lut;
43 |     std::vector<EWAPixelCoeff*> out;
44 |     int planecount;
45 |     float peak;
46 | 
47 |     template<typename T, int thr, int subsampled>
48 |     void resize_plane_c(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
49 |     template <typename T, int thr, int subsampled>
50 |     void resize_plane_sse41(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
51 |     template <typename T, int thr, int subsampled>
52 |     void resize_plane_avx2(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
53 |     template <typename T, int thr, int subsampled>
54 |     void resize_plane_avx512(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
55 | 
56 |     void(JincResize::* process_frame)(AVS_VideoFrame*, AVS_VideoFrame*, AVS_VideoInfo*);
57 | };
58 | 
59 | #endif
60 | 


--------------------------------------------------------------------------------
/src/JincResize.rc:
--------------------------------------------------------------------------------
 1 | #include <winver.h>
 2 | 
// Version resource embedded in JincResize.dll.
// The numeric FILEVERSION/PRODUCTVERSION tuples and the "FileVersion"/
// "ProductVersion" strings all carry 2.1.4 and have to be bumped together.
// The string block key "040904E4" is language 0x0409 followed by code page
// 0x04E4 (= 1252), matching the Translation entry at the bottom.
1 VERSIONINFO
FILEVERSION             2,1,4,0
PRODUCTVERSION        	2,1,4,0
FILEFLAGSMASK           VS_FFI_FILEFLAGSMASK
FILETYPE                VFT_DLL
BEGIN
    BLOCK "StringFileInfo"
    BEGIN
        BLOCK "040904E4"
        BEGIN
        VALUE "Comments",         "Jinc (EWA Lanczos) resampling filter."
        VALUE "FileDescription",  "JincResize for AviSynth+"
        VALUE "FileVersion",      "2.1.4"
        VALUE "InternalName",     "JincResize"
        VALUE "OriginalFilename", "JincResize.dll"
        VALUE "ProductName",      "JincResize"
        VALUE "ProductVersion",   "2.1.4"
        END
    END
    BLOCK "VarFileInfo"
    BEGIN
        VALUE "Translation", 0x409, 1252
    END
END
27 | 


--------------------------------------------------------------------------------
/src/resize_plane_avx2.cpp:
--------------------------------------------------------------------------------
  1 | #include <immintrin.h>
  2 | 
  3 | #include "JincResize.h"
  4 | 
  5 | #if !defined(__AVX2__)
  6 | #error "AVX2 option needed"
  7 | #endif
  8 | 
  9 | template <typename T, int thr, int subsampled>
 10 | void JincResize::resize_plane_avx2(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi)
 11 | {
 12 |     const int planes_y[4] = { AVS_PLANAR_Y, AVS_PLANAR_U, AVS_PLANAR_V, AVS_PLANAR_A };
 13 |     const int planes_r[4] = { AVS_PLANAR_G, AVS_PLANAR_B, AVS_PLANAR_R, AVS_PLANAR_A };
 14 |     const int* current_planes = (avs_is_rgb(vi)) ? planes_r : planes_y;
 15 |     for (int i = 0; i < planecount; ++i)
 16 |     {
 17 |         const int plane = current_planes[i];
 18 | 
 19 |         const int src_stride = avs_get_pitch_p(src, plane) / sizeof(T);
 20 |         const int dst_stride = avs_get_pitch_p(dst, plane) / sizeof(T);
 21 |         const int dst_width = avs_get_row_size_p(dst, plane) / sizeof(T);
 22 |         const int dst_height = avs_get_height_p(dst, plane);
 23 |         const T* srcp = reinterpret_cast<const T*>(avs_get_read_ptr_p(src, plane));
 24 |         const __m256 min_val = (i && !avs_is_rgb(vi)) ? _mm256_set1_ps(-0.5f) : _mm256_setzero_ps();
 25 | 
 26 |         EWAPixelCoeff* out = [&]()
 27 |         {
 28 |             if constexpr (subsampled)
 29 |                 return (i) ? (i == 3) ? JincResize::out[0] : JincResize::out[1] : JincResize::out[0];
 30 |             else
 31 |                 return JincResize::out[0];
 32 |         }();
 33 | 
 34 |         auto loop = [&](int y)
 35 |         {
 36 |             T* __restrict dstp = reinterpret_cast<T*>(avs_get_write_ptr_p(dst, plane)) + static_cast<int64_t>(y) * dst_stride;
 37 | 
 38 |             for (int x = 0; x < dst_width; ++x)
 39 |             {
 40 |                 EWAPixelCoeffMeta* meta = out->meta + static_cast<int64_t>(y) * dst_width + x;
 41 |                 const T* src_ptr = srcp + (meta->start_y * static_cast<int64_t>(src_stride)) + meta->start_x;
 42 |                 const float* coeff_ptr = out->factor + meta->coeff_meta;
 43 |                 __m256 result = _mm256_setzero_ps();
 44 | 
 45 |                 if constexpr (std::is_same_v<T, uint8_t>)
 46 |                 {
 47 |                     for (int ly = 0; ly < out->filter_size; ++ly)
 48 |                     {
 49 |                         for (int lx = 0; lx < out->filter_size; lx += 8)
 50 |                         {
 51 |                             const __m256 src_ps = _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(_mm_loadu_si128(const_cast<__m128i*>(reinterpret_cast<const __m128i*>(src_ptr + lx)))));
 52 |                             const __m256 coeff = _mm256_load_ps(coeff_ptr + lx);
 53 |                             result = _mm256_fmadd_ps(src_ps, coeff, result);
 54 |                         }
 55 | 
 56 |                         coeff_ptr += out->coeff_stride;
 57 |                         src_ptr += src_stride;
 58 |                     }
 59 | 
 60 |                     __m128 hsum = _mm_add_ps(_mm256_castps256_ps128(result), _mm256_extractf128_ps(result, 1));
 61 |                     hsum = _mm_hadd_ps(_mm_hadd_ps(hsum, hsum), _mm_hadd_ps(hsum, hsum));
 62 |                     dstp[x] = _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packus_epi32(_mm_cvtps_epi32(hsum), _mm_setzero_si128()), _mm_setzero_si128()));
 63 |                 }
 64 |                 else if constexpr (std::is_same_v<T, uint16_t>)
 65 |                 {
 66 |                     for (int ly = 0; ly < out->filter_size; ++ly)
 67 |                     {
 68 |                         for (int lx = 0; lx < out->filter_size; lx += 8)
 69 |                         {
 70 |                             const __m256 src_ps = _mm256_cvtepi32_ps(_mm256_cvtepu16_epi32(_mm_loadu_si128(const_cast<__m128i*>(reinterpret_cast<const __m128i*>(src_ptr + lx)))));
 71 |                             const __m256 coeff = _mm256_load_ps(coeff_ptr + lx);
 72 |                             result = _mm256_fmadd_ps(src_ps, coeff, result);
 73 |                         }
 74 | 
 75 |                         coeff_ptr += out->coeff_stride;
 76 |                         src_ptr += src_stride;
 77 |                     }
 78 | 
 79 |                     __m128 hsum = _mm_add_ps(_mm256_castps256_ps128(result), _mm256_extractf128_ps(result, 1));
 80 |                     hsum = _mm_hadd_ps(_mm_hadd_ps(hsum, hsum), _mm_hadd_ps(hsum, hsum));
 81 |                     dstp[x] = _mm_cvtsi128_si32(_mm_packus_epi32(_mm_cvtps_epi32(hsum), _mm_setzero_si128()));
 82 |                 }
 83 |                 else
 84 |                 {
 85 |                     for (int ly = 0; ly < out->filter_size; ++ly)
 86 |                     {
 87 |                         for (int lx = 0; lx < out->filter_size; lx += 8)
 88 |                         {
 89 |                             const __m256 src_ps = _mm256_max_ps(_mm256_loadu_ps(src_ptr + lx), min_val);
 90 |                             const __m256 coeff = _mm256_load_ps(coeff_ptr + lx);
 91 |                             result = _mm256_fmadd_ps(src_ps, coeff, result);
 92 |                         }
 93 | 
 94 |                         coeff_ptr += out->coeff_stride;
 95 |                         src_ptr += src_stride;
 96 |                     }
 97 | 
 98 |                     __m128 hsum = _mm_add_ps(_mm256_castps256_ps128(result), _mm256_extractf128_ps(result, 1));
 99 |                     dstp[x] = _mm_cvtss_f32(_mm_hadd_ps(_mm_hadd_ps(hsum, hsum), _mm_hadd_ps(hsum, hsum)));
100 |                 }
101 |             }
102 |         };
103 | 
104 |         if constexpr (thr)
105 |         {
106 |             for (intptr_t i = 0; i < dst_height; ++i)
107 |                 loop(i);
108 |         }
109 |         else
110 |         {
111 |             std::vector<int> l(dst_height);
112 |             std::iota(std::begin(l), std::end(l), 0);
113 |             std::for_each(std::execution::par, std::begin(l), std::end(l), loop);
114 |         }
115 |     }
116 | }
117 | 
118 | template void JincResize::resize_plane_avx2<uint8_t, 0, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
119 | template void JincResize::resize_plane_avx2<uint16_t, 0, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
120 | template void JincResize::resize_plane_avx2<float, 0, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
121 | 
122 | template void JincResize::resize_plane_avx2<uint8_t, 1, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
123 | template void JincResize::resize_plane_avx2<uint16_t, 1, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
124 | template void JincResize::resize_plane_avx2<float, 1, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
125 | 
126 | template void JincResize::resize_plane_avx2<uint8_t, 0, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
127 | template void JincResize::resize_plane_avx2<uint16_t, 0, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
128 | template void JincResize::resize_plane_avx2<float, 0, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
129 | 
130 | template void JincResize::resize_plane_avx2<uint8_t, 1, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
131 | template void JincResize::resize_plane_avx2<uint16_t, 1, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
132 | template void JincResize::resize_plane_avx2<float, 1, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
133 | 


--------------------------------------------------------------------------------
/src/resize_plane_avx512.cpp:
--------------------------------------------------------------------------------
  1 | #include <immintrin.h>
  2 | 
  3 | #include "JincResize.h"
  4 | 
  5 | #if !defined(__AVX512F__ ) && !defined(__INTEL_COMPILER)
  6 | #error "AVX512 option needed"
  7 | #endif
  8 | 
  9 | template <typename T, int thr, int subsampled>
 10 | void JincResize::resize_plane_avx512(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi)
 11 | {
 12 |     const int planes_y[4] = { AVS_PLANAR_Y, AVS_PLANAR_U, AVS_PLANAR_V, AVS_PLANAR_A };
 13 |     const int planes_r[4] = { AVS_PLANAR_G, AVS_PLANAR_B, AVS_PLANAR_R, AVS_PLANAR_A };
 14 |     const int* current_planes = (avs_is_rgb(vi)) ? planes_r : planes_y;
 15 |     for (int i = 0; i < planecount; ++i)
 16 |     {
 17 |         const int plane = current_planes[i];
 18 | 
 19 |         const int src_stride = avs_get_pitch_p(src, plane) / sizeof(T);
 20 |         const int dst_stride = avs_get_pitch_p(dst, plane) / sizeof(T);
 21 |         const int dst_width = avs_get_row_size_p(dst, plane) / sizeof(T);
 22 |         const int dst_height = avs_get_height_p(dst, plane);
 23 |         const T* srcp = reinterpret_cast<const T*>(avs_get_read_ptr_p(src, plane));
 24 |         const __m512 min_val = (i && !avs_is_rgb(vi)) ? _mm512_set1_ps(-0.5f) : _mm512_setzero_ps();
 25 | 
 26 |         EWAPixelCoeff* out = [&]()
 27 |         {
 28 |             if constexpr (subsampled)
 29 |                 return (i) ? (i == 3) ? JincResize::out[0] : JincResize::out[1] : JincResize::out[0];
 30 |             else
 31 |                 return JincResize::out[0];
 32 |         }();
 33 | 
 34 |         auto loop = [&](int y)
 35 |         {
 36 |             T* __restrict dstp = reinterpret_cast<T*>(avs_get_write_ptr_p(dst, plane)) + static_cast<int64_t>(y) * dst_stride;
 37 | 
 38 |             for (int x = 0; x < dst_width; ++x)
 39 |             {
 40 |                 EWAPixelCoeffMeta* meta = out->meta + static_cast<int64_t>(y) * dst_width + x;
 41 |                 const T* src_ptr = srcp + (meta->start_y * static_cast<int64_t>(src_stride)) + meta->start_x;
 42 |                 const float* coeff_ptr = out->factor + meta->coeff_meta;
 43 |                 __m512 result = _mm512_setzero_ps();
 44 | 
 45 |                 if constexpr (std::is_same_v<T, uint8_t>)
 46 |                 {
 47 |                     for (int ly = 0; ly < out->filter_size; ++ly)
 48 |                     {
 49 |                         for (int lx = 0; lx < out->filter_size; lx += 16)
 50 |                         {
 51 |                             const __m512 src_ps = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(src_ptr + lx))));
 52 |                             const __m512 coeff = _mm512_load_ps(coeff_ptr + lx);
 53 |                             result = _mm512_fmadd_ps(src_ps, coeff, result);
 54 |                         }
 55 | 
 56 |                         coeff_ptr += out->coeff_stride;
 57 |                         src_ptr += src_stride;
 58 |                     }
 59 | 
 60 |                     const __m256 lo_hi_256 = _mm256_add_ps(_mm512_castps512_ps256(result), _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(result), 1)));
 61 |                     __m128 hsum = _mm_add_ps(_mm256_castps256_ps128(lo_hi_256), _mm256_extractf128_ps(lo_hi_256, 1));
 62 |                     hsum = _mm_hadd_ps(_mm_hadd_ps(hsum, hsum), _mm_hadd_ps(hsum, hsum));
 63 |                     dstp[x] = _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packus_epi32(_mm_cvtps_epi32(hsum), _mm_setzero_si128()), _mm_setzero_si128()));
 64 |                 }
 65 |                 else if constexpr (std::is_same_v<T, uint16_t>)
 66 |                 {
 67 |                     for (int ly = 0; ly < out->filter_size; ++ly)
 68 |                     {
 69 |                         for (int lx = 0; lx < out->filter_size; lx += 16)
 70 |                         {
 71 |                             const __m512 src_ps = _mm512_cvtepi32_ps(_mm512_cvtepu16_epi32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(src_ptr + lx))));
 72 |                             const __m512 coeff = _mm512_load_ps(coeff_ptr + lx);
 73 |                             result = _mm512_fmadd_ps(src_ps, coeff, result);
 74 |                         }
 75 | 
 76 |                         coeff_ptr += out->coeff_stride;
 77 |                         src_ptr += src_stride;
 78 |                     }
 79 | 
 80 |                     const __m256 lo_hi_256 = _mm256_add_ps(_mm512_castps512_ps256(result), _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(result), 1)));
 81 |                     __m128 hsum = _mm_add_ps(_mm256_castps256_ps128(lo_hi_256), _mm256_extractf128_ps(lo_hi_256, 1));
 82 |                     hsum = _mm_hadd_ps(_mm_hadd_ps(hsum, hsum), _mm_hadd_ps(hsum, hsum));
 83 |                     dstp[x] = _mm_cvtsi128_si32(_mm_packus_epi32(_mm_cvtps_epi32(hsum), _mm_setzero_si128()));
 84 |                 }
 85 |                 else
 86 |                 {
 87 |                     for (int ly = 0; ly < out->filter_size; ++ly)
 88 |                     {
 89 |                         for (int lx = 0; lx < out->filter_size; lx += 16)
 90 |                         {
 91 |                             const __m512 src_ps = _mm512_max_ps(_mm512_loadu_ps(src_ptr + lx), min_val);
 92 |                             const __m512 coeff = _mm512_load_ps(coeff_ptr + lx);
 93 |                             result = _mm512_fmadd_ps(src_ps, coeff, result);
 94 |                         }
 95 | 
 96 |                         coeff_ptr += out->coeff_stride;
 97 |                         src_ptr += src_stride;
 98 |                     }
 99 | 
100 |                     const __m256 lo_hi_256 = _mm256_add_ps(_mm512_castps512_ps256(result), _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(result), 1)));
101 |                     __m128 hsum = _mm_add_ps(_mm256_castps256_ps128(lo_hi_256), _mm256_extractf128_ps(lo_hi_256, 1));
102 |                     dstp[x] = _mm_cvtss_f32(_mm_hadd_ps(_mm_hadd_ps(hsum, hsum), _mm_hadd_ps(hsum, hsum)));
103 |                 }
104 |             }
105 |         };
106 | 
107 |         if constexpr (thr)
108 |         {
109 |             for (intptr_t i = 0; i < dst_height; ++i)
110 |                 loop(i);
111 |         }
112 |         else
113 |         {
114 |             std::vector<int> l(dst_height);
115 |             std::iota(std::begin(l), std::end(l), 0);
116 |             std::for_each(std::execution::par, std::begin(l), std::end(l), loop);
117 |         }
118 |     }
119 | }
120 | 
121 | template void JincResize::resize_plane_avx512<uint8_t, 0, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
122 | template void JincResize::resize_plane_avx512<uint16_t, 0, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
123 | template void JincResize::resize_plane_avx512<float, 0, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
124 | 
125 | template void JincResize::resize_plane_avx512<uint8_t, 1, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
126 | template void JincResize::resize_plane_avx512<uint16_t, 1, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
127 | template void JincResize::resize_plane_avx512<float, 1, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
128 | 
129 | template void JincResize::resize_plane_avx512<uint8_t, 0, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
130 | template void JincResize::resize_plane_avx512<uint16_t, 0, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
131 | template void JincResize::resize_plane_avx512<float, 0, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
132 | 
133 | template void JincResize::resize_plane_avx512<uint8_t, 1, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
134 | template void JincResize::resize_plane_avx512<uint16_t, 1, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
135 | template void JincResize::resize_plane_avx512<float, 1, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
136 | 


--------------------------------------------------------------------------------
/src/resize_plane_sse41.cpp:
--------------------------------------------------------------------------------
  1 | #include <smmintrin.h>
  2 | 
  3 | #include "JincResize.h"
  4 | 
  5 | template <typename T, int thr, int subsampled>
  6 | void JincResize::resize_plane_sse41(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi)
  7 | {
  8 |     const int planes_y[4] = { AVS_PLANAR_Y, AVS_PLANAR_U, AVS_PLANAR_V, AVS_PLANAR_A };
  9 |     const int planes_r[4] = { AVS_PLANAR_G, AVS_PLANAR_B, AVS_PLANAR_R, AVS_PLANAR_A };
 10 |     const int* current_planes = (avs_is_rgb(vi)) ? planes_r : planes_y;
 11 |     for (int i = 0; i < planecount; ++i)
 12 |     {
 13 |         const int plane = current_planes[i];
 14 | 
 15 |         const int src_stride = avs_get_pitch_p(src, plane) / sizeof(T);
 16 |         const int dst_stride = avs_get_pitch_p(dst, plane) / sizeof(T);
 17 |         const int dst_width = avs_get_row_size_p(dst, plane) / sizeof(T);
 18 |         const int dst_height = avs_get_height_p(dst, plane);
 19 |         const T* srcp = reinterpret_cast<const T*>(avs_get_read_ptr_p(src, plane));
 20 |         const __m128 min_val = (i && !avs_is_rgb(vi)) ? _mm_set_ps1(-0.5f) : _mm_setzero_ps();
 21 | 
 22 |         EWAPixelCoeff* out = [&]()
 23 |         {
 24 |             if constexpr (subsampled)
 25 |                 return (i) ? (i == 3) ? JincResize::out[0] : JincResize::out[1] : JincResize::out[0];
 26 |             else
 27 |                 return JincResize::out[0];
 28 |         }();
 29 | 
 30 |         auto loop = [&](int y)
 31 |         {
 32 |             T* __restrict dstp = reinterpret_cast<T*>(avs_get_write_ptr_p(dst, plane)) + static_cast<int64_t>(y) * dst_stride;
 33 | 
 34 |             for (int x = 0; x < dst_width; ++x)
 35 |             {
 36 |                 EWAPixelCoeffMeta* meta = out->meta + static_cast<int64_t>(y) * dst_width + x;
 37 |                 const T* src_ptr = srcp + (meta->start_y * static_cast<int64_t>(src_stride)) + meta->start_x;
 38 |                 const float* coeff_ptr = out->factor + meta->coeff_meta;
 39 |                 __m128 result = _mm_setzero_ps();
 40 | 
 41 |                 if constexpr (std::is_same_v<T, uint8_t>)
 42 |                 {
 43 |                     for (int ly = 0; ly < out->filter_size; ++ly)
 44 |                     {
 45 |                         for (int lx = 0; lx < out->filter_size; lx += 4)
 46 |                         {
 47 |                             const __m128 src_ps = _mm_cvtepi32_ps(_mm_cvtepu8_epi32(_mm_cvtsi32_si128(*(reinterpret_cast<const int32_t*>(src_ptr + lx)))));
 48 |                             const __m128 coeff = _mm_load_ps(coeff_ptr + lx);
 49 |                             result = _mm_add_ps(result, _mm_mul_ps(src_ps, coeff));
 50 |                         }
 51 | 
 52 |                         coeff_ptr += out->coeff_stride;
 53 |                         src_ptr += src_stride;
 54 |                     }
 55 | 
 56 |                     const __m128 hsum = _mm_hadd_ps(_mm_hadd_ps(result, result), _mm_hadd_ps(result, result));
 57 |                     dstp[x] = _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packus_epi32(_mm_cvtps_epi32(hsum), _mm_setzero_si128()), _mm_setzero_si128()));
 58 |                 }
 59 |                 else if constexpr (std::is_same_v<T, uint16_t>)
 60 |                 {
 61 |                     for (int ly = 0; ly < out->filter_size; ++ly)
 62 |                     {
 63 |                         for (int lx = 0; lx < out->filter_size; lx += 4)
 64 |                         {
 65 |                             const __m128 src_ps = _mm_cvtepi32_ps(_mm_cvtepu16_epi32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(src_ptr + lx))));
 66 |                             const __m128 coeff = _mm_load_ps(coeff_ptr + lx);
 67 |                             result = _mm_add_ps(result, _mm_mul_ps(src_ps, coeff));
 68 |                         }
 69 | 
 70 |                         coeff_ptr += out->coeff_stride;
 71 |                         src_ptr += src_stride;
 72 |                     }
 73 | 
 74 |                     const __m128 hsum = _mm_hadd_ps(_mm_hadd_ps(result, result), _mm_hadd_ps(result, result));
 75 |                     dstp[x] = _mm_cvtsi128_si32(_mm_packus_epi32(_mm_cvtps_epi32(hsum), _mm_setzero_si128()));
 76 |                 }
 77 |                 else
 78 |                 {
 79 |                     for (int ly = 0; ly < out->filter_size; ++ly)
 80 |                     {
 81 |                         for (int lx = 0; lx < out->filter_size; lx += 4)
 82 |                         {
 83 |                             const __m128 src_ps = _mm_max_ps(_mm_loadu_ps(src_ptr + lx), min_val);
 84 |                             const __m128 coeff = _mm_load_ps(coeff_ptr + lx);
 85 |                             result = _mm_add_ps(result, _mm_mul_ps(src_ps, coeff));
 86 |                         }
 87 | 
 88 |                         coeff_ptr += out->coeff_stride;
 89 |                         src_ptr += src_stride;
 90 |                     }
 91 | 
 92 |                     dstp[x] = _mm_cvtss_f32(_mm_hadd_ps(_mm_hadd_ps(result, result), _mm_hadd_ps(result, result)));
 93 |                 }
 94 |             }
 95 |         };
 96 | 
 97 |         if constexpr (thr)
 98 |         {
 99 |             for (intptr_t i = 0; i < dst_height; ++i)
100 |                 loop(i);
101 |         }
102 |         else
103 |         {
104 |             std::vector<int> l(dst_height);
105 |             std::iota(std::begin(l), std::end(l), 0);
106 |             std::for_each(std::execution::par, std::begin(l), std::end(l), loop);
107 |         }
108 |     }
109 | }
110 | // Explicit instantiation definitions of JincResize::resize_plane_sse41 for uint8_t, uint16_t and float samples, crossed with every 0/1 pairing of the two non-type template arguments (12 in total); first group: <0, 1>. NOTE(review): which non-type argument is the `thr` flag is not visible from here - confirm against the template declaration.
111 | template void JincResize::resize_plane_sse41<uint8_t, 0, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
112 | template void JincResize::resize_plane_sse41<uint16_t, 0, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
113 | template void JincResize::resize_plane_sse41<float, 0, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
114 | // Same three sample types with non-type arguments <1, 1>.
115 | template void JincResize::resize_plane_sse41<uint8_t, 1, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
116 | template void JincResize::resize_plane_sse41<uint16_t, 1, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
117 | template void JincResize::resize_plane_sse41<float, 1, 1>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
118 | // Same three sample types with non-type arguments <0, 0>.
119 | template void JincResize::resize_plane_sse41<uint8_t, 0, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
120 | template void JincResize::resize_plane_sse41<uint16_t, 0, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
121 | template void JincResize::resize_plane_sse41<float, 0, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
122 | // Same three sample types with non-type arguments <1, 0>.
123 | template void JincResize::resize_plane_sse41<uint8_t, 1, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
124 | template void JincResize::resize_plane_sse41<uint16_t, 1, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
125 | template void JincResize::resize_plane_sse41<float, 1, 0>(AVS_VideoFrame* src, AVS_VideoFrame* dst, AVS_VideoInfo* vi);
126 | 


--------------------------------------------------------------------------------