├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── TComb - ReadMe.txt
├── TComb
├── CMakeLists.txt
├── Files.cmake
├── PlanarFrame.cpp
├── PlanarFrame.h
├── TComb.cpp
├── TComb.h
├── TComb.rc
├── TComb.sln
├── TComb.vcproj
├── TComb.vcxproj
├── TComb.vcxproj.filters
├── TComb_asm.asm
├── TComb_asm_x64.asm
├── TComb_core.cpp
├── avisynth.h
├── avs
│ ├── alignment.h
│ ├── capi.h
│ ├── config.h
│ ├── cpuid.h
│ ├── filesystem.h
│ ├── minmax.h
│ ├── posix.h
│ ├── types.h
│ └── win.h
├── common.h
└── resource.h
└── cmake_uninstall.cmake.in
/.gitignore:
--------------------------------------------------------------------------------
1 | CMakeCache.txt
2 | CMakeFiles/*
3 |
4 | #cmake generated files
5 | cmake_install.cmake
6 | cmake_uninstall.cmake
7 | generate.stamp
8 | generate.stamp.depend
9 | makefile
10 |
11 | #make
12 | install_manifest.txt
13 |
14 | ## Ignore Visual Studio temporary files, build results, and
15 | ## files generated by popular Visual Studio add-ons.
16 |
17 | # User-specific files
18 | *.suo
19 | *.user
20 | *.userosscache
21 | *.sln.docstates
22 |
23 | # User-specific files (MonoDevelop/Xamarin Studio)
24 | *.userprefs
25 |
26 | # Build results
27 | [Dd]ebug/
28 | [Dd]ebugPublic/
29 | [Rr]elease/
30 | [Rr]eleases/
31 | x64/
32 | x86/
33 | build/
34 | bld/
35 | [Bb]in/
36 | [Oo]bj/
37 |
38 | # Visual Studio 2015 cache/options directory
39 | .vs/
40 |
41 | # MSTest test Results
42 | [Tt]est[Rr]esult*/
43 | [Bb]uild[Ll]og.*
44 |
45 | # NUNIT
46 | *.VisualState.xml
47 | TestResult.xml
48 |
49 | # Build Results of an ATL Project
50 | [Dd]ebugPS/
51 | [Rr]eleasePS/
52 | dlldata.c
53 |
54 | *_i.c
55 | *_p.c
56 | *_i.h
57 | *.ilk
58 | *.meta
59 | *.obj
60 | *.pch
61 | *.pdb
62 | *.pgc
63 | *.pgd
64 | *.rsp
65 | *.sbr
66 | *.tlb
67 | *.tli
68 | *.tlh
69 | *.tmp
70 | *.tmp_proj
71 | *.log
72 | *.vspscc
73 | *.vssscc
74 | .builds
75 | *.pidb
76 | *.svclog
77 | *.scc
78 |
79 | # Chutzpah Test files
80 | _Chutzpah*
81 |
82 | # Visual C++ cache files
83 | ipch/
84 | *.aps
85 | *.ncb
86 | *.opensdf
87 | *.sdf
88 | *.cachefile
89 |
90 | # Visual Studio profiler
91 | *.psess
92 | *.vsp
93 | *.vspx
94 |
95 | # TFS 2012 Local Workspace
96 | $tf/
97 |
98 | # Guidance Automation Toolkit
99 | *.gpState
100 |
101 | # ReSharper is a .NET coding add-in
102 | _ReSharper*/
103 | *.[Rr]e[Ss]harper
104 | *.DotSettings.user
105 |
106 | # JustCode is a .NET coding add-in
107 | .JustCode
108 |
109 | # TeamCity is a build add-in
110 | _TeamCity*
111 |
112 | # DotCover is a Code Coverage Tool
113 | *.dotCover
114 |
115 | # NCrunch
116 | _NCrunch_*
117 | .*crunch*.local.xml
118 |
119 | # MightyMoose
120 | *.mm.*
121 | AutoTest.Net/
122 |
123 | # Web workbench (sass)
124 | .sass-cache/
125 |
126 | # Installshield output folder
127 | [Ee]xpress/
128 |
129 | # DocProject is a documentation generator add-in
130 | DocProject/buildhelp/
131 | DocProject/Help/*.HxT
132 | DocProject/Help/*.HxC
133 | DocProject/Help/*.hhc
134 | DocProject/Help/*.hhk
135 | DocProject/Help/*.hhp
136 | DocProject/Help/Html2
137 | DocProject/Help/html
138 |
139 | # Click-Once directory
140 | publish/
141 |
142 | # Publish Web Output
143 | *.[Pp]ublish.xml
144 | *.azurePubxml
145 | # TODO: Comment the next line if you want to checkin your web deploy settings
146 | # but database connection strings (with potential passwords) will be unencrypted
147 | *.pubxml
148 | *.publishproj
149 |
150 | # NuGet Packages
151 | *.nupkg
152 | # The packages folder can be ignored because of Package Restore
153 | **/packages/*
154 | # except build/, which is used as an MSBuild target.
155 | !**/packages/build/
156 | # Uncomment if necessary however generally it will be regenerated when needed
157 | #!**/packages/repositories.config
158 |
159 | # Windows Azure Build Output
160 | csx/
161 | *.build.csdef
162 |
163 | # Windows Store app package directory
164 | AppPackages/
165 |
166 | # Others
167 | *.[Cc]ache
168 | ClientBin/
169 | [Ss]tyle[Cc]op.*
170 | ~$*
171 | *~
172 | *.dbmdl
173 | *.dbproj.schemaview
174 | *.pfx
175 | *.publishsettings
176 | node_modules/
177 | bower_components/
178 |
179 | # RIA/Silverlight projects
180 | Generated_Code/
181 |
182 | # Backup & report files from converting an old project file
183 | # to a newer Visual Studio version. Backup files are not needed,
184 | # because we have git ;-)
185 | _UpgradeReport_Files/
186 | Backup*/
187 | UpgradeLog*.XML
188 | UpgradeLog*.htm
189 |
190 | # SQL Server files
191 | *.mdf
192 | *.ldf
193 |
194 | # Business Intelligence projects
195 | *.rdl.data
196 | *.bim.layout
197 | *.bim_*.settings
198 |
199 | # Microsoft Fakes
200 | FakesAssemblies/
201 |
202 | # Node.js Tools for Visual Studio
203 | .ntvs_analysis.dat
204 |
205 | # Visual Studio 6 build log
206 | *.plg
207 |
208 | # Visual Studio 6 workspace options file
209 | *.opt
210 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # CMake 3.8 is the minimum because CMAKE_CXX_STANDARD must accept 17 (C++17).
2 | # Visual Studio 2019 is supported from CMake 3.14.1.
3 | #
4 | # Possible generators:
5 | # "MinGW Makefiles": MSYS2/Mingw32 GCC 8.3 build
6 | # "Visual Studio 15 2017", optional platform generator Win32 and x64
7 | # "Visual Studio 16 2019", optional platform generator Win32 and x64
8 | # "Visual Studio 16 2019" + LLVM 8.0 (clang), optional platform generator Win32 and x64
9 |
10 | cmake_minimum_required(VERSION 3.8.2)
11 |
12 | project(TComb LANGUAGES CXX)
13 | include(GNUInstallDirs)
14 |
15 | # Blank out the default system libraries so nothing unused gets linked in.
16 | set(CMAKE_CXX_STANDARD_LIBRARIES "" CACHE STRING "" FORCE)
17 | set(CMAKE_C_STANDARD_LIBRARIES "" CACHE STRING "" FORCE)
18 | set(CMAKE_STANDARD_LIBRARIES "" CACHE STRING "" FORCE)
19 |
20 | # C++17 is mandatory: no fallback to an older standard, no compiler extensions.
21 | set(CMAKE_CXX_EXTENSIONS OFF)
22 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
23 | set(CMAKE_CXX_STANDARD 17)
24 |
25 | # Derive the default for ENABLE_INTEL_SIMD from the target processor name:
26 | # ON for the x86 / x86-64 spellings matched below, OFF for anything else.
27 | # The comparison is made on the lower-cased name (ARCHID).
28 | message("-- Detected target processor as: ${CMAKE_SYSTEM_PROCESSOR}")
29 | string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" ARCHID)
30 | set(INTEL_SIMD "OFF")
31 | if("${ARCHID}" MATCHES "^(x86|x64|i686|amd64|x86_64)$")
32 | set(INTEL_SIMD "ON")
33 | endif()
34 |
35 | # User-visible switch. INTEL_SIMD only supplies the initial default of the
36 | # option; the final value of ENABLE_INTEL_SIMD is what the rest of this file
37 | # tests.
38 | option(ENABLE_INTEL_SIMD "Enable SIMD intrinsics for Intel processors" "${INTEL_SIMD}")
39 |
40 | if(CMAKE_CONFIGURATION_TYPES) # only touched when the generator already defines it
41 | # Offer exactly these three configurations, replacing the previous list.
42 | set(CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo" CACHE STRING "Reset the configurations to what we need" FORCE)
43 | endif()
44 |
45 | IF( MSVC ) # Check for Visual Studio
46 | # Generating Visual Studio solutions from CMake is deliberately refused: the
47 | # existing .sln file contains all x86/x64 versions of the MSVC and LLVM builds.
48 | MESSAGE(FATAL_ERROR "Please use the existing sln file both for MS VC and also for LLVM toolset in VS")
49 | # FATAL_ERROR stops processing right here, so the rest of this MSVC branch
50 | # never runs. It is kept for reference only and is ** not tested **.
51 |
52 |
53 |
54 | # MSVC_VERSION values relevant to the check below:
55 | #1910-1919 = VS 15.0 (v141 toolset) Visual Studio 2017
56 | #1920 = VS 16.0 (v142 toolset) Visual Studio 2019
57 |
58 | IF( MSVC_VERSION VERSION_LESS 1910 )
59 | MESSAGE(FATAL_ERROR "Visual C++ 2017 or newer required.")
60 | ENDIF()
61 |
62 | # Visual Studio IDE generators only. Two things happen here:
63 | #  - CLANG_IN_VS is set to "1" when a clang-based toolset was selected with -T;
64 | #  - with WINXP_SUPPORT=ON (and no clang), an XP-capable *_xp toolset is forced.
65 | IF(MSVC_IDE)
66 | message("Reported CMAKE_GENERATOR_TOOLSET is: ${CMAKE_GENERATOR_TOOLSET}")
67 |
68 | # For LLVM Clang installed separately, specify llvm or LLVM
69 | # Since Visual Studio 2019 v16.4, LLVM 9.0 is integrated, for this use Toolset: ClangCL
70 | IF(CMAKE_GENERATOR_TOOLSET STREQUAL "LLVM" OR CMAKE_GENERATOR_TOOLSET STREQUAL "llvm" OR CMAKE_GENERATOR_TOOLSET STREQUAL "ClangCL")
71 | # The variable is named rather than expanded unquoted: an unquoted ${...} breaks
72 | # the if() when empty and may be dereferenced a second time when it is not.
73 | if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") # hope: always
74 | message("LLVM toolset was specified via -T. Compiler ID is: ${CMAKE_CXX_COMPILER_ID}; CMAKE_CXX_COMPILER_VERSION is: ${CMAKE_CXX_COMPILER_VERSION}")
75 | # Clang; 9.0.0
76 | # These are probably not supported when clang is downloaded as a ready-made binary: CLANG_VERSION_MAJOR CLANG_VERSION_MINOR CLANG_VERSION_STRING
77 | # string (REGEX REPLACE ".*clang version ([0-9]+\\.[0-9]+).*" "\\1" CLANG_VERSION_STRING ${clang_full_version_string})
78 | if( CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0.1 )
79 | MESSAGE(FATAL_ERROR "Clang 7.0.1 or newer required") # as of 2019.december actually we are using 9.0
80 | endif()
81 | endif()
82 | set(CLANG_IN_VS "1")
83 | ELSEIF(CMAKE_GENERATOR_TOOLSET STREQUAL "v141_clang_c2")
84 | #1900 is reported
85 | message("v141_clang_c2 toolset was specified via -T. Reported MSVC_VERSION is: ${MSVC_VERSION}")
86 | message("May not work, try LLVM")
87 | set(CLANG_IN_VS "1")
88 | ENDIF()
89 |
90 | option(WINXP_SUPPORT "Make binaries compatible with Windows XP and Vista" OFF)
91 | # We want our project to also run on Windows XP
92 | # Not for LLVM: Clang stopped XP support in 2016
93 | if(WINXP_SUPPORT AND NOT CLANG_IN_VS STREQUAL "1")
94 | # 1900 (VS2015) is not supported but we leave here
95 | if(MSVC_VERSION VERSION_LESS 1910)
96 | set(xp_toolset "v140_xp") # VS2015
97 | else()
98 | set(xp_toolset "v141_xp") # VS2017, also choosable for VS2019
99 | endif()
100 | set(CMAKE_GENERATOR_TOOLSET "${xp_toolset}" CACHE STRING "The compiler toolset to use for Visual Studio." FORCE)
101 | message("CMAKE_GENERATOR_TOOLSET is forced to: ${CMAKE_GENERATOR_TOOLSET}")
102 | # Thread-safe static initialization is switched off for XP targets, see:
103 | # https://connect.microsoft.com/VisualStudio/feedback/details/1789709/visual-c-2015-runtime-broken-on-windows-server-2003-c-11-magic-statics
104 | add_definitions("/Zc:threadSafeInit-")
105 | endif()
106 | ENDIF()
107 |
108 | # Enable C++ with SEH exceptions, for clang and MSVC toolsets alike.
109 | # /EHsc is rewritten to /EHa in place, which avoids an obnoxious
110 | # 'overriding /EHsc with /EHa' warning when using something other
111 | # than MSBuild.
112 | string(REPLACE "/EHsc" "/EHa" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
113 | string(REPLACE "/EHsc" "/EHa" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
114 |
115 | if(CLANG_IN_VS STREQUAL "1")
116 | # clang toolsets only: silence the inconsistent-missing-override warning.
117 | # -fexceptions is deliberately not added (its effect here is unknown).
118 | string(APPEND CMAKE_C_FLAGS " -Wno-inconsistent-missing-override")
119 | string(APPEND CMAKE_CXX_FLAGS " -Wno-inconsistent-missing-override")
120 | endif()
121 |
122 | # Prevent VC++ from complaining about not using MS-specific functions
123 | add_definitions("/D _CRT_SECURE_NO_WARNINGS /D _SECURE_SCL=0")
124 |
125 | # Enable CRT heap debugging - only effective in debug builds
126 | add_definitions("/D _CRTDBG_MAP_ALLOC")
127 |
128 | # Extra optimization switches, appended to the Release flags only
129 | # (C and C++ receive the same set).
130 | string(APPEND CMAKE_CXX_FLAGS_RELEASE " /Oy /Ot /GS- /Oi")
131 | string(APPEND CMAKE_C_FLAGS_RELEASE " /Oy /Ot /GS- /Oi")
132 |
133 | # MSVC_CPU_ARCH is the value handed to /arch: and can be overridden by the user:
134 | # IA32 (disabled),
135 | # SSE (Pentium III and higher, 1999),
136 | # SSE2 (Pentium 4 and higher, 2000/2001),
137 | # AVX (Sandy Bridge and higher, 2011),
138 | # AVX2 (Haswell and higher, 2013)
139 | set(MSVC_CPU_ARCH "SSE2" CACHE STRING "Set MSVC architecture optimization level (default: SSE2)")
140 |
141 | string(APPEND CMAKE_CXX_FLAGS " /arch:${MSVC_CPU_ARCH}")
142 | string(APPEND CMAKE_C_FLAGS " /arch:${MSVC_CPU_ARCH}")
143 |
144 | # 64-bit builds: MSVC doesn't allow /arch to be SSE2 (a no-op there) or below, so the flag added above is removed again.
145 | if(CMAKE_SIZEOF_VOID_P EQUAL 8)
146 | if("${MSVC_CPU_ARCH}" MATCHES "(IA32|SSE|SSE2)")
147 | message("MSVC doesn't allow x86-64 builds to define /arch:${MSVC_CPU_ARCH}. Setting will be ignored.")
148 | set(DELETE_THIS "/arch:${MSVC_CPU_ARCH}")
149 | string(REPLACE "${DELETE_THIS}" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
150 | string(REPLACE "${DELETE_THIS}" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
151 | endif()
152 | endif()
153 |
154 | # clang toolsets in Visual Studio only: suppress other frequent but
155 | # harmless/unavoidable warnings. -Wno-gcc-compat is there to allow
156 | # per-function attributes like __attribute__((__target__("sse4.1"))).
157 | if(CLANG_IN_VS STREQUAL "1")
158 | string(APPEND CMAKE_CXX_FLAGS
159 | " -Wno-unused-function -Wno-reorder"
160 | " -Wno-unused-value -Wno-gcc-compat")
161 | string(APPEND CMAKE_C_FLAGS
162 | " -Wno-unused-function -Wno-reorder"
163 | " -Wno-unused-value -Wno-gcc-compat")
164 | endif()
165 |
166 |
167 | # Request C++17 with the MSVC-style switch (appended to the C flags as well).
168 | string(APPEND CMAKE_CXX_FLAGS " /std:c++17")
169 | string(APPEND CMAKE_C_FLAGS " /std:c++17")
170 |
171 | # Standards-conformance mode (/permissive-) is supported from Visual C++ 2017
172 | # (MSVC_VERSION 1910) on, so it is only added for those compilers.
173 | if(NOT MSVC_VERSION LESS 1910)
174 | string(APPEND CMAKE_C_FLAGS " /permissive-")
175 | string(APPEND CMAKE_CXX_FLAGS " /permissive-")
176 | endif()
177 |
178 | if(ENABLE_INTEL_SIMD) # pass the SIMD choice to the sources as a macro
179 | add_definitions("/D INTEL_INTRINSICS")
180 | endif()
181 |
182 | ELSE()
183 |
184 | # Non-MSVC toolchains: compiler flags first, then shared-library linker flags.
185 | if(ENABLE_INTEL_SIMD)
186 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2 -DINTEL_INTRINSICS")
187 | endif()
188 |
189 | if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
190 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-security")
191 | endif()
192 |
193 | # Every branch appends to CMAKE_SHARED_LINKER_FLAGS instead of assigning it, so
194 | # linker flags supplied by the user or a toolchain file are never discarded.
195 | if(WIN32)
196 | set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--enable-stdcall-fixup")
197 | elseif(APPLE)
198 | # macOS uses Clang's linker, doesn't like --no-undefined
199 | set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-undefined,error")
200 | elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
201 | # make sure there are no undefined symbols
202 | set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined")
203 | endif()
204 |
205 | ENDIF()
206 |
207 | if(NOT ENABLE_INTEL_SIMD) # report the final SIMD setting during configuration
208 | message("Intel SIMD disabled")
209 | else()
210 | message("Intel SIMD enabled")
211 | endif()
212 |
213 | # Process the plugin's own build description in TComb/CMakeLists.txt.
214 | add_subdirectory("TComb")
215 |
216 | # uninstall target: generate the script from its template (@ONLY: expand @VAR@ only)
217 | configure_file(
218 | "${CMAKE_CURRENT_SOURCE_DIR}/cmake_uninstall.cmake.in"
219 | "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
220 | @ONLY)
221 | # Quoted arguments + VERBATIM keep the command intact for any build directory path.
222 | add_custom_target(uninstall
223 | COMMAND "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" VERBATIM)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 | {description}
294 | Copyright (C) {year} {fullname}
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | {signature of Ty Coon}, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 |
341 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TComb
2 |
3 | This is an update to tritical's TComb v2.0 Beta 2 moving it from beta to release as it encompasses all the changes in tritical's To-Do-List.
4 |
5 | ### Requirements
6 |
7 | This filter requires AviSynth 2.6.0 or AviSynth+ as well as the Visual C++ Redistributable Package for Visual Studio 2015-19.
8 |
9 | ### Syntax and Parameters
10 |
11 | The syntax and parameters are identical to the original TComb with the exception of the "opt" parameter. To see a list refer to this [link](http://avisynth.nl/index.php/TComb).
12 |
13 | ### Changes
14 |
15 | In 2015 Elegant made many changes when updating TComb in order to improve speed (see full changelog for more details):
16 |
17 | * Removed buffering of frames/info that weren't actually used
18 | * Switched to AVS 2.6 API
19 | * Added x64 support which also utilizes SSE2
20 | * Restructured debug and error messages
21 | * Removed MMX/ISSE support
22 | * Removed/changed "opt" parameter
23 |
24 | In 2021 came a general bugfix release by pinterf.
25 | Added a Linux port and the missing 8-bit Y and YUV formats; the external assembler code was rewritten in SIMD intrinsics.
26 |
27 | ### Programmer Notes
28 |
29 | This program was compiled using Visual Studio 2019 and falls under the GNU General Public License.
30 |
31 | I (Elegant) would like to thank jpsdr and dubhater for their work on nnedi3 and the VapourSynth version of TComb (respectively). Their work led to the port of this project.
32 | I'd also like to thank the masm32 community who were very helpful as I explored assembly.
33 |
34 | Build instructions
35 | ==================
36 | VS2019:
37 | use IDE
38 |
39 | Windows GCC (mingw installed by msys2):
40 | from the 'build' folder under project root:
41 |
42 | del ..\CMakeCache.txt
43 | cmake .. -G "MinGW Makefiles" -DENABLE_INTEL_SIMD:bool=on
44 | @rem test: cmake .. -G "MinGW Makefiles" -DENABLE_INTEL_SIMD:bool=off
45 | cmake --build . --config Release
46 |
47 | Linux
48 | note: ENABLE_INTEL_SIMD is automatically off for non-x86 architectures
49 |
50 | * Clone repo and build
51 |
52 | git clone https://github.com/pinterf/TComb
53 | cd TComb
54 | cmake -B build -S .
55 | cmake --build build
56 |
57 | Useful hints:
58 | build after clean:
59 |
60 | cmake --build build --clean-first
61 |
62 | Force no asm support
63 |
64 | cmake -B build -S . -DENABLE_INTEL_SIMD:bool=off
65 |
66 | delete cmake cache
67 |
68 | rm build/CMakeCache.txt
69 |
70 | * Find binaries at
71 |
72 | build/TComb/libtcomb.so
73 |
74 | * Install binaries
75 |
76 | cd build
77 | sudo make install
78 |
79 |
--------------------------------------------------------------------------------
/TComb - ReadMe.txt:
--------------------------------------------------------------------------------
1 | |
2 | TComb for AviSynth |
3 | v2.3 (24 February 2021) |
4 | by tritical |
5 | modified by Elegant (v2.0; 17 July 2015) |
6 | additional work by pinterf |
7 | |
8 | HELP FILE |
9 | -------------------------------------------------------------------------------------------------------
10 | -------------------------------------------------------------------------------------------------------
11 |
12 |
13 | INFO:
14 |
15 |
16 | TComb is a temporal comb filter (it reduces cross-luminance (rainbowing) and cross-chrominance
17 | (dot crawl) artifacts in static areas of the picture). It will ONLY work with NTSC material, and
18 | WILL NOT work with telecined material where the rainbowing/dotcrawl was introduced prior to the
19 | telecine process! It must be used before ivtc or deinterlace in order to work. In terms of what
20 | it does it is similar to guavacomb/dedot.
21 |
22 | TComb currently supports Y8, YV12, YV16, YV24, YV411 and YUY2 colorspaces.
23 |
24 | TComb does support seeking... that is, jumping to a random frame will produce the same result
25 | as if you had linearly run up to that frame. For dot crawl removal tcomb requires at least 3
26 | static fields of the same parity and for rainbow removal tcomb requires at least 5 static fields
27 | of the same parity.
28 |
29 |
30 | Syntax =>
31 |
32 | TComb(int mode, int fthreshL, int fthreshC, int othreshL, int othreshC, bool map,
33 | double scthresh, bool debug, int opt)
34 |
35 |
36 |
37 | PARAMETERS:
38 |
39 |
40 | mode - (limit processing to luma or chroma only)
41 |
42 | Controls whether both luma/chroma are processed or only one or the other. Possible settings:
43 |
44 | 0 - process luma only (dot crawl removal)
45 | 1 - process chroma only (rainbow removal)
46 | 2 - process both
47 |
48 | For greyscale clips mode=0 is used regardless of the setting
49 |
50 | default: 2 (int)
51 |
52 |
53 | fthreshL/fthreshC - (filtered pixel correlation thresholds)
54 |
55 | One of the things TComb checks for is correlation between filtered values over the length
56 | of the filtering window. If all values differ by less than fthreshL (for luma) or fthreshC
57 | (for chroma) then the filtered values are considered to be correlated. Larger values will
58 | allow more filtering (will be more effective at removing rainbowing/dot crawl), but will also
59 | create more artifacts. Smaller values will produce less artifacts, but will be less effective
60 | in removing rainbowing/dot crawl. A good range of values is between 4 and 7.
61 |
62 | default: fthreshL -> 4 (int)
63 | fthreshC -> 5
64 |
65 |
66 | othreshL/othreshC - (original pixel correlation thresholds)
67 |
68 | One of the things TComb checks for is correlation between original pixel values from every
69 | other field of the same parity. Due to the oscillation period, these values should be equal
70 | or very similar in static areas containing dot crawl or rainbowing. If the pixel values
71 | differ by less than othreshL (for luma) or othreshC (for chroma) then the pixels are considered
72 | to be correlated. Larger values will allow more filtering (will be more effective at removing
73 | rainbowing/dotcrawl), but will also create more artifacts. Smaller values will produce less
74 | artifacts, but will be less effective in removing rainbowing/dotcrawl. A good range of values
75 | is between 4 and 8.
76 |
77 | default: othreshL -> 5 (int)
78 | othreshC -> 6
79 |
80 |
81 | map -
82 |
83 | Identifies pixels that are being replaced with filtered values. Each pixel in the output
84 | frame will have one of the following values indicating how it is being filtered:
85 |
86 | 0 - not being filtered
87 | 85 - [1 2 1] average of (n,n+1,n+2)
88 | 170 - [1 2 1] average of (n-2,n-1,n)
89 | 255 - [1 2 1] average of (n-1,n,n+1)
90 |
91 | ** n = current frame
92 |
93 | default: false (bool)
94 |
95 |
96 | scthresh - (scenechange threshold)
97 |
98 | Sets the scenechange detection threshold as a percentage of maximum change on the luma
99 | plane. Use the debug output to see which frames are detected as scenechanges and the
100 | scenechange statistics.
101 |
102 | default: 12.0 (float)
103 |
104 |
105 | debug -
106 |
107 | Will enable debug output. The only thing it shows are the scenechange stats. The info
108 | is output via OutputDebugString(). You can use the utility "DebugView" from sysinternals
109 | to view the output. The frame numbers in the debug output correspond to the input clip
110 | after a separatefields() call. TComb internally invokes separatefields() before itself
111 | and weave() after itself.
112 |
113 | default: false (bool)
114 |
115 | opt - (another debug parameter: CPU)
116 |
117 | 0: C only (no assembly at all)
118 | other: automatically choose SSE2 or C
119 |
120 | For development use: opt parameters can appear/disappear/change their meaning between versions
121 |
122 | default: -1 (int)
123 |
124 |
125 | BASIC SETUP/USAGE:
126 |
127 |
128 | Setting up TComb is pretty simple. The only values that would ever really need adjusting
129 | are fthreshL/fthreshC, othreshL/othreshC, and mode.
130 |
131 | Set mode to 0 if you want to do dot crawl removal only, set it to 1 if you want to
132 | do rainbow removal only, or set it to 2 to do both.
133 |
134 | Dot Crawl Removal Tweaking (fthreshL/othreshL):
135 |
136 | To find good values for fthreshL/othreshL, start with the following line:
137 |
138 | tcomb(mode=0,fthreshL=255,othreshL=255)
139 |
140 | Now, keep othreshL at 255 but set fthreshL down to 1. Keep increasing fthreshL
141 | in steps of 1 to 2 until you find the point at which all dot crawl is removed.
142 | Remember that value. Next, set fthreshL back to 255, and set othreshL to 1.
143 | Now, increase othreshL in steps of 1 or 2 until you find the point at which all
144 | dot crawl is removed. You've now got values for fthreshL/othreshL.
145 |
146 | Rainbowing Removal Tweaking (fthreshC/othreshC):
147 |
148 | To find good values for fthreshC/othreshC, start with the following line:
149 |
150 | tcomb(mode=1,fthreshC=255,othreshC=255)
151 |
152 | Now, keep othreshC at 255 but set fthreshC down to 1. Keep increasing fthreshC
153 | in steps of 1 to 2 until you find the point at which all (or most) rainbowing is
154 | removed. Remember that value. Next, set fthreshC back to 255, and set othreshC
155 | to 1. Now, increase othreshC in steps of 1 or 2 until you find the point at which
156 | all (or most) rainbowing is removed. You've now got values for fthreshC/othreshC.
157 |
158 | Once you've got values for mode, fthreshL/fthreshC, and othreshL/othreshC, add the
159 | necessary tcomb() line into your script and run through part of it. If you see any
160 | artifacts try lowering your fthresh/othresh values.
161 |
162 |
163 |
164 | CHANGE LIST:
165 |
166 | ** v2.3 (20210224 pinterf)**
167 | - Y8, YV16, YV24, YV411 support
168 |
169 | ** v2.2 (20210223 pinterf)**
170 | - Fix: unsave register x64 assembler causing artifacts
171 | - Drop all external asm
172 | - Rewrite assembler in SIMD intrinsics (old stuff is not removed yet, only conditionally ignored)
173 | - Add CMake build system
174 | - Add MinGW/gcc support
175 | - Add linux support (with ENABLE_INTEL_SIMD=off option as well)
176 | - Add build instructions to README.md
177 |
178 | ** v2.1 (20210222 pinterf)**
179 | - project forked to https://github.com/pinterf/TComb/
180 | - param 'opt' is back for debug. 0 means pure C code
181 | - Fix bug in x64 assembler buildFinalMask_SSE2
182 | - Fix crash in 32bit version of VerticalBlur3_SSE2
183 | - Fix: scenechange SSE2 did not work
184 | - Fix: x64 assembler HorizontalBlur6_SSE2
185 | - Fix: HorizontalBlur6: C only did top 2 lines. SSE2 bad top 2 lines
186 | - Fix: HorizontalBlur3_SSE2 artifacts (both x86 and x64)
187 | - Fix: HorizontalBlur3_SSE2 missing rounder (both x86 and x64) (now C and SSE2 is giving identical results)
188 | - Code:
189 | - Update to Visual Studio 2019
190 | - update to actual Avisynth+ headers
191 | - clang-friendly code
192 | - removed memcpy and bitblt variants
193 | - replaced planarframes module with the one I updated in tivtc project for Avisynth+ and hbd preparation
194 | - Fix debug build configuration in VS project settings
195 |
196 | ** v2.0.0.1 (20150726 Elegant)**
197 | - Corrected the masks used in HorizontalBlur6 for x64.
198 |
199 | ** v2.0 (20150717 Elegant)**
200 |
201 | - Removed buffering of frames/info that weren't actually used (was there for
202 | development/testing purposes). Should save a lot of RAM usage.
203 | - Switched to AVS 2.6 API since AviSynth 2.6.0 was released.
204 | - Added x64 support which also utilizes SSE2. This also includes some missing SSE2 functions (andNeighborsInPlace_SSE2).
205 | - Restructured debug and error messages so that it was apparent that TComb was responsible.
206 | - Removed MMX/ISSE support as times have changed and the support was not going to be carried over to x64.
207 | - Removed "opt" parameter. TComb will now use SSE2 if available and will fallback on C++ if it is not supported.
208 |
209 | End of tritical version history
210 | ------------------------------------------------------------------------
211 |
212 | 05/16/2006 v2.0 Beta 2
213 |
214 | + Stricter checking of othreshL/othreshC when looking for oscillation
215 | + For dot crawl detection require at least one vertical neighbor (y-1/y+1, x-1/x/x+1)
216 | - fixed possible crash with yuy2 input (sse2 planar<->packed conversions)
217 |
218 |
219 | 03/31/2006 v2.0 Beta 1
220 |
221 | - complete rewrite
222 |
223 |
224 | 06/24/2005 v0.9.0
225 |
226 | - Initial Release
227 |
228 |
229 |
230 | contact: GitHub (@Elegant996)
231 |
--------------------------------------------------------------------------------
/TComb/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Build script for the TComb plugin target.
2 | # Note: Visual Studio 2019 is supported from CMake 3.14.1
3 | # Generators this was tested with:
4 | #   "MinGW Makefiles": MSYS2/Mingw32 GCC 8.3 build
5 | #   "Visual Studio 16 2019" (optionally + LLVM 8.0 clang), optional platform generator Win32 and x64
6 | cmake_minimum_required(VERSION 3.8.2)
7 |
8 | # Plugin base name; lower-cased on every platform except Windows.
9 | set(PluginName "TComb")
10 | if(NOT WIN32)
11 |   string(TOLOWER "${PluginName}" PluginName)
12 | endif()
13 |
14 | set(ProjectName "${PluginName}")
15 | project(${ProjectName} LANGUAGES CXX)
16 |
17 | # Files.cmake fills TComb_Sources, which becomes the shared library's source list.
18 | include("Files.cmake")
19 | add_library(${PluginName} SHARED ${TComb_Sources})
20 |
21 | set_target_properties(${PluginName} PROPERTIES OUTPUT_NAME "${PluginName}")
22 | if(MINGW)
23 |   # empty file-name prefix for both the library and its import library
24 |   set_target_properties(${PluginName} PROPERTIES PREFIX "" IMPORT_PREFIX "")
25 | endif()
26 |
27 | if(ENABLE_INTEL_SIMD) # SSE2 is the required baseline (some other plugins may need sse4.1 for quick msvc->gcc porting)
28 |   target_compile_definitions(${PluginName} PRIVATE INTEL_INTRINSICS) # target-scoped instead of editing CMAKE_CXX_FLAGS
29 |   target_compile_options(${PluginName} PRIVATE $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-msse2>) # cl.exe has no -msse2 switch
30 | endif()
31 |
32 | # IDE source groups: mirror each file's directory beneath a per-type top-level folder
33 | foreach(src_file ${TComb_Sources})
34 |   get_filename_component(src_dir "${src_file}" PATH)
35 |   # strip "./" and turn "/" into the "\" separator used for nested source groups
36 |   string(REGEX REPLACE "(\\./)" "" src_group "${src_dir}")
37 |   string(REPLACE "/" "\\" src_group "${src_group}")
38 |
39 |   # files matching none of the patterns below keep the bare directory as their group
40 |   if("${src_file}" MATCHES ".*\\.cpp")
41 |     set(src_group "Source Files\\${src_group}")
42 |   elseif("${src_file}" MATCHES ".*\\.h")
43 |     set(src_group "Header Files\\${src_group}")
44 |   elseif("${src_file}" MATCHES ".*\\.asm")
45 |     set(src_group "Assembler Files\\${src_group}")
46 |   endif()
47 |
48 |   source_group("${src_group}" FILES "${src_file}")
49 | endforeach()
50 |
51 | if (MSVC_IDE) # Per-file ISA flags, chosen by file-name suffix; this branch covers Visual Studio IDE generators
52 | IF(CLANG_IN_VS STREQUAL "1") # GCC-style -m flags; CLANG_IN_VS is not set in this file (presumably by the parent script - TODO confirm)
53 | # special SSSE3 option for source files with *_ssse3.cpp pattern
54 | file(GLOB_RECURSE SRCS_SSSE3 "*_ssse3.cpp")
55 | set_source_files_properties(${SRCS_SSSE3} PROPERTIES COMPILE_FLAGS " -mssse3 ")
56 |
57 | # special SSE4.1 option for source files with *_sse41.cpp pattern
58 | file(GLOB_RECURSE SRCS_SSE41 "*_sse41.cpp")
59 | set_source_files_properties(${SRCS_SSE41} PROPERTIES COMPILE_FLAGS " -msse4.1 ")
60 |
61 | # special AVX option for source files with *_avx.cpp pattern
62 | file(GLOB_RECURSE SRCS_AVX "*_avx.cpp")
63 | set_source_files_properties(${SRCS_AVX} PROPERTIES COMPILE_FLAGS " -mavx ")
64 |
65 | # special AVX2 option for source files with *_avx2.cpp pattern
66 | file(GLOB_RECURSE SRCS_AVX2 "*_avx2.cpp")
67 | set_source_files_properties(${SRCS_AVX2} PROPERTIES COMPILE_FLAGS " -mavx2 -mfma ")
68 |
69 | # special AVX512 option for source files with *_avx512.cpp pattern
70 | file(GLOB_RECURSE SRCS_AVX512 "*_avx512.cpp")
71 | set_source_files_properties(${SRCS_AVX512} PROPERTIES COMPILE_FLAGS " -mavx512f -mavx512bw ")
72 | ELSE() # Visual Studio IDE without CLANG_IN_VS == "1": /arch switches; no SSSE3/SSE4.1 entries in this branch
73 | # special AVX option for source files with *_avx.cpp pattern
74 | file(GLOB_RECURSE SRCS_AVX "*_avx.cpp")
75 | set_source_files_properties(${SRCS_AVX} PROPERTIES COMPILE_FLAGS " /arch:AVX ")
76 |
77 | # special AVX2 option for source files with *_avx2.cpp pattern
78 | file(GLOB_RECURSE SRCS_AVX2 "*_avx2.cpp")
79 | set_source_files_properties(${SRCS_AVX2} PROPERTIES COMPILE_FLAGS " /arch:AVX2 ")
80 |
81 | # special AVX512 option for source files with *_avx512.cpp pattern
82 | file(GLOB_RECURSE SRCS_AVX512 "*_avx512.cpp")
83 | set_source_files_properties(${SRCS_AVX512} PROPERTIES COMPILE_FLAGS " /arch:AVX512 ")
84 | ENDIF()
85 | else() # every other generator: same GCC-style -m flags as the CLANG_IN_VS branch above
86 | # special SSSE3 option for source files with *_ssse3.cpp pattern
87 | file(GLOB_RECURSE SRCS_SSSE3 "*_ssse3.cpp")
88 | set_source_files_properties(${SRCS_SSSE3} PROPERTIES COMPILE_FLAGS " -mssse3 ")
89 |
90 | # special SSE4.1 option for source files with *_sse41.cpp pattern
91 | file(GLOB_RECURSE SRCS_SSE41 "*_sse41.cpp")
92 | set_source_files_properties(${SRCS_SSE41} PROPERTIES COMPILE_FLAGS " -msse4.1 ")
93 |
94 | # special AVX option for source files with *_avx.cpp pattern
95 | file(GLOB_RECURSE SRCS_AVX "*_avx.cpp")
96 | set_source_files_properties(${SRCS_AVX} PROPERTIES COMPILE_FLAGS " -mavx ")
97 |
98 | # special AVX2 option for source files with *_avx2.cpp pattern
99 | file(GLOB_RECURSE SRCS_AVX2 "*_avx2.cpp")
100 | set_source_files_properties(${SRCS_AVX2} PROPERTIES COMPILE_FLAGS " -mavx2 -mfma ")
101 |
102 | # special AVX512 option for source files with *_avx512.cpp pattern
103 | file(GLOB_RECURSE SRCS_AVX512 "*_avx512.cpp")
104 | set_source_files_properties(${SRCS_AVX512} PROPERTIES COMPILE_FLAGS " -mavx512f -mavx512bw ")
105 | endif()
106 |
107 |
108 | # Specify include directories
109 | target_include_directories(${ProjectName} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
110 | #dedicated include dir for avisynth.h
111 | #target_include_directories(${ProjectName} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
112 |
113 | # Windows DLL dependencies (PRIVATE: only needed to link the plugin itself)
114 | if(MSVC OR MINGW)
115 |   target_link_libraries(${ProjectName} PRIVATE "uuid" "winmm" "vfw32" "msacm32" "gdi32" "user32" "advapi32" "ole32" "imagehlp")
116 | else()
117 |   # non Windows: nothing extra to link, so the former empty target_link_libraries() call is dropped
118 |   # candidates kept from the original for reference:
119 |   # "pthread" "dl"
120 | endif()
121 |
122 | include(GNUInstallDirs)
123 |
124 | install(TARGETS ${ProjectName}
125 |   LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/avisynth")
126 |
--------------------------------------------------------------------------------
/TComb/Files.cmake:
--------------------------------------------------------------------------------
1 | # Source list for the plugin: every C/C++ source and header next to this file,
2 | # plus the headers under avs/. Paths are relative to this directory.
3 | file(GLOB TComb_Sources RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
4 |   "*.c"
5 |   "*.cpp"
6 |   "*.hpp"
7 |   "*.h"
8 |   "avs/*.h"
9 | )
10 |
11 | if(MSVC OR MINGW)
12 |   # Intentionally empty: NO C interface for this plugin, so no .def file is added.
13 |   # (Export definitions in general are not needed on x64 and only cause warnings,
14 |   # unfortunately we still must need a .def file for some COM functions.)
15 |   # Disabled .def selection, kept for reference:
16 |   #   if(CMAKE_SIZEOF_VOID_P EQUAL 8) -> LIST(APPEND TComb_Sources "TComb64.def")
17 |   #   else()                          -> LIST(APPEND TComb_Sources "TComb.def")
18 | endif()
19 |
20 | if(MSVC_IDE)
21 |   # The resource script is only added for Visual Studio IDE generators:
22 |   # Ninja, unfortunately, seems to have some issues with using rc.exe
23 |   list(APPEND TComb_Sources "TComb.rc")
24 | endif()
25 |
--------------------------------------------------------------------------------
/TComb/PlanarFrame.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | ** My PlanarFrame class... fast mmx/sse2 YUY2 packed to planar and planar
3 | ** to packed conversions, and always gives 16 bit alignment for all
4 | ** planes. Supports YV12/YUY2 frames from avisynth, can do any planar format
5 | ** internally.
6 | **
7 | ** Copyright (C) 2005-2006 Kevin Stone
8 | **
9 | ** This program is free software; you can redistribute it and/or modify
10 | ** it under the terms of the GNU General Public License as published by
11 | ** the Free Software Foundation; either version 2 of the License, or
12 | ** (at your option) any later version.
13 | **
14 | ** This program is distributed in the hope that it will be useful,
15 | ** but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | ** GNU General Public License for more details.
18 | **
19 | ** You should have received a copy of the GNU General Public License
20 | ** along with this program; if not, write to the Free Software
21 | ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 | */
23 |
24 | #include "PlanarFrame.h"
25 | #include "avs/cpuid.h"
26 | #include "common.h"
27 | #include
28 | #ifdef INTEL_INTRINSICS
29 | #include
30 | #endif
31 |
32 | // 8 bits only!!!
33 |
34 | PlanarFrame::PlanarFrame(int cpuFlags) // empty frame: nothing is allocated until allocSpace()/create*() runs
35 | {
36 |   ypitch = uvpitch = 0;
37 |   ywidth = uvwidth = yheight = uvheight = 0;
38 |   y = u = v = NULL;
39 |   debug_padding = 0; // fix: was never set here although the free paths compute `y - debug_padding`
40 |   useSIMD = true;
41 |   packed = false;
42 |   cpu = cpuFlags; // CPU feature flags are stored exactly as passed in
43 | }
44 |
45 | PlanarFrame::PlanarFrame(VideoInfo &viInfo, int cpuFlags) // non-packed frame sized from viInfo
46 | {
47 |   ypitch = uvpitch = 0;
48 |   ywidth = uvwidth = yheight = uvheight = 0;
49 |   y = u = v = NULL;
50 |   debug_padding = 0; // fix: defined value even if allocSpace() below bails out before setting it
51 |   useSIMD = true;
52 |   packed = false;
53 |   cpu = cpuFlags;
54 |   allocSpace(viInfo); // result is not checked; after a failure some plane pointers stay NULL
55 | }
56 |
57 | PlanarFrame::PlanarFrame(VideoInfo &viInfo, bool _packed, int cpuFlags) // as above, caller chooses packed YUY2 storage
58 | {
59 |   ypitch = uvpitch = 0;
60 |   ywidth = uvwidth = yheight = uvheight = 0;
61 |   y = u = v = NULL;
62 |   debug_padding = 0; // fix: defined value even if allocSpace() below bails out before setting it
63 |   useSIMD = true;
64 |   packed = _packed; // read by allocSpace(VideoInfo&) on the YUY2 path
65 |   cpu = cpuFlags;
66 |   allocSpace(viInfo); // result is not checked; after a failure some plane pointers stay NULL
67 | }
68 |
69 | // Destructor: releases every plane buffer the frame still owns.
70 | PlanarFrame::~PlanarFrame() {
71 |   if (y) { _aligned_free(y - debug_padding); y = NULL; } // the allocation begins debug_padding bytes before y
72 |   if (u) { _aligned_free(u); u = NULL; }
73 |   if (v) { _aligned_free(v); v = NULL; }
74 | }
75 |
76 | // Debug aid: writes 0xDEADBEEF guard words into the MIN_ALIGNMENT bytes before and after
77 | // the luma data, then advances y past the leading guard. Does nothing unless `debug` is set.
78 | void PlanarFrame::FillMemDebug() {
79 |   if (!debug) return;
80 |   const unsigned guardWords = (unsigned)(MIN_ALIGNMENT / sizeof(uint32_t));
81 |   uint32_t* const front = (uint32_t*)(y);
82 |   uint32_t* const back = (uint32_t*)(y + MIN_ALIGNMENT + ypitch * yheight);
83 |   for (unsigned w = 0; w < guardWords; ++w) {
84 |     front[w] = 0xDEADBEEF;
85 |     back[w] = 0xDEADBEEF;
86 |   }
87 |   y += MIN_ALIGNMENT; // from here on y addresses the usable plane data
88 | }
89 |
90 | bool PlanarFrame::allocSpace(VideoInfo &viInfo) // (re)allocates the plane buffers for viInfo; false on allocation failure or unsupported format
91 | {
92 |   if (y != NULL) { _aligned_free(y - debug_padding); y = NULL; } // the old allocation begins debug_padding bytes before y
93 |   if (u != NULL) { _aligned_free(u); u = NULL; }
94 |   if (v != NULL) { _aligned_free(v); v = NULL; }
95 |   int height = viInfo.height;
96 |   int width = viInfo.width;
97 |   if (viInfo.IsPlanar())
98 |   {
99 |     ypitch = width + ((width%MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT)); // width rounded up to a multiple of MIN_ALIGNMENT
100 |     ywidth = width;
101 |     yheight = height;
102 |     uvpitch = uvwidth = uvheight = 0; // fix: a Y-only clip must not keep chroma geometry from an earlier allocation (NumComponents() reads uvpitch)
103 |     debug_padding = debug ? MIN_ALIGNMENT : 0;
104 |     y = (uint8_t*)_aligned_malloc(ypitch * yheight + 2 * debug_padding, MIN_ALIGNMENT); // room for a guard area on both sides
105 |     if (y == NULL) return false;
106 |     FillMemDebug(); // in debug mode stamps the guards and moves y past the leading one
107 |
108 |     if (!viInfo.IsY()) {
109 |       width >>= viInfo.GetPlaneWidthSubsampling(PLANAR_U);
110 |       height >>= viInfo.GetPlaneHeightSubsampling(PLANAR_U);
111 |       uvpitch = width + ((width % MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width % MIN_ALIGNMENT));
112 |       uvwidth = width;
113 |       uvheight = height;
114 |       u = (uint8_t*)_aligned_malloc(uvpitch * uvheight, MIN_ALIGNMENT);
115 |       if (u == NULL) return false; // y stays allocated; it is released by the next allocSpace()/freePlanar()/destructor
116 |       v = (uint8_t*)_aligned_malloc(uvpitch * uvheight, MIN_ALIGNMENT);
117 |       if (v == NULL) return false;
118 |     }
119 |     return true;
120 |   }
121 |   else if (viInfo.IsYUY2())
122 |   {
123 |     debug_padding = 0; // no guard areas are used for YUY2 input
124 |
125 |     if (!packed)
126 |     {
127 |       // YUY2 stored as three separate planes; chroma is half width, full height
128 |       ypitch = width + ((width%MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT));
129 |       ywidth = width; yheight = height;
130 |       width >>= 1;
131 |       uvpitch = width + ((width%MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT));
132 |       uvwidth = width;
133 |       uvheight = height;
134 |       y = (uint8_t*)_aligned_malloc(ypitch*yheight, MIN_ALIGNMENT);
135 |       if (y == NULL) return false;
136 |       u = (uint8_t*)_aligned_malloc(uvpitch*uvheight, MIN_ALIGNMENT);
137 |       if (u == NULL) return false;
138 |       v = (uint8_t*)_aligned_malloc(uvpitch*uvheight, MIN_ALIGNMENT);
139 |       if (v == NULL) return false;
140 |       return true;
141 |     }
142 |     else
143 |     {
144 |       width *= 2; // packed storage: a single buffer whose row is twice the pixel width
145 |       ypitch = width + ((width%MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT));
146 |       ywidth = width;
147 |       yheight = height;
148 |       y = (uint8_t*)_aligned_malloc(ypitch*yheight, MIN_ALIGNMENT);
149 |       if (y == NULL) return false;
150 |       uvpitch = uvwidth = uvheight = 0;
151 |       u = v = NULL;
152 |       return true;
153 |     }
154 |   }
155 |   return false; // neither planar nor YUY2: nothing is allocated
156 | }
157 |
158 | bool PlanarFrame::allocSpace(int specs[4]) // specs = { y height, uv height, y width, uv width }; false on allocation failure
159 | {
160 |   if (y != NULL) { _aligned_free(y - debug_padding); y = NULL; } // the old allocation begins debug_padding bytes before y
161 |   if (u != NULL) { _aligned_free(u); u = NULL; }
162 |   if (v != NULL) { _aligned_free(v); v = NULL; }
163 |   int height = specs[0];
164 |   int width = specs[2];
165 |   ypitch = width + ((width%MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT)); // width rounded up to a multiple of MIN_ALIGNMENT
166 |   ywidth = width;
167 |   yheight = height;
168 |   height = specs[1];
169 |   width = specs[3];
170 |   uvpitch = width + ((width%MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT));
171 |   uvwidth = width;
172 |   uvheight = height;
173 |
174 |   debug_padding = debug ? MIN_ALIGNMENT : 0; // fix: was a local `debugpadding`, leaving the member used by the free paths out of sync
175 |   y = (uint8_t*)_aligned_malloc(ypitch * yheight + 2 * debug_padding, MIN_ALIGNMENT); // room for a guard area on both sides
176 |   if (y == NULL) return false;
177 |   FillMemDebug(); // in debug mode stamps the guards and moves y past the leading one
178 |
179 |   if (uvpitch) { // a zero chroma width means a luma-only frame: u and v stay NULL
180 |     u = (uint8_t*)_aligned_malloc(uvpitch * uvheight, MIN_ALIGNMENT);
181 |     if (u == NULL) return false;
182 |     v = (uint8_t*)_aligned_malloc(uvpitch * uvheight, MIN_ALIGNMENT);
183 |     if (v == NULL) return false;
184 |   }
185 |   return true;
186 | }
187 |
188 |
189 | void PlanarFrame::createPlanar(int yheight, int uvheight, int ywidth, int uvwidth) // allocates planes from explicit luma/chroma sizes (parameter names match the member names)
190 | {
191 | int specs[4] = { yheight, uvheight, ywidth, uvwidth }; // order read by allocSpace(int[4]): y height, uv height, y width, uv width
192 | allocSpace(specs); // the success/failure result is discarded
193 | }
194 |
195 | void PlanarFrame::createPlanar(int height, int width, int chroma_format)
196 | {
197 |   // Derives the chroma plane size from the luma size and chroma_format, then allocates via allocSpace(int[4]).
198 |   int specs[4] = { height, height, width, width }; // { y height, uv height, y width, uv width }, starting from full-size chroma
199 |   if (chroma_format <= PLANAR_420) // 420: chroma halved in both directions
200 |   {
201 |     specs[1] = height >> 1;
202 |     specs[3] = width >> 1;
203 |   }
204 |   else if (chroma_format == PLANAR_422) // 422: chroma halved horizontally
205 |   {
206 |     specs[3] = width >> 1;
207 |   }
208 |   else if (chroma_format == PLANAR_411) // 411: chroma quartered horizontally
209 |   {
210 |     specs[3] = width >> 2;
211 |   }
212 |   else if (chroma_format == PLANAR_400) // greyscale: no chroma planes
213 |   {
214 |     specs[1] = 0;
215 |     specs[3] = 0;
216 |   }
217 |   else if (chroma_format != PLANAR_444) // 444 keeps the full-size values set above
218 |   {
219 |     // Fix: an unrecognized format used to reach allocSpace() with an uninitialized
220 |     // specs array; now nothing is allocated and the frame is left as it was.
221 |     return;
222 |   }
223 |   allocSpace(specs); // the success/failure result is discarded
224 | }
225 |
226 | void PlanarFrame::createFromProfile(VideoInfo &viInfo) // allocates buffers sized for viInfo; no pixel data is written here
227 | {
228 | allocSpace(viInfo); // the success/failure result is discarded
229 | }
230 |
231 | void PlanarFrame::createFromFrame(PVideoFrame &frame, VideoInfo &viInfo) // allocates for viInfo, then copies the pixels of `frame` in
232 | {
233 | allocSpace(viInfo);
234 | copyInternalFrom(frame, viInfo); // returns without copying if the luma allocation failed (y == NULL)
235 | }
236 |
237 | void PlanarFrame::createFromPlanar(PlanarFrame &frame) // becomes a copy of another PlanarFrame with the same plane sizes
238 | {
239 | int specs[4] = { frame.yheight, frame.uvheight, frame.ywidth, frame.uvwidth }; // order read by allocSpace(int[4])
240 | allocSpace(specs);
241 | copyInternalFrom(frame);
242 | }
243 |
244 | void PlanarFrame::copyFrom(PVideoFrame &frame, VideoInfo &viInfo) // public wrapper: fill this frame from an AviSynth frame
245 | {
246 | copyInternalFrom(frame, viInfo);
247 | }
248 |
249 | void PlanarFrame::copyFrom(PlanarFrame &frame) // public wrapper: fill this frame from another PlanarFrame
250 | {
251 | copyInternalFrom(frame);
252 | }
253 |
254 | void PlanarFrame::copyTo(PVideoFrame &frame, VideoInfo &viInfo) // public wrapper: write this frame into an AviSynth frame
255 | {
256 | copyInternalTo(frame, viInfo);
257 | }
258 |
259 | void PlanarFrame::copyTo(PlanarFrame &frame) // public wrapper: write this frame into another PlanarFrame
260 | {
261 | copyInternalTo(frame);
262 | }
263 |
264 | void PlanarFrame::copyPlaneTo(PlanarFrame &frame, int plane) // public wrapper: copy a single plane (0 = y, 1 = u, 2 = v) into `frame`
265 | {
266 | copyInternalPlaneTo(frame, plane);
267 | }
268 |
269 | // Plane accessors. Index 0 selects luma; GetPtr maps 1 to u and any other index to v,
270 | // while the width/height/pitch getters treat every non-zero index as chroma.
271 | uint8_t* PlanarFrame::GetPtr(int plane)
272 | {
273 |   return (plane == 0) ? y : ((plane == 1) ? u : v);
274 | }
275 |
276 | // Reports 3 planes when a chroma pitch is set, otherwise 1.
277 | int PlanarFrame::NumComponents()
278 | {
279 |   return uvpitch ? 3 : 1;
280 | }
281 |
282 | // Stored width of the selected plane.
283 | int PlanarFrame::GetWidth(int plane)
284 | {
285 |   return (plane == 0) ? ywidth : uvwidth;
286 | }
287 |
288 | // Stored height of the selected plane.
289 | int PlanarFrame::GetHeight(int plane)
290 | {
291 |   return (plane == 0) ? yheight : uvheight;
292 | }
293 |
294 | // Stored pitch of the selected plane.
295 | int PlanarFrame::GetPitch(int plane)
296 | {
297 |   return (plane == 0) ? ypitch : uvpitch;
298 | }
299 |
300 | // Releases every plane buffer and clears the stored pitches, widths and heights.
301 | void PlanarFrame::freePlanar()
302 | {
303 |   if (y) { _aligned_free(y - debug_padding); y = NULL; } // the allocation begins debug_padding bytes before y
304 |   if (u) { _aligned_free(u); u = NULL; }
305 |   if (v) { _aligned_free(v); v = NULL; }
306 |   ypitch = ywidth = yheight = 0;
307 |   uvpitch = uvwidth = uvheight = 0;
308 | }
309 |
310 | // Fills the internal planes from an AviSynth frame. Planar input is copied plane by plane;
311 | // YUY2 input is unpacked through convYUY2to422(). Does nothing when no luma buffer exists.
312 | void PlanarFrame::copyInternalFrom(PVideoFrame &frame, VideoInfo &viInfo)
313 | {
314 |   if (!y) return;
315 |   if (!viInfo.IsPlanar()) {
316 |     if (viInfo.IsYUY2()) // any other non-planar format is ignored
317 |       convYUY2to422(frame->GetReadPtr(), y, u, v, frame->GetPitch(), ypitch, uvpitch,
318 |         viInfo.width, viInfo.height);
319 |     return;
320 |   }
321 |   BitBlt(y, ypitch, frame->GetReadPtr(PLANAR_Y), frame->GetPitch(PLANAR_Y),
322 |     frame->GetRowSize(PLANAR_Y), frame->GetHeight(PLANAR_Y));
323 |   if (!u || !v) return; // no chroma buffers allocated
324 |   BitBlt(u, uvpitch, frame->GetReadPtr(PLANAR_U), frame->GetPitch(PLANAR_U),
325 |     frame->GetRowSize(PLANAR_U), frame->GetHeight(PLANAR_U));
326 |   BitBlt(v, uvpitch, frame->GetReadPtr(PLANAR_V), frame->GetPitch(PLANAR_V),
327 |     frame->GetRowSize(PLANAR_V), frame->GetHeight(PLANAR_V));
328 | }
329 |
330 | void PlanarFrame::copyInternalFrom(PlanarFrame &frame)
331 | {
332 | if (y == NULL) return;
333 | BitBlt(y, ypitch, frame.y, frame.ypitch, frame.ywidth, frame.yheight);
334 | if (u == NULL || v == NULL) return;
335 | BitBlt(u, uvpitch, frame.u, frame.uvpitch, frame.uvwidth, frame.uvheight);
336 | BitBlt(v, uvpitch, frame.v, frame.uvpitch, frame.uvwidth, frame.uvheight);
337 | }
338 |
339 | void PlanarFrame::copyInternalTo(PVideoFrame &frame, VideoInfo &viInfo)
340 | {
341 | if (y == NULL) return;
342 | if (viInfo.IsPlanar())
343 | {
344 | BitBlt(frame->GetWritePtr(PLANAR_Y), frame->GetPitch(PLANAR_Y), y, ypitch, ywidth, yheight);
345 | if (u == NULL || v == NULL) return;
346 | BitBlt(frame->GetWritePtr(PLANAR_U), frame->GetPitch(PLANAR_U), u, uvpitch, uvwidth, uvheight);
347 | BitBlt(frame->GetWritePtr(PLANAR_V), frame->GetPitch(PLANAR_V), v, uvpitch, uvwidth, uvheight);
348 | }
349 | else if (viInfo.IsYUY2())
350 | {
351 | conv422toYUY2(y, u, v, frame->GetWritePtr(), ypitch, uvpitch, frame->GetPitch(), ywidth, yheight);
352 | }
353 | }
354 |
355 | void PlanarFrame::copyInternalTo(PlanarFrame &frame)
356 | {
357 | if (y == NULL) return;
358 | BitBlt(frame.y, frame.ypitch, y, ypitch, ywidth, yheight);
359 | if (u == NULL || v == NULL) return;
360 | BitBlt(frame.u, frame.uvpitch, u, uvpitch, uvwidth, uvheight);
361 | BitBlt(frame.v, frame.uvpitch, v, uvpitch, uvwidth, uvheight);
362 | }
363 |
364 | void PlanarFrame::copyInternalPlaneTo(PlanarFrame &frame, int plane)
365 | {
366 | if (plane == 0 && y != NULL)
367 | BitBlt(frame.y, frame.ypitch, y, ypitch, ywidth, yheight);
368 | else if (plane == 1 && u != NULL)
369 | BitBlt(frame.u, frame.uvpitch, u, uvpitch, uvwidth, uvheight);
370 | else if (plane == 2 && v != NULL)
371 | BitBlt(frame.v, frame.uvpitch, v, uvpitch, uvwidth, uvheight);
372 | }
373 |
374 | void PlanarFrame::copyChromaTo(PlanarFrame &dst)
375 | {
376 | BitBlt(dst.u, dst.uvpitch, u, uvpitch, dst.uvwidth, dst.uvheight);
377 | BitBlt(dst.v, dst.uvpitch, v, uvpitch, dst.uvwidth, dst.uvheight);
378 | }
379 |
380 | void PlanarFrame::copyToForBMP(PVideoFrame &dst, VideoInfo &viInfo)
381 | {
382 | uint8_t *dstp = dst->GetWritePtr(PLANAR_Y);
383 | if (viInfo.IsPlanar())
384 | {
385 | int out_pitch = (ywidth + 3) & -4;
386 | BitBlt(dstp, out_pitch, y, ypitch, ywidth, yheight);
387 | BitBlt(dstp + (out_pitch*yheight), out_pitch >> 1, v, uvpitch, uvwidth, uvheight);
388 | BitBlt(dstp + (out_pitch*yheight) + ((out_pitch >> 1)*uvheight), out_pitch >> 1, u, uvpitch, uvwidth, uvheight);
389 | }
390 | else
391 | {
392 | int out_pitch = (dst->GetRowSize(PLANAR_Y) + 3) & -4;
393 | conv422toYUY2(y, u, v, dstp, ypitch, uvpitch, out_pitch, viInfo.width, viInfo.height);
394 | }
395 | }
396 |
397 | PlanarFrame& PlanarFrame::operator=(PlanarFrame &ob2)
398 | {
399 | cpu = ob2.cpu;
400 | ypitch = ob2.ypitch;
401 | yheight = ob2.yheight;
402 | ywidth = ob2.ywidth;
403 | uvpitch = ob2.uvpitch;
404 | uvheight = ob2.uvheight;
405 | uvwidth = ob2.uvwidth;
406 | this->copyFrom(ob2);
407 | return *this;
408 | }
409 |
410 | void PlanarFrame::convYUY2to422(const uint8_t *src, uint8_t *py, uint8_t *pu,
411 | uint8_t *pv, int pitch1, int pitch2Y, int pitch2UV, int width, int height)
412 | {
413 | #ifdef INTEL_INTRINSICS
414 | if ((cpu&CPUF_SSE2) && useSIMD)
415 | convYUY2to422_SSE2(src, py, pu, pv, pitch1, pitch2Y, pitch2UV, width, height);
416 | else
417 | #endif
418 | {
419 | width >>= 1;
420 | for (int y = 0; y < height; ++y)
421 | {
422 | for (int x = 0; x < width; ++x)
423 | {
424 | py[x << 1] = src[x << 2];
425 | pu[x] = src[(x << 2) + 1];
426 | py[(x << 1) + 1] = src[(x << 2) + 2];
427 | pv[x] = src[(x << 2) + 3];
428 | }
429 | py += pitch2Y;
430 | pu += pitch2UV;
431 | pv += pitch2UV;
432 | src += pitch1;
433 | }
434 | }
435 | }
436 |
437 |
#ifdef INTEL_INTRINSICS
// SSE2 version of the YUY2 -> planar 4:2:2 split.
//
// Each inner iteration consumes 16 packed bytes (8 pixels) and produces
// 8 luma bytes plus 4 U and 4 V bytes.
//
// Requirements visible from the intrinsics used:
//  - src + x * 4 must be 16-byte aligned (_mm_load_si128 is an aligned load).
//  - The loop steps in units of 4 chroma samples, so when (width >> 1) is not
//    a multiple of 4 the last iteration reads and writes past the nominal row
//    end. Presumably the row pitches provide that padding - TODO confirm.
//
// The load's reinterpret_cast carries an explicit target type
// (const __m128i *), which the call requires.
void PlanarFrame::convYUY2to422_SSE2(const uint8_t *src, uint8_t *py, uint8_t *pu,
  uint8_t *pv, int pitch1, int pitch2Y, int pitch2UV, int width, int height)
{
  width >>= 1; // from here on: number of chroma samples per row
  const __m128i Ymask = _mm_set1_epi16(0x00FF);
  for (int row = 0; row < height; row++) {
    for (int x = 0; x < width; x += 4) {
      __m128i fullsrc = _mm_load_si128(reinterpret_cast<const __m128i *>(src + x * 4)); // VYUYVYUYVYUYVYUY
      __m128i yy = _mm_and_si128(fullsrc, Ymask); // 0Y0Y0Y0Y0Y0Y0Y0Y
      __m128i uvuv = _mm_srli_epi16(fullsrc, 8); // 0V0U0V0U0V0U0V0U
      yy = _mm_packus_epi16(yy, yy); // xxxxxxxxYYYYYYYY
      uvuv = _mm_packus_epi16(uvuv, uvuv); // xxxxxxxxVUVUVUVU
      __m128i uu = _mm_and_si128(uvuv, Ymask); // xxxxxxxx0U0U0U0U
      __m128i vv = _mm_srli_epi16(uvuv, 8); // xxxxxxxx0V0V0V0V
      uu = _mm_packus_epi16(uu, uu); // xxxxxxxxxxxxUUUU
      vv = _mm_packus_epi16(vv, vv); // xxxxxxxxxxxxVVVV
      _mm_storel_epi64(reinterpret_cast<__m128i *>(py + x * 2), yy); // store 8 luma bytes
      *(uint32_t *)(pu + x) = _mm_cvtsi128_si32(uu); // store 4 U bytes
      *(uint32_t *)(pv + x) = _mm_cvtsi128_si32(vv); // store 4 V bytes
    }
    src += pitch1;
    py += pitch2Y;
    pu += pitch2UV;
    pv += pitch2UV;
  }
}
#endif
466 |
467 | void PlanarFrame::conv422toYUY2(uint8_t *py, uint8_t *pu, uint8_t *pv,
468 | uint8_t *dst, int pitch1Y, int pitch1UV, int pitch2, int width, int height)
469 | {
470 | #ifdef INTEL_INTRINSICS
471 | if ((cpu&CPUF_SSE2) && useSIMD)
472 | conv422toYUY2_SSE2(py, pu, pv, dst, pitch1Y, pitch1UV, pitch2, width, height);
473 | else
474 | #endif
475 | {
476 | width >>= 1;
477 | for (int y = 0; y < height; ++y)
478 | {
479 | for (int x = 0; x < width; ++x)
480 | {
481 | dst[x << 2] = py[x << 1];
482 | dst[(x << 2) + 1] = pu[x];
483 | dst[(x << 2) + 2] = py[(x << 1) + 1];
484 | dst[(x << 2) + 3] = pv[x];
485 | }
486 | py += pitch1Y;
487 | pu += pitch1UV;
488 | pv += pitch1UV;
489 | dst += pitch2;
490 | }
491 | }
492 | }
493 |
494 |
#ifdef INTEL_INTRINSICS
// SSE2 version of the planar 4:2:2 -> YUY2 interleave.
//
// Each inner iteration reads 8 luma bytes plus 4 U and 4 V bytes and writes
// 16 packed bytes (8 pixels).
//
// Requirements visible from the intrinsics used:
//  - dst + x * 4 must be 16-byte aligned (_mm_store_si128 is an aligned store).
//  - The loop steps in units of 4 chroma samples, so when (width >> 1) is not
//    a multiple of 4 the last iteration reads and writes past the nominal row
//    end. Presumably the row pitches provide that padding - TODO confirm.
//
// Each load's reinterpret_cast carries an explicit target type:
// const __m128i * for _mm_loadl_epi64 and const float * for _mm_load_ss,
// matching the parameter types of those intrinsics.
void PlanarFrame::conv422toYUY2_SSE2(uint8_t *py, uint8_t *pu, uint8_t *pv,
  uint8_t *dst, int pitch1Y, int pitch1UV, int pitch2, int width, int height)
{
  width >>= 1; // from here on: number of chroma samples per row
  for (int row = 0; row < height; row++) {
    for (int x = 0; x < width; x += 4) {
      __m128i yy = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(py + x * 2)); // YYYYYYYY
      // _mm_load_ss is used purely as a 32-bit load that zeroes the upper lanes
      __m128i uu = _mm_castps_si128(_mm_load_ss(reinterpret_cast<const float *>(pu + x))); // 000000000000UUUU
      __m128i vv = _mm_castps_si128(_mm_load_ss(reinterpret_cast<const float *>(pv + x))); // 000000000000VVVV
      __m128i uvuv = _mm_unpacklo_epi8(uu, vv); // 00000000VUVUVUVU
      __m128i yuyv = _mm_unpacklo_epi8(yy, uvuv); // VYUYVYUYVYUYVYUY
      _mm_store_si128(reinterpret_cast<__m128i *>(dst + x * 4), yuyv);
    }
    dst += pitch2;
    py += pitch1Y;
    pu += pitch1UV;
    pv += pitch1UV;
  }
}
#endif
516 |
517 | // Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al.
518 | // http://www.avisynth.org
519 |
520 | // This program is free software; you can redistribute it and/or modify
521 | // it under the terms of the GNU General Public License as published by
522 | // the Free Software Foundation; either version 2 of the License, or
523 | // (at your option) any later version.
524 | //
525 | // This program is distributed in the hope that it will be useful,
526 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
527 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
528 | // GNU General Public License for more details.
529 | //
530 | // You should have received a copy of the GNU General Public License
531 | // along with this program; if not, write to the Free Software
532 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
533 | // http://www.gnu.org/copyleft/gpl.html .
534 | //
535 | // Linking Avisynth statically or dynamically with other modules is making a
536 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU
537 | // General Public License cover the whole combination.
538 | //
539 | // As a special exception, the copyright holders of Avisynth give you
540 | // permission to link Avisynth with independent modules that communicate with
541 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
542 | // terms of these independent modules, and to copy and distribute the
543 | // resulting combined work under terms of your choice, provided that
544 | // every copy of the combined work is accompanied by a complete copy of
545 | // the source code of Avisynth (the version of Avisynth used to produce the
546 | // combined work), being distributed under the terms of the GNU General
547 | // Public License plus this exception. An independent module is a module
548 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
549 | // import and export plugins, or graphical user interfaces.
550 |
551 | // from Avisynth 2.55 source...
552 | // copied so we don't need an
553 | // IScriptEnvironment pointer
554 | // to call it
555 |
556 | #include "avisynth.h"
557 | #include
558 |
559 | void PlanarFrame::BitBlt(uint8_t* dstp, int dst_pitch, const uint8_t* srcp,
560 | int src_pitch, int row_size, int height)
561 | {
562 | if (!height || !row_size) return;
563 | if (height == 1 || (dst_pitch == src_pitch && src_pitch == row_size))
564 | memcpy(dstp, srcp, src_pitch * height);
565 | else
566 | {
567 | for (int y = height; y > 0; --y)
568 | {
569 | memcpy(dstp, srcp, row_size);
570 | dstp += dst_pitch;
571 | srcp += src_pitch;
572 | }
573 | }
574 | }
575 |
576 | int PlanarFrame::CheckMemory()
577 | {
578 | if (!debug) return 0;
579 | if (!y) return 0;
580 | // check buffer overrun
581 | uint32_t* pInt = (uint32_t*)(y - MIN_ALIGNMENT);
582 | for (int i = 0; i < MIN_ALIGNMENT / sizeof(uint32_t); i++)
583 | if (pInt[i] != 0xDEADBEEF)
584 | return 1;
585 | return 0;
586 | }
587 |
588 |
--------------------------------------------------------------------------------
/TComb/PlanarFrame.h:
--------------------------------------------------------------------------------
1 | /*
2 | ** My PlanarFrame class... fast mmx/sse2 YUY2 packed to planar and planar
3 | ** to packed conversions, and always gives 16 bit alignment for all
4 | ** planes. Supports YV12/YUY2 frames from avisynth, can do any planar format
5 | ** internally.
6 | **
7 | ** Copyright (C) 2005-2006 Kevin Stone
8 | **
9 | ** This program is free software; you can redistribute it and/or modify
10 | ** it under the terms of the GNU General Public License as published by
11 | ** the Free Software Foundation; either version 2 of the License, or
12 | ** (at your option) any later version.
13 | **
14 | ** This program is distributed in the hope that it will be useful,
15 | ** but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | ** GNU General Public License for more details.
18 | **
19 | ** You should have received a copy of the GNU General Public License
20 | ** along with this program; if not, write to the Free Software
21 | ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 | */
23 |
24 | #ifndef __PlanarFrame_H__
25 | #define __PlanarFrame_H__
26 |
27 | #include
28 | #include "avisynth.h"
29 |
30 | #define MIN_ALIGNMENT 64
31 |
32 | #define PLANAR_420 1
33 | #define PLANAR_422 2
34 | #define PLANAR_444 3
35 | #define PLANAR_411 4
36 | #define PLANAR_400 5
37 |
38 | class PlanarFrame
39 | {
40 | private:
41 | int cpu;
42 | bool useSIMD, packed;
43 | int ypitch, uvpitch;
44 | int ywidth, uvwidth;
45 | int yheight, uvheight;
46 | bool debug = false;
47 | int debug_padding = 0;
48 | uint8_t *y, *u, *v;
49 | bool allocSpace(VideoInfo &viInfo);
50 | bool allocSpace(int specs[4]);
51 | void copyInternalFrom(PVideoFrame &frame, VideoInfo &viInfo);
52 | void copyInternalFrom(PlanarFrame &frame);
53 | void copyInternalTo(PVideoFrame &frame, VideoInfo &viInfo);
54 | void copyInternalTo(PlanarFrame &frame);
55 | void copyInternalPlaneTo(PlanarFrame &frame, int plane);
56 | void convYUY2to422(const uint8_t *src, uint8_t *py, uint8_t *pu,
57 | uint8_t *pv, int pitch1, int pitch2Y, int pitch2UV, int width, int height);
58 | void conv422toYUY2(uint8_t *py, uint8_t *pu, uint8_t *pv,
59 | uint8_t *dst, int pitch1Y, int pitch1UV, int pitch2, int width, int height);
60 | #ifdef INTEL_INTRINSICS
61 | void convYUY2to422_SSE2(const uint8_t* src, uint8_t* py, uint8_t* pu,
62 | uint8_t* pv, int pitch1, int pitch2Y, int pitch2UV, int width, int height);
63 | void conv422toYUY2_SSE2(uint8_t *py, uint8_t *pu, uint8_t *pv,
64 | uint8_t *dst, int pitch1Y, int pitch1UV, int pitch2, int width, int height);
65 | #endif
66 |
67 | public:
68 | PlanarFrame(int cpuInfo);
69 | PlanarFrame(VideoInfo &viInfo, int cpuInfo);
70 | PlanarFrame(VideoInfo &viInfo, bool _packed, int cpuInfo);
71 | ~PlanarFrame();
72 | void createPlanar(int yheight, int uvheight, int ywidth, int uvwidth);
73 | void createPlanar(int height, int width, int chroma_format);
74 | void createFromProfile(VideoInfo &viInfo);
75 | void createFromFrame(PVideoFrame &frame, VideoInfo &viInfo);
76 | void createFromPlanar(PlanarFrame &frame);
77 | void copyFrom(PVideoFrame &frame, VideoInfo &viInfo);
78 | void copyTo(PVideoFrame &frame, VideoInfo &viInfo);
79 | void copyFrom(PlanarFrame &frame);
80 | void copyTo(PlanarFrame &frame);
81 | void copyChromaTo(PlanarFrame &dst);
82 | void copyToForBMP(PVideoFrame &dst, VideoInfo &viInfo);
83 | void copyPlaneTo(PlanarFrame &dst, int plane);
84 | void freePlanar();
85 | uint8_t* GetPtr(int plane = 0);
86 | int NumComponents();
87 | int GetWidth(int plane = 0);
88 | int GetHeight(int plane = 0);
89 | int GetPitch(int plane = 0);
90 | void BitBlt(uint8_t* dstp, int dst_pitch, const uint8_t* srcp,
91 | int src_pitch, int row_size, int height);
92 | int CheckMemory();
93 | void FillMemDebug();
94 | PlanarFrame& operator=(PlanarFrame &ob2);
95 | };
96 |
97 | #endif
--------------------------------------------------------------------------------
/TComb/TComb.h:
--------------------------------------------------------------------------------
1 | /*
2 | ** TComb v2.x for Avisynth 2.6 and Avisynth+
3 | **
4 | ** TComb is a temporal comb filter (it reduces cross-luminance (rainbowing)
5 | ** and cross-chrominance (dot crawl) artifacts in static areas of the picture).
6 | ** It will ONLY work with NTSC material, and WILL NOT work with telecined material
7 | ** where the rainbowing/dotcrawl was introduced prior to the telecine process!
8 | ** It must be used before ivtc or deinterlace.
9 | **
10 | ** Copyright (C) 2021 Ferenc Pintér
11 | **
12 | ** Copyright (C) 2015 Shane Panke
13 | **
14 | ** Copyright (C) 2005-2006 Kevin Stone
15 | **
16 | ** This program is free software; you can redistribute it and/or modify
17 | ** it under the terms of the GNU General Public License as published by
18 | ** the Free Software Foundation; either version 2 of the License, or
19 | ** (at your option) any later version.
20 | **
21 | ** This program is distributed in the hope that it will be useful,
22 | ** but WITHOUT ANY WARRANTY; without even the implied warranty of
23 | ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 | ** GNU General Public License for more details.
25 | **
26 | ** You should have received a copy of the GNU General Public License
27 | ** along with this program; if not, write to the Free Software
28 | ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 | */
30 |
31 | #if defined(_WIN32) && !defined(INTEL_INTRINSICS)
32 | #error Forgot to set INTEL_INTRINSICS? Comment out this line if not
33 | #endif
34 |
#include "avisynth.h"
#include "common.h"
#include <algorithm> // std::min / std::max used by the min3/max3/min4/max4 macros
#include <stdint.h>  // uint8_t / uint64_t used by the prototypes below
#include "PlanarFrame.h"
40 |
41 | // version appears in .rc as well
42 | #define VERSION "v2.3"
43 |
44 | //#define OLD_ASM
45 |
46 | #define min3(a,b,c) std::min(std::min(a,b),c)
47 | #define max3(a,b,c) std::max(std::max(a,b),c)
48 | #define min4(a,b,c,d) std::min(std::min(a,b),std::min(c,d))
49 | #define max4(a,b,c,d) std::max(std::max(a,b),std::max(c,d))
50 |
51 | class TCombFrame
52 | {
53 | public:
54 | int fnum;
55 | bool sc;
56 | bool isValid[11];
57 | PlanarFrame* orig, * msk1, * msk2;
58 | PlanarFrame** b, * avg, * omsk;
59 | TCombFrame();
60 | TCombFrame(VideoInfo& vi, int cpuFlags);
61 | ~TCombFrame();
62 | void setFNum(int i);
63 | };
64 |
65 | class TCombCache
66 | {
67 | public:
68 | TCombFrame** frames;
69 | int start_pos, size;
70 | TCombCache();
71 | TCombCache(int _size, VideoInfo& vi, int cpuFlags);
72 | ~TCombCache();
73 | void resetCacheStart(int first, int last);
74 | int getCachePos(int n);
75 | };
76 |
77 | class TComb : public GenericVideoFilter
78 | {
79 | public:
80 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env);
81 | TComb(PClip _child, int _mode, int _fthreshL, int _fthreshC, int _othreshL,
82 | int othreshC, bool _map, double _scthresh, bool _debug, int _opt, IScriptEnvironment* env);
83 | ~TComb();
84 | private:
85 | bool map, debug;
86 | int fthreshL, fthreshC;
87 | int othreshL, othreshC;
88 | int mode, opt;
89 | unsigned long diffmaxsc;
90 | double scthresh;
91 | PlanarFrame* dstPF, * tmpPF;
92 | PlanarFrame* minPF, * maxPF;
93 | PlanarFrame* padPF;
94 | TCombCache* tdc;
95 | char buf[256];
96 | int mapn(int n);
97 | void getAverages(int lc, IScriptEnvironment* env);
98 | void buildOscillationMasks(int lc, IScriptEnvironment* env);
99 | void getFinalMasks(int lc, IScriptEnvironment* env);
100 | void insertFrame(PVideoFrame& src, int pos, int fnum, int lc, IScriptEnvironment* env);
101 | void buildDiffMask(TCombFrame* tf1, TCombFrame* tf2, int lc, IScriptEnvironment* env);
102 | void buildDiffMasks(int lc, IScriptEnvironment* env);
103 | void absDiff(PlanarFrame* src1, PlanarFrame* src2, PlanarFrame* dst,
104 | int lc, IScriptEnvironment* env);
105 | void absDiffAndMinMask(PlanarFrame* src1, PlanarFrame* src2, PlanarFrame* dst,
106 | int lc, IScriptEnvironment* env);
107 | void VerticalBlur3(PlanarFrame* src, PlanarFrame* dst, int lc, IScriptEnvironment* env);
108 | void HorizontalBlur3(PlanarFrame* src, PlanarFrame* dst, int lc, IScriptEnvironment* env);
109 | void getStartStop(int lc, int& start, int& stop);
110 | void buildFinalFrame(PlanarFrame* p2, PlanarFrame* p1, PlanarFrame* src,
111 | PlanarFrame* n1, PlanarFrame* n2, PlanarFrame* m1, PlanarFrame* m2, PlanarFrame* m3,
112 | PlanarFrame* dst, int lc, IScriptEnvironment* env);
113 | void copyPad(PlanarFrame* src, PlanarFrame* dst, int lc, IScriptEnvironment* env);
114 | void MinMax(PlanarFrame* src, PlanarFrame* dmin, PlanarFrame* dmax, int lc,
115 | IScriptEnvironment* env);
116 | void HorizontalBlur6(PlanarFrame* src, PlanarFrame* dst, int lc, IScriptEnvironment* env);
117 | void absDiffAndMinMaskThresh(PlanarFrame* src1, PlanarFrame* src2, PlanarFrame* dst,
118 | int lc, IScriptEnvironment* env);
119 | void buildFinalMask(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* m1,
120 | PlanarFrame* dst, int lc, IScriptEnvironment* env);
121 | void calcAverages(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* dst, int lc, IScriptEnvironment* env);
122 | void checkOscillation5(PlanarFrame* p2, PlanarFrame* p1, PlanarFrame* s1,
123 | PlanarFrame* n1, PlanarFrame* n2, PlanarFrame* dst, int lc, IScriptEnvironment* env);
124 | void checkAvgOscCorrelation(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* s3,
125 | PlanarFrame* s4, PlanarFrame* dst, int lc, IScriptEnvironment* env);
126 | void or3Masks(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* s3,
127 | PlanarFrame* dst, int lc, IScriptEnvironment* env);
128 | void orAndMasks(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* dst, int lc, IScriptEnvironment* env);
129 | void andMasks(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* dst, int lc, IScriptEnvironment* env);
130 | bool checkSceneChange(PlanarFrame* s1, PlanarFrame* s2, int n, IScriptEnvironment* env);
131 | void andNeighborsInPlace(PlanarFrame* src, int lc, IScriptEnvironment* env);
132 | };
133 |
134 | void checkSceneChangePlanar_1_SSE2_simd(const uint8_t* prvp, const uint8_t* srcp,
135 | int height, int width, int prv_pitch, int src_pitch, uint64_t& diffp);
136 |
// Declaration of the templated scene-change helper; pixel_t is the sample
// type of the two input planes. The template parameter list is spelled out
// explicitly because pixel_t is used in the signature. The result is
// delivered through the diffp reference.
template<typename pixel_t>
void checkSceneChangePlanar_1_c(const pixel_t* prvp, const pixel_t* srcp,
  int height, int width, int prv_pitch, int src_pitch, uint64_t& diffp);
140 |
141 | void andMasks_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height);
142 | void andMasks_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height);
143 |
144 | void orAndMasks_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height);
145 | void orAndMasks_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height);
146 |
147 | void or3Masks_SSE2_simd(const uint8_t * s1p, const uint8_t * s2p, const uint8_t * s3p, uint8_t * dstp, int stride, int width, int height);
148 | void or3Masks_c(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* s3p, uint8_t* dstp, int stride, int width, int height);
149 |
150 | void calcAverages_SSE2_simd(const uint8_t * s1p, const uint8_t * s2p, uint8_t * dstp, int stride, int width, int height);
151 | void calcAverages_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height);
152 |
153 | void MinMax_SSE2_simd(const uint8_t * srcp, uint8_t * dstpMin, uint8_t * dstpMax, int src_stride, int dmin_stride, int width, int height, int thresh);
154 | void MinMax_c(const uint8_t* srcp, uint8_t* dstpMin, uint8_t* dstpMax, int src_stride, int dmin_stride, int width, int height, int thresh);
155 |
156 | void absDiff_SSE2_simd(const uint8_t * srcp1, const uint8_t * srcp2, uint8_t * dstp, int stride, int width, int height);
157 | void absDiff_c(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height);
158 |
159 | void buildFinalMask_SSE2_simd(const uint8_t * s1p, const uint8_t * s2p, const uint8_t * m1p, uint8_t * dstp, int stride, int width, int height, int thresh);
160 | void buildFinalMask_c(const uint8_t * s1p, const uint8_t * s2p, const uint8_t * m1p, uint8_t * dstp, int stride, int width, int height, int thresh);
161 |
162 | void checkOscillation5_SSE2_simd(const uint8_t * p2p, const uint8_t * p1p, const uint8_t * s1p, const uint8_t * n1p, const uint8_t * n2p, uint8_t * dstp, int stride, int width, int height, int thresh);
163 | void checkOscillation5_c(const uint8_t * p2p, const uint8_t * p1p, const uint8_t * s1p, const uint8_t * n1p, const uint8_t * n2p, uint8_t * dstp, int stride, int width, int height, int thresh);
164 |
165 | void absDiffAndMinMaskThresh_SSE2_simd(const uint8_t * srcp1, const uint8_t * srcp2, uint8_t * dstp, int stride, int width, int height, int thresh);
166 | void absDiffAndMinMaskThresh_c(const uint8_t * srcp1, const uint8_t * srcp2, uint8_t * dstp, int stride, int width, int height, int thresh);
167 |
168 | void absDiffAndMinMask_SSE2_simd(const uint8_t * srcp1, const uint8_t * srcp2, uint8_t * dstp, int stride, int width, int height);
169 | void absDiffAndMinMask_c(const uint8_t * srcp1, const uint8_t * srcp2, uint8_t * dstp, int stride, int width, int height);
170 |
171 | void checkAvgOscCorrelation_SSE2_simd(const uint8_t * s1p, const uint8_t * s2p, const uint8_t * s3p, const uint8_t * s4p, uint8_t * dstp, int stride, int width, int height, int thresh);
172 | void checkAvgOscCorrelation_c(const uint8_t * s1p, const uint8_t * s2p, const uint8_t * s3p, const uint8_t * s4p, uint8_t * dstp, int stride, int width, int height, int thresh);
173 |
174 | void VerticalBlur3_SSE2_simd(const uint8_t * srcp, uint8_t * dstp, int stride, int width, int height);
175 | void VerticalBlur3_c(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height);
176 |
177 | void HorizontalBlur3_SSE2_simd(const uint8_t * srcp, uint8_t * dstp, int stride, int width, int height);
178 | void HorizontalBlur3_c(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height);
179 |
180 | void HorizontalBlur6_SSE2_simd(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height);
181 | void HorizontalBlur6_c(const uint8_t * srcp, uint8_t * dstp, int stride, int width, int height);
182 |
183 | void andNeighborsInPlace_SSE2_simd(uint8_t * srcp, int stride, int width, int height);
184 | // no distinct C here
185 |
186 |
--------------------------------------------------------------------------------
/TComb/TComb.rc:
--------------------------------------------------------------------------------
1 | // Microsoft Visual C++ generated resource script.
2 | //
3 | #include "resource.h"
4 |
5 | #define APSTUDIO_READONLY_SYMBOLS
6 | /////////////////////////////////////////////////////////////////////////////
7 | //
8 | // Generated from the TEXTINCLUDE 2 resource.
9 | //
10 | #include "winres.h"
11 |
12 | /////////////////////////////////////////////////////////////////////////////
13 | #undef APSTUDIO_READONLY_SYMBOLS
14 |
15 | /////////////////////////////////////////////////////////////////////////////
16 | // English (United States) resources
17 |
18 | #if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
19 | LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
20 | #pragma code_page(1252)
21 |
22 | #ifdef APSTUDIO_INVOKED
23 | /////////////////////////////////////////////////////////////////////////////
24 | //
25 | // TEXTINCLUDE
26 | //
27 |
28 | 1 TEXTINCLUDE
29 | BEGIN
30 | "resource.h\0"
31 | END
32 |
33 | 2 TEXTINCLUDE
34 | BEGIN
35 | "#include ""winres.h""\r\n"
36 | "\0"
37 | END
38 |
39 | 3 TEXTINCLUDE
40 | BEGIN
41 | "\r\n"
42 | "\0"
43 | END
44 |
45 | #endif // APSTUDIO_INVOKED
46 |
47 |
48 | /////////////////////////////////////////////////////////////////////////////
49 | //
50 | // Version
51 | //
52 |
53 | VS_VERSION_INFO VERSIONINFO
54 | FILEVERSION 2,3,0,0
55 | PRODUCTVERSION 2,3,0,0
56 | FILEFLAGSMASK 0x17L
57 | #ifdef _DEBUG
58 | FILEFLAGS 0x1L
59 | #else
60 | FILEFLAGS 0x0L
61 | #endif
62 | FILEOS 0x4L
63 | FILETYPE 0x2L
64 | FILESUBTYPE 0x0L
65 | BEGIN
66 | BLOCK "StringFileInfo"
67 | BEGIN
68 | BLOCK "040904b0"
69 | BEGIN
70 | VALUE "FileDescription", "TComb for Avisynth 2.6 and Avisynth+"
71 | VALUE "FileVersion", "2.3.0.0"
72 | VALUE "LegalCopyright", "Copyright (C) 2005-2006 Kevin Stone 2015- et al."
73 | VALUE "OriginalFilename", "TComb.dll"
74 | VALUE "ProductVersion", "2.3.0.0"
75 | END
76 | END
77 | BLOCK "VarFileInfo"
78 | BEGIN
79 | VALUE "Translation", 0x409, 1200
80 | END
81 | END
82 |
83 | #endif // English (United States) resources
84 | /////////////////////////////////////////////////////////////////////////////
85 |
86 |
87 |
88 | #ifndef APSTUDIO_INVOKED
89 | /////////////////////////////////////////////////////////////////////////////
90 | //
91 | // Generated from the TEXTINCLUDE 3 resource.
92 | //
93 |
94 |
95 | /////////////////////////////////////////////////////////////////////////////
96 | #endif // not APSTUDIO_INVOKED
97 |
98 |
--------------------------------------------------------------------------------
/TComb/TComb.sln:
--------------------------------------------------------------------------------
1 | Microsoft Visual Studio Solution File, Format Version 12.00
2 | # Visual Studio 2013
3 | VisualStudioVersion = 12.0.30501.0
4 | MinimumVisualStudioVersion = 10.0.40219.1
5 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TComb", "TComb.vcxproj", "{B4188B7A-C76E-4E35-946F-3477273D0A44}"
6 | EndProject
7 | Global
8 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
9 | Debug|Win32 = Debug|Win32
10 | Debug|x64 = Debug|x64
11 | Release|Win32 = Release|Win32
12 | Release|x64 = Release|x64
13 | EndGlobalSection
14 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
15 | {B4188B7A-C76E-4E35-946F-3477273D0A44}.Debug|Win32.ActiveCfg = Debug|Win32
16 | {B4188B7A-C76E-4E35-946F-3477273D0A44}.Debug|Win32.Build.0 = Debug|Win32
17 | {B4188B7A-C76E-4E35-946F-3477273D0A44}.Debug|x64.ActiveCfg = Debug|x64
18 | {B4188B7A-C76E-4E35-946F-3477273D0A44}.Debug|x64.Build.0 = Debug|x64
19 | {B4188B7A-C76E-4E35-946F-3477273D0A44}.Release|Win32.ActiveCfg = Release|Win32
20 | {B4188B7A-C76E-4E35-946F-3477273D0A44}.Release|Win32.Build.0 = Release|Win32
21 | {B4188B7A-C76E-4E35-946F-3477273D0A44}.Release|x64.ActiveCfg = Release|x64
22 | {B4188B7A-C76E-4E35-946F-3477273D0A44}.Release|x64.Build.0 = Release|x64
23 | EndGlobalSection
24 | GlobalSection(SolutionProperties) = preSolution
25 | HideSolutionNode = FALSE
26 | EndGlobalSection
27 | EndGlobal
28 |
--------------------------------------------------------------------------------
/TComb/TComb.vcproj:
--------------------------------------------------------------------------------
1 |
2 |
8 |
9 |
11 |
12 |
13 |
19 |
32 |
34 |
45 |
47 |
49 |
51 |
53 |
55 |
57 |
59 |
61 |
63 |
65 |
66 |
73 |
92 |
94 |
104 |
106 |
108 |
110 |
112 |
114 |
116 |
118 |
120 |
122 |
124 |
125 |
126 |
127 |
128 |
129 |
133 |
135 |
136 |
138 |
139 |
141 |
142 |
143 |
147 |
149 |
150 |
152 |
153 |
155 |
156 |
158 |
159 |
161 |
162 |
164 |
165 |
166 |
170 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
--------------------------------------------------------------------------------
/TComb/TComb.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | Win32
7 |
8 |
9 | Debug
10 | x64
11 |
12 |
13 | Release
14 | Win32
15 |
16 |
17 | Release
18 | x64
19 |
20 |
21 |
22 | {B4188B7A-C76E-4E35-946F-3477273D0A44}
23 | Win32Proj
24 | 10.0
25 |
26 |
27 |
28 | DynamicLibrary
29 | v142
30 | MultiByte
31 | true
32 |
33 |
34 | DynamicLibrary
35 | v142
36 | MultiByte
37 |
38 |
39 | DynamicLibrary
40 | v142
41 | MultiByte
42 |
43 |
44 | DynamicLibrary
45 | v142
46 | MultiByte
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 | <_ProjectFileVersion>12.0.30501.0
67 |
68 |
69 | Debug\
70 | Debug\
71 | true
72 |
73 |
74 | true
75 |
76 |
77 | Release\
78 | Release\
79 | false
80 |
81 |
82 | false
83 |
84 |
85 |
86 | Disabled
87 | INTEL_INTRINSICS;WIN32;_DEBUG;_WINDOWS;_USRDLL;TCOMB_EXPORTS;%(PreprocessorDefinitions)
88 | true
89 | EnableFastChecks
90 | MultiThreadedDebug
91 | true
92 | true
93 |
94 | Level3
95 | EditAndContinue
96 | NoListing
97 |
98 |
99 | $(OutDir)TComb.dll
100 | true
101 | $(OutDir)TComb.pdb
102 | Windows
103 | false
104 | false
105 | $(OutDir)TComb.lib
106 | MachineX86
107 | false
108 |
109 |
110 |
111 |
112 | Disabled
113 | INTEL_INTRINSICS;WIN32;_DEBUG;_WINDOWS;_USRDLL;TCOMB_EXPORTS;%(PreprocessorDefinitions)
114 | EnableFastChecks
115 | MultiThreadedDebug
116 | true
117 | true
118 |
119 |
120 | Level3
121 | ProgramDatabase
122 |
123 |
124 | $(OutDir)TComb.dll
125 | true
126 | $(OutDir)TComb.pdb
127 | Windows
128 | false
129 | false
130 | $(OutDir)TComb.lib
131 |
132 |
133 |
134 |
135 | Full
136 | AnySuitable
137 | true
138 | Speed
139 | true
140 | false
141 | INTEL_INTRINSICS;WIN32;NDEBUG;_WINDOWS;_USRDLL;TCOMB_EXPORTS;%(PreprocessorDefinitions)
142 | MultiThreaded
143 | false
144 | true
145 |
146 | Level3
147 | ProgramDatabase
148 | true
149 | NoListing
150 |
151 |
152 | $(OutDir)TComb.dll
153 | false
154 | Windows
155 | true
156 | true
157 | $(OutDir)TComb.lib
158 | MachineX86
159 | UseLinkTimeCodeGeneration
160 | false
161 |
162 |
163 |
164 |
165 | MaxSpeed
166 | AnySuitable
167 | true
168 | Speed
169 | true
170 | false
171 | INTEL_INTRINSICS;WIN32;NDEBUG;_WINDOWS;_USRDLL;TCOMB_EXPORTS;%(PreprocessorDefinitions)
172 | MultiThreaded
173 | false
174 | true
175 |
176 |
177 | Level3
178 | ProgramDatabase
179 | AssemblyAndSourceCode
180 |
181 |
182 | $(OutDir)TComb.dll
183 | true
184 | Windows
185 | true
186 | true
187 | $(OutDir)TComb.lib
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 | true
215 | true
216 |
217 |
218 | true
219 | true
220 |
221 |
222 |
223 |
224 |
225 |
226 |
--------------------------------------------------------------------------------
/TComb/TComb.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx
15 |
16 |
17 |
18 |
19 | Source Files
20 |
21 |
22 | Source Files
23 |
24 |
25 | Source Files
26 |
27 |
28 |
29 |
30 | Header Files
31 |
32 |
33 | Header Files
34 |
35 |
36 | Header Files
37 |
38 |
39 | Header Files
40 |
41 |
42 | Header Files
43 |
44 |
45 | Header Files
46 |
47 |
48 | Header Files
49 |
50 |
51 | Header Files
52 |
53 |
54 | Header Files
55 |
56 |
57 | Header Files
58 |
59 |
60 | Header Files
61 |
62 |
63 | Header Files
64 |
65 |
66 |
67 |
68 | Resource Files
69 |
70 |
71 |
72 |
73 | Source Files
74 |
75 |
76 | Source Files
77 |
78 |
79 |
--------------------------------------------------------------------------------
/TComb/TComb_asm.asm:
--------------------------------------------------------------------------------
1 | .xmm ; allow the SSE/SSE2 instructions and xmm registers used throughout this file
2 | .model flat,c ; 32-bit flat memory model, C calling/naming convention for the procs below
3 | ; Constants shared by the SSE2 routines in this file.
4 | .data
5 |
6 | align 16 ; onesByte is used directly as a 128-bit memory operand (psubusb), so keep the table 16-byte aligned
7 |
8 | onesByte qword 2 dup(0101010101010101h) ; 16 bytes, each equal to 1
9 | sixsMask_W qword 2 dup(0006000600060006h) ; 8 words, each equal to 6
10 | eightsMask_W qword 2 dup(0008000800080008h) ; 8 words, each equal to 8
11 |
12 | .code
13 |
14 | buildFinalMask_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,m1p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword
15 | ; dstp[x] = m1p[x] where |s1p[x]-s2p[x]| < thresh, else 0. Each row is processed in 16-byte blocks until the offset reaches width_.
16 | mov esi,dstp
17 | mov edx,m1p
18 | mov ebx,s2p
19 | mov eax,s1p
20 | mov edi,width_
21 | ; xmm4 = low byte of (thresh-1) copied into all 16 byte lanes
22 | dec thresh
23 | movd xmm4,thresh
24 | punpcklbw xmm4,xmm4
25 | punpcklwd xmm4,xmm4
26 | pshufd xmm4,xmm4,0
27 | ; xmm5 = 0, reference value for the byte compare
28 | pxor xmm5,xmm5
29 |
30 |
31 | row_loop:
32 | xor ecx,ecx
33 | align 16
34 | block_loop:
35 | movdqa xmm0,[eax+ecx]
36 | movdqa xmm2,xmm0
37 | movdqa xmm1,[ebx+ecx]
38 | psubusb xmm0,xmm1 ; s1-s2, clamped at 0
39 | psubusb xmm1,xmm2 ; s2-s1, clamped at 0
40 | por xmm0,xmm1 ; |s1-s2|
41 | psubusb xmm0,xmm4 ; 0 exactly where |s1-s2| <= thresh-1
42 | pcmpeqb xmm0,xmm5 ; 0FFh where |s1-s2| < thresh
43 | pand xmm0,[edx+ecx] ; keep the m1p byte only in those lanes
44 | movdqa [esi+ecx],xmm0
45 |
46 | add ecx,16
47 | cmp ecx,edi
48 | jl block_loop
49 | ; step all four row pointers by the shared stride
50 | add esi,stride
51 | add edx,stride
52 | add ebx,stride
53 | add eax,stride
54 | dec height
55 | jnz row_loop
56 |
57 | ret
58 |
59 | buildFinalMask_SSE2 endp
60 |
61 |
62 |
63 | andNeighborsInPlace_SSE2 proc public uses esi edi srcp:dword,stride:dword,width_:dword,height:dword
64 | ; In place: srcp[x] &= OR of bytes x-1,x,x+1 of the row above and of the row below. The row above has already been rewritten by the previous pass.
65 | mov eax,srcp
66 | mov esi,eax
67 | mov edi,eax
68 | sub esi,stride ; esi = row above
69 | add edi,stride ; edi = row below
70 | mov edx,width_
71 |
72 | row_loop:
73 | xor ecx,ecx
74 | align 16
75 | block_loop:
76 | movdqa xmm3,[eax+ecx] ; current row, ANDed in last
77 | movdqu xmm0,[esi+ecx-1]
78 | por xmm0,[esi+ecx]
79 | movdqu xmm1,[esi+ecx+1]
80 | por xmm0,xmm1
81 | movdqu xmm1,[edi+ecx-1]
82 | por xmm0,xmm1
83 | por xmm0,[edi+ecx]
84 | movdqu xmm1,[edi+ecx+1]
85 | por xmm0,xmm1 ; xmm0 = OR of the six neighbours
86 | pand xmm0,xmm3
87 | movdqa [eax+ecx],xmm0
88 |
89 |
90 | add ecx,16
91 | cmp ecx,edx
92 | jl block_loop
93 |
94 | add edi,stride
95 | add esi,stride
96 | add eax,stride
97 | dec height
98 | jnz row_loop
99 |
100 | ret
101 |
102 | andNeighborsInPlace_SSE2 endp
103 |
104 |
105 |
106 | absDiff_SSE2 proc public uses ebx esi srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword
107 | ; dstp[x] = |srcp1[x] - srcp2[x]| for every byte; the three planes share one stride.
108 | mov ebx,dstp
109 | mov esi,srcp2
110 | mov eax,srcp1
111 | mov edx,width_
112 |
113 | row_loop:
114 | xor ecx,ecx
115 | align 16
116 | block_loop:
117 | movdqa xmm0,[eax+ecx]
118 | movdqa xmm1,[esi+ecx]
119 | movdqa xmm2,xmm0
120 | pmaxub xmm0,xmm1 ; larger of the two bytes
121 | pminub xmm1,xmm2 ; smaller of the two bytes
122 | psubb xmm0,xmm1 ; max-min = absolute difference
123 | movdqa [ebx+ecx],xmm0
124 |
125 | add ecx,16
126 | cmp ecx,edx
127 | jl block_loop
128 |
129 | add ebx,stride
130 | add esi,stride
131 | add eax,stride
132 | dec height
133 | jnz row_loop
134 |
135 | ret
136 |
137 | absDiff_SSE2 endp
138 |
139 |
140 |
141 | absDiffAndMinMask_SSE2 proc public uses ebx esi edi srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword
142 | ; dstp[x] = min(dstp[x], |srcp1[x] - srcp2[x]|): folds one more absolute difference into a running minimum.
143 | mov ebx,dstp
144 | mov esi,srcp2
145 | mov eax,srcp1
146 | mov edi,height ; row counter
147 | mov edx,width_
148 |
149 | row_loop:
150 | xor ecx,ecx
151 | align 16
152 | block_loop:
153 | movdqa xmm0,[eax+ecx]
154 | movdqa xmm1,[esi+ecx]
155 | movdqa xmm2,xmm0
156 | pmaxub xmm0,xmm1
157 | pminub xmm1,xmm2
158 | psubb xmm0,xmm1 ; max-min = |srcp1-srcp2|
159 | pminub xmm0,[ebx+ecx]
160 | movdqa [ebx+ecx],xmm0
161 |
162 | add ecx,16
163 | cmp ecx,edx
164 | jl block_loop
165 |
166 | add ebx,stride
167 | add esi,stride
168 | add eax,stride
169 | dec edi
170 | jnz row_loop
171 |
172 | ret
173 |
174 | absDiffAndMinMask_SSE2 endp
175 |
176 |
177 |
178 | absDiffAndMinMaskThresh_SSE2 proc public uses ebx esi edi srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword
179 | ; dstp[x] = 0FFh where min(dstp[x], |srcp1[x]-srcp2[x]|) < thresh, else 0.
180 | mov ebx,dstp
181 | mov esi,srcp2
182 | mov eax,srcp1
183 | mov edi,height ; row counter
184 | mov edx,width_
185 | ; xmm3 = low byte of (thresh-1) in all 16 lanes, xmm4 = 0
186 | dec thresh
187 | movd xmm3,thresh
188 | punpcklbw xmm3,xmm3
189 | punpcklwd xmm3,xmm3
190 | pshufd xmm3,xmm3,0
191 | pxor xmm4,xmm4
192 |
193 |
194 |
195 | row_loop:
196 | xor ecx,ecx
197 | align 16
198 | block_loop:
199 | movdqa xmm0,[eax+ecx]
200 | movdqa xmm1,[esi+ecx]
201 | movdqa xmm2,xmm0
202 | pmaxub xmm0,xmm1
203 | pminub xmm1,xmm2
204 | psubb xmm0,xmm1 ; |srcp1-srcp2|
205 | pminub xmm0,[ebx+ecx] ; running minimum with the value already in dstp
206 | psubusb xmm0,xmm3 ; 0 exactly where minimum <= thresh-1
207 | pcmpeqb xmm0,xmm4 ; 0FFh where minimum < thresh
208 | movdqa [ebx+ecx],xmm0
209 |
210 | add ecx,16
211 | cmp ecx,edx
212 | jl block_loop
213 |
214 | add ebx,stride
215 | add esi,stride
216 | add eax,stride
217 | dec edi
218 | jnz row_loop
219 |
220 | ret
221 |
222 | absDiffAndMinMaskThresh_SSE2 endp
223 |
224 |
225 |
226 | MinMax_SSE2 proc public uses ebx esi edi srcp:dword,minp:dword,maxp:dword,src_stride:dword,min_stride:dword,width_:dword,height:dword,thresh:dword
227 | ; For each byte: minp = (min over rows above/current/below at offsets -1,0,+1) - thresh, maxp = (max over the same 9 bytes) + thresh, both saturated.
228 | mov eax,srcp ; eax = current row
229 | mov esi,eax
230 | sub esi,src_stride ; esi = row above
231 | mov edi,eax
232 | add edi,src_stride ; edi = row below
233 | mov ebx,minp
234 | mov edx,maxp
235 | ; xmm3 = low byte of thresh copied into all 16 byte lanes (thresh is not decremented here)
236 | movd xmm3,thresh
237 | punpcklbw xmm3,xmm3
238 | punpcklwd xmm3,xmm3
239 | punpckldq xmm3,xmm3
240 | punpcklqdq xmm3,xmm3
241 |
242 | yloop:
243 | xor ecx,ecx ; ecx = byte offset inside the row
244 | align 16
245 | xloop:
246 | ; srcp-1 is aligned because the pointer passed to this function is srcp+stride+1.
247 | movdqa xmm0,[esi+ecx-1] ; xmm0 accumulates the minimum
248 | movdqa xmm1,xmm0 ; xmm1 accumulates the maximum
249 | movdqu xmm2,[esi+ecx]
250 | pminub xmm0,xmm2
251 | pmaxub xmm1,xmm2
252 | movdqu xmm2,[esi+ecx+1]
253 | pminub xmm0,xmm2
254 | pmaxub xmm1,xmm2
255 | movdqa xmm2,[eax+ecx-1] ; current row, same three offsets
256 | pminub xmm0,xmm2
257 | pmaxub xmm1,xmm2
258 | movdqu xmm2,[eax+ecx]
259 | pminub xmm0,xmm2
260 | pmaxub xmm1,xmm2
261 | movdqu xmm2,[eax+ecx+1]
262 | pminub xmm0,xmm2
263 | pmaxub xmm1,xmm2
264 | movdqa xmm2,[edi+ecx-1] ; row below, same three offsets
265 | pminub xmm0,xmm2
266 | pmaxub xmm1,xmm2
267 | movdqu xmm2,[edi+ecx]
268 | pminub xmm0,xmm2
269 | pmaxub xmm1,xmm2
270 | movdqu xmm2,[edi+ecx+1]
271 | pminub xmm0,xmm2
272 | pmaxub xmm1,xmm2
273 | psubusb xmm0,xmm3 ; min - thresh, clamped at 0
274 | paddusb xmm1,xmm3 ; max + thresh, clamped at 255
275 | movdqa [ebx+ecx],xmm0
276 | movdqa [edx+ecx],xmm1
277 |
278 | add ecx,16
279 | cmp ecx,width_
280 | jl xloop
281 | ; source rows advance by src_stride; both output planes advance by min_stride
282 | add esi,src_stride
283 | add eax,src_stride
284 | add edi,src_stride
285 | add ebx,min_stride
286 | add edx,min_stride
287 | dec height
288 | jnz yloop
289 |
290 | ret
291 |
292 | MinMax_SSE2 endp
293 |
294 |
295 |
296 | checkOscillation5_SSE2 proc public uses ebx esi edi p2p:dword,p1p:dword,s1p:dword,n1p:dword,n2p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword
297 | ; dstp = 0FFh where the {p2,s1,n2} and {p1,n1} value ranges do not overlap and each range is narrower than thresh; else 0.
298 | mov eax,p2p
299 | mov ebx,p1p
300 | mov edx,s1p
301 | mov edi,n1p
302 | mov esi,n2p
303 |
304 |
305 | pxor xmm6,xmm6 ; xmm6 = 0, reference for the byte compares
306 | ; xmm7 = low byte of (thresh-1) copied into all 16 byte lanes
307 | dec thresh
308 | movd xmm7,thresh
309 | punpcklbw xmm7,xmm7
310 | punpcklwd xmm7,xmm7
311 | punpckldq xmm7,xmm7
312 | punpcklqdq xmm7,xmm7
313 |
314 | yloop:
315 | xor ecx,ecx
316 | align 16
317 | xloop:
318 | movdqa xmm0,[eax+ecx] ; p2
319 | movdqa xmm2,[ebx+ecx] ; p1
320 | movdqa xmm1,xmm0
321 | movdqa xmm3,xmm2
322 | pminub xmm0,[edx+ecx]
323 | pmaxub xmm1,[edx+ecx]
324 | pminub xmm2,[edi+ecx] ; xmm2 = min(p1,n1)
325 | pmaxub xmm3,[edi+ecx] ; xmm3 = max(p1,n1)
326 | pminub xmm0,[esi+ecx] ; xmm0 = min(p2,s1,n2)
327 | pmaxub xmm1,[esi+ecx] ; xmm1 = max(p2,s1,n2)
328 |
329 | movdqa xmm4,xmm3
330 | movdqa xmm5,xmm1
331 | psubusb xmm4,xmm2 ; range of {p1,n1}
332 | psubusb xmm5,xmm0 ; range of {p2,s1,n2}
333 | ; minus (thresh-1)
334 | psubusb xmm4,xmm7
335 | psubusb xmm5,xmm7
336 | ; minus 1
337 | psubusb xmm2,oword ptr onesByte
338 | psubusb xmm0,oword ptr onesByte
339 |
340 | psubusb xmm1,xmm2 ; 0 where max(p2,s1,n2) <= min(p1,n1)-1 (saturated)
341 | psubusb xmm3,xmm0 ; 0 where max(p1,n1) <= min(p2,s1,n2)-1 (saturated)
342 |
343 | pcmpeqb xmm1,xmm6
344 | pcmpeqb xmm3,xmm6
345 | pcmpeqb xmm4,xmm6
346 | pcmpeqb xmm5,xmm6
347 | mov eax,dstp ; all other registers hold row pointers, so eax is borrowed for the store
348 | por xmm1,xmm3 ; either ordering of the two ranges
349 | pand xmm4,xmm5 ; both ranges narrower than thresh
350 | pand xmm1,xmm4
351 | movdqa [eax+ecx],xmm1
352 |
353 | add ecx,16
354 | mov eax,p2p ; restore eax = p2 row pointer for the next block
355 | cmp ecx,width_
356 | jl xloop
357 | ; p2p and dstp are advanced in their stack slots because they share eax
358 | mov eax,stride
359 | add ebx,stride
360 | add p2p,eax
361 | add edx,stride
362 | add edi,stride
363 | add dstp,eax
364 | add esi,stride
365 | mov eax,p2p
366 | dec height
367 | jnz yloop
368 |
369 | ret
370 |
371 | checkOscillation5_SSE2 endp
372 |
373 |
374 |
375 | calcAverages_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword
376 | ; dstp[x] = rounded byte average of s1p[x] and s2p[x] (pavgb), for every byte of the plane.
377 | mov edx,dstp
378 | mov ebx,s2p
379 | mov eax,s1p
380 | mov esi,width_
381 | mov edi,height ; row counter
382 |
383 | row_loop:
384 | xor ecx,ecx
385 | align 16
386 | block_loop:
387 | movdqa xmm0,[ebx+ecx]
388 | pavgb xmm0,[eax+ecx] ; the average is symmetric in its operands
389 | movdqa [edx+ecx],xmm0
390 |
391 | add ecx,16
392 | cmp ecx,esi
393 | jl block_loop
394 |
395 | add edx,stride
396 | add ebx,stride
397 | add eax,stride
398 | dec edi
399 | jnz row_loop
400 |
401 | ret
402 |
403 | calcAverages_SSE2 endp
404 |
405 |
406 |
407 | checkAvgOscCorrelation_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,s3p:dword,s4p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword
408 | ; dstp[x] is kept where max(s1..s4) - min(s1..s4) < thresh and cleared to 0 elsewhere.
409 | mov esi,dstp
410 | mov edi,s4p
411 | mov edx,s3p
412 | mov ebx,s2p
413 | mov eax,s1p
414 | ; xmm2 = low byte of (thresh-1) in all 16 lanes, xmm3 = 0
415 | dec thresh
416 | movd xmm2,thresh
417 | punpcklbw xmm2,xmm2
418 | punpcklwd xmm2,xmm2
419 | pshufd xmm2,xmm2,0
420 | pxor xmm3,xmm3
421 |
422 |
423 |
424 | row_loop:
425 | xor ecx,ecx
426 | align 16
427 | block_loop:
428 | movdqa xmm0,[eax+ecx] ; xmm0 = running minimum
429 | movdqa xmm1,xmm0 ; xmm1 = running maximum
430 | movdqa xmm5,[ebx+ecx]
431 | pminub xmm0,xmm5
432 | pmaxub xmm1,xmm5
433 | movdqa xmm5,[edx+ecx]
434 | pminub xmm0,xmm5
435 | pmaxub xmm1,xmm5
436 | movdqa xmm5,[edi+ecx]
437 | pminub xmm0,xmm5
438 | pmaxub xmm1,xmm5
439 | psubusb xmm1,xmm0 ; spread = max-min
440 | psubusb xmm1,xmm2 ; 0 exactly where spread <= thresh-1
441 | pcmpeqb xmm1,xmm3 ; 0FFh where spread < thresh
442 | pand xmm1,[esi+ecx] ; combine with the mask already in dstp
443 | movdqa [esi+ecx],xmm1
444 |
445 |
446 |
447 | add ecx,16
448 | cmp ecx,width_
449 | jl block_loop
450 |
451 | add esi,stride
452 | add edi,stride
453 | add edx,stride
454 | add ebx,stride
455 | add eax,stride
456 | dec height
457 | jnz row_loop
458 |
459 | ret
460 |
461 | checkAvgOscCorrelation_SSE2 endp
462 |
463 |
464 |
465 | or3Masks_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,s3p:dword,dstp:dword,stride:dword,width_:dword,height:dword
466 | ; dstp[x] = s1p[x] | s2p[x] | s3p[x] for every byte of the plane.
467 | mov edi,dstp
468 | mov edx,s3p
469 | mov ebx,s2p
470 | mov eax,s1p
471 | mov esi,width_
472 |
473 | row_loop:
474 | xor ecx,ecx
475 | align 16
476 | block_loop:
477 | movdqa xmm0,[edx+ecx]
478 | por xmm0,[ebx+ecx]
479 | por xmm0,[eax+ecx] ; OR order does not matter
480 | movdqa [edi+ecx],xmm0
481 |
482 | add ecx,16
483 | cmp ecx,esi
484 | jl block_loop
485 |
486 | add edi,stride
487 | add edx,stride
488 | add ebx,stride
489 | add eax,stride
490 | dec height
491 | jnz row_loop
492 |
493 | ret
494 |
495 | or3Masks_SSE2 endp
496 |
497 |
498 |
499 | orAndMasks_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword
500 | ; dstp[x] |= s1p[x] & s2p[x] for every byte of the plane.
501 | mov edx,dstp
502 | mov ebx,s2p
503 | mov eax,s1p
504 | mov esi,height ; row counter
505 | mov edi,width_
506 |
507 | row_loop:
508 | xor ecx,ecx
509 | align 16
510 | block_loop:
511 | movdqa xmm0,[eax+ecx]
512 | pand xmm0,[ebx+ecx] ; s1 & s2
513 | por xmm0,[edx+ecx] ; merged into what dstp already holds
514 | movdqa [edx+ecx],xmm0
515 |
516 |
517 | add ecx,16
518 | cmp ecx,edi
519 | jl block_loop
520 |
521 | add edx,stride
522 | add ebx,stride
523 | add eax,stride
524 | dec esi
525 | jnz row_loop
526 |
527 | ret
528 |
529 | orAndMasks_SSE2 endp
530 |
531 |
532 |
533 | andMasks_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword
534 | ; dstp[x] = s1p[x] & s2p[x] for every byte of the plane.
535 | mov edx,dstp
536 | mov ebx,s2p
537 | mov eax,s1p
538 | mov esi,height ; row counter
539 | mov edi,width_
540 |
541 | row_loop:
542 | xor ecx,ecx
543 | align 16
544 | block_loop:
545 | movdqa xmm0,[ebx+ecx]
546 | pand xmm0,[eax+ecx] ; AND is symmetric in its operands
547 | movdqa [edx+ecx],xmm0
548 |
549 | add ecx,16
550 | cmp ecx,edi
551 | jl block_loop
552 |
553 | add edx,stride
554 | add ebx,stride
555 | add eax,stride
556 | dec esi
557 | jnz row_loop
558 |
559 | ret
560 |
561 | andMasks_SSE2 endp
562 |
563 |
564 |
565 | checkSceneChange_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,stride:dword,width_:dword,height:dword,diffp:dword
566 | ; Stores at *diffp the 32-bit sum of absolute byte differences between the two planes.
567 | mov edi,s2p
568 | mov eax,s1p
569 | mov edx,width_
570 | mov esi,stride
571 | pxor xmm1,xmm1 ; xmm1 = running totals, one per 64-bit half
572 |
573 | row_loop:
574 | xor ecx,ecx
575 | align 16
576 | block_loop:
577 | movdqa xmm0,[edi+ecx]
578 | psadbw xmm0,[eax+ecx] ; two partial sums, one per 8-byte half
579 | paddd xmm1,xmm0
580 |
581 | add ecx,16
582 | cmp ecx,edx
583 | jl block_loop
584 |
585 | add edi,esi
586 | add eax,esi
587 | dec height
588 | jnz row_loop
589 | ; fold the upper half's total onto the lower one
590 | movdqa xmm2,xmm1
591 | psrldq xmm2,8
592 | paddd xmm2,xmm1
593 |
594 | mov eax,diffp
595 | movd DWORD PTR [eax],xmm2
596 |
597 | ret
598 |
599 | checkSceneChange_SSE2 endp
600 |
601 |
602 |
603 | VerticalBlur3_SSE2 proc public uses ebx esi edi srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword
604 | ; Vertical blur: interior rows get (above + 2*cur + below + 2)>>2; first and last rows get pavgb of the two edge rows. height < 2 returns without writing.
605 | cmp height,2
606 | jl done ; the edge rows each read a neighbouring row, so at least two rows are required
607 | mov eax,srcp ; eax = current source row
608 | mov ebx,dstp
609 | mov esi,eax
610 | mov edi,eax
611 | sub esi,stride ; esi = row above
612 | add edi,stride ; edi = row below
613 | mov edx,width_
614 | ; 0x0002,for rounding
615 | pcmpeqb xmm6,xmm6
616 | psrlw xmm6,15
617 | psllw xmm6,1
618 | pxor xmm7,xmm7 ; zero, used to widen bytes to words
619 |
620 | xor ecx,ecx
621 | ; first row: average of row 0 and row 1 (there is no row above)
622 | toploop:
623 | movdqa xmm0,[eax+ecx]
624 | pavgb xmm0,[edi+ecx]
625 | movdqa [ebx+ecx],xmm0
626 |
627 | add ecx,16
628 | cmp ecx,edx
629 | jl toploop
630 |
631 | add esi,stride
632 | add eax,stride
633 | add edi,stride
634 | add ebx,stride
635 | sub height,2 ; the main loop processes 2 lines fewer than the height
636 | jle bottomrow ; height == 2 has no interior rows; entering the loop would wrap the counter
637 | yloop:
638 | xor ecx,ecx
639 | xloop:
640 | movdqa xmm0,[esi+ecx]
641 | movdqa xmm1,[eax+ecx]
642 | movdqa xmm2,[edi+ecx]
643 | movdqa xmm3,xmm0
644 | movdqa xmm4,xmm1
645 | movdqa xmm5,xmm2
646 | punpcklbw xmm0,xmm7
647 | punpcklbw xmm1,xmm7
648 | punpcklbw xmm2,xmm7
649 | punpckhbw xmm3,xmm7
650 | punpckhbw xmm4,xmm7
651 | punpckhbw xmm5,xmm7
652 |
653 | ; add bottom to top
654 | paddw xmm0,xmm2
655 | paddw xmm3,xmm5
656 |
657 | ; multiply center by 2
658 | psllw xmm1,1
659 | psllw xmm4,1
660 |
661 | ; add center to sum
662 | paddw xmm0,xmm1
663 | paddw xmm3,xmm4
664 |
665 | ; add 2 to sum
666 | paddw xmm0,xmm6
667 | paddw xmm3,xmm6
668 |
669 | ; divide by 4
670 | psrlw xmm0,2
671 | psrlw xmm3,2
672 | packuswb xmm0,xmm3
673 | movdqa [ebx+ecx],xmm0
674 |
675 | add ecx,16
676 | cmp ecx,edx
677 | jl xloop
678 |
679 | add esi,stride
680 | add eax,stride
681 | add edi,stride
682 | add ebx,stride
683 | dec height
684 | jnz yloop
685 | bottomrow:
686 | xor ecx,ecx
687 | ; last row: average of the last two rows (there is no row below)
688 | bottomloop:
689 | movdqa xmm0,[esi+ecx]
690 | pavgb xmm0,[eax+ecx]
691 | movdqa [ebx+ecx],xmm0
692 |
693 | add ecx,16
694 | cmp ecx,edx
695 | jl bottomloop
696 | done:
697 | ret
698 |
699 | VerticalBlur3_SSE2 endp
700 |
701 |
702 |
703 | HorizontalBlur3_SSE2 proc public srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword
704 | ; dstp[x] = (srcp[x-1] + 2*srcp[x] + srcp[x+1] + 2) >> 2. Each 16-byte block also reads one byte before and one byte after itself.
705 | mov eax,srcp
706 | mov edx,dstp
707 | pxor xmm7,xmm7 ; zero, used to widen bytes to words
708 | ; 0x0002,for rounding
709 | pcmpeqb xmm6,xmm6
710 | psrlw xmm6,15
711 | psllw xmm6,1
712 |
713 | yloop:
714 | xor ecx,ecx
715 | align 16
716 | xloop:
717 | movdqu xmm0,[eax+ecx-1] ; left neighbours
718 | movdqa xmm1,[eax+ecx] ; centre
719 | movdqu xmm2,[eax+ecx+1] ; right neighbours
720 | movdqa xmm3,xmm0
721 | movdqa xmm4,xmm1
722 | movdqa xmm5,xmm2
723 | punpcklbw xmm0,xmm7 ; low 8 bytes of each vector -> words
724 | punpcklbw xmm1,xmm7
725 | punpcklbw xmm2,xmm7
726 | punpckhbw xmm3,xmm7 ; high 8 bytes of each vector -> words
727 | punpckhbw xmm4,xmm7
728 | punpckhbw xmm5,xmm7
729 | ; center * 2
730 | psllw xmm1,1
731 | psllw xmm4,1
732 | paddw xmm1,xmm0
733 | paddw xmm4,xmm3
734 | paddw xmm1,xmm2
735 | paddw xmm4,xmm5
736 |
737 | ; add 2 to sum
738 | paddw xmm1,xmm6
739 | paddw xmm4,xmm6
740 |
741 | ; divide by 4
742 | psrlw xmm1,2
743 | psrlw xmm4,2
744 | packuswb xmm1,xmm4 ; words back to 16 bytes
745 | movdqa [edx+ecx],xmm1
746 |
747 | add ecx,16
748 | cmp ecx,width_
749 | jl xloop
750 |
751 | add eax,stride
752 | add edx,stride
753 | dec height
754 | jnz yloop
755 |
756 | ret
757 |
758 | HorizontalBlur3_SSE2 endp
759 |
760 |
761 |
762 | HorizontalBlur6_SSE2 proc public srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword
763 | ; dstp[x] = (srcp[x-2] + 4*(srcp[x-1] + srcp[x+1]) + 6*srcp[x] + srcp[x+2] + 8) >> 4. Each block also reads two bytes before and after itself.
764 | mov eax,srcp
765 | mov edx,dstp
766 | movdqu xmm6,oword ptr sixsMask_W ; eight words of 6, the centre weight
767 | pxor xmm7,xmm7 ; zero, used to widen bytes to words
768 |
769 | yloop:
770 | xor ecx,ecx
771 | align 16
772 | xloop:
773 | movdqu xmm0,[eax+ecx-2]
774 | movdqu xmm1,[eax+ecx+2]
775 | movdqa xmm2,xmm0
776 | movdqa xmm3,xmm1
777 | punpcklbw xmm0,xmm7
778 | punpcklbw xmm1,xmm7
779 | punpckhbw xmm2,xmm7
780 | punpckhbw xmm3,xmm7
781 |
782 | ; srcp[x-2] + srcp[x+2]
783 | paddw xmm0,xmm1
784 | paddw xmm2,xmm3
785 |
786 | ; srcp[x-1] + srcp[x+1]
787 | movdqu xmm1,[eax+ecx-1]
788 | movdqu xmm3,[eax+ecx+1]
789 | movdqa xmm4,xmm1
790 | movdqa xmm5,xmm3
791 | punpcklbw xmm1,xmm7
792 | punpcklbw xmm3,xmm7
793 | punpckhbw xmm4,xmm7
794 | punpckhbw xmm5,xmm7
795 | paddw xmm1,xmm3
796 | paddw xmm4,xmm5
797 |
798 | ; (srcp[x-1] + srcp[x+1])*4
799 | psllw xmm1,2
800 | psllw xmm4,2
801 |
802 | ; (srcp[x-1] + srcp[x+1])*4 + srcp[x-2] + srcp[x+2]
803 | paddw xmm0,xmm1
804 | paddw xmm2,xmm4
805 |
806 | ; srcp[x] * 6
807 | movdqa xmm1,[eax+ecx]
808 | movdqu xmm5,oword ptr eightsMask_W ; reloaded every block because xmm5 served as scratch above
809 | movdqa xmm3,xmm1
810 | punpcklbw xmm1,xmm7
811 | punpckhbw xmm3,xmm7
812 | pmullw xmm1,xmm6
813 | pmullw xmm3,xmm6
814 | paddw xmm0,xmm1
815 | paddw xmm2,xmm3
816 |
817 | ; add 8
818 | paddw xmm0,xmm5
819 | paddw xmm2,xmm5
820 |
821 | ; divide by 16
822 | psrlw xmm0,4
823 | psrlw xmm2,4
824 | packuswb xmm0,xmm2 ; words back to 16 bytes
825 | movdqa [edx+ecx],xmm0
826 |
827 | add ecx,16
828 | cmp ecx,width_
829 | jl xloop
830 |
831 | add eax,stride
832 | add edx,stride
833 | dec height
834 | jnz yloop
835 |
836 | ret
837 |
838 | HorizontalBlur6_SSE2 endp
839 |
840 |
841 |
842 | end
--------------------------------------------------------------------------------
/TComb/TComb_asm_x64.asm:
--------------------------------------------------------------------------------
1 | .code
2 |
3 | ;buildFinalMask_SSE2 proc s1p:dword,s2p:dword,m1p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword
4 | ; s1p = rcx
5 | ; s2p = rdx
6 | ; m1p = r8
7 | ; dstp = r9
8 |
9 | buildFinalMask_SSE2 proc public frame
10 | ; dstp[x] = m1p[x] where |s1p[x]-s2p[x]| < thresh, else 0. Arguments five to eight are read from the caller's stack through rbp.
11 | stride equ dword ptr [rbp+48]
12 | width_ equ dword ptr [rbp+56]
13 | height equ dword ptr [rbp+64]
14 | thresh equ dword ptr [rbp+72]
15 |
16 | push rbp
17 | .pushreg rbp
18 | mov rbp,rsp
19 | push rbx
20 | .pushreg rbx
21 | push rsi
22 | .pushreg rsi
23 | push rdi
24 | .pushreg rdi
25 | .endprolog
26 |
27 | mov rax,rcx ; rax = s1p
28 | mov rbx,rdx ; rbx = s2p
29 | mov rdx,r8 ; rdx = m1p
30 | mov rsi,r9 ; rsi = dstp
31 | movsxd r8,stride ; r8 = stride, sign-extended
32 | mov edi,width_ ; a 32-bit load clears the upper half of rdi
33 | mov r9d,height ; likewise for r9, the row counter
34 |
35 | ; xmm4 = low byte of (thresh-1) in all 16 lanes, xmm5 = 0
36 | dec thresh
37 | movd xmm4,thresh
38 | punpcklbw xmm4,xmm4
39 | punpcklwd xmm4,xmm4
40 | pshufd xmm4,xmm4,0
41 | pxor xmm5,xmm5
42 |
43 |
44 |
45 |
46 | row_loop:
47 | xor ecx,ecx
48 | block_loop:
49 | movdqa xmm0,[rax+rcx]
50 | movdqa xmm2,xmm0
51 | movdqa xmm1,[rbx+rcx]
52 | psubusb xmm0,xmm1 ; s1-s2, clamped at 0
53 | psubusb xmm1,xmm2 ; s2-s1, clamped at 0
54 | por xmm0,xmm1 ; |s1-s2|
55 | psubusb xmm0,xmm4 ; 0 exactly where |s1-s2| <= thresh-1
56 | pcmpeqb xmm0,xmm5 ; 0FFh where |s1-s2| < thresh
57 | pand xmm0,[rdx+rcx]
58 | movdqa [rsi+rcx],xmm0
59 |
60 | add rcx,16
61 | cmp rcx,rdi
62 | jl block_loop
63 |
64 | add rsi,r8
65 | add rdx,r8
66 | add rbx,r8
67 | add rax,r8
68 | dec r9
69 | jnz row_loop
70 |
71 | pop rdi
72 | pop rsi
73 | pop rbx
74 | pop rbp
75 |
76 | ret
77 |
78 | buildFinalMask_SSE2 endp
79 |
80 |
81 |
82 | ;andNeighborsInPlace_SSE2 proc srcp:dword,stride:dword,width_:dword,height:dword
83 | ; srcp = rcx
84 | ; stride = edx
85 | ; width_ = r8d
86 | ; height = r9d
87 | ; In place: srcp[x] &= OR of bytes x-1,x,x+1 of the row above and of the row below.
88 | andNeighborsInPlace_SSE2 proc public frame
89 |
90 | push rbp
91 | .pushreg rbp
92 | mov rbp,rsp
93 | push rsi
94 | .pushreg rsi
95 | push rdi
96 | .pushreg rdi
97 | .endprolog
98 |
99 | mov rax,rcx ; rax = current row
100 | xchg r8,rdx ; r8 <- stride, rdx <- width_ (upper halves still undefined)
101 | movsxd r8,r8d ; stride sign-extended to 64 bits
102 | mov edx,edx ; zero-extend width_: the loop compares the full rdx, and bits 63:32 of an int argument are undefined
103 | mov rsi,rax
104 | mov rdi,rax
105 | sub rsi,r8 ; rsi = row above
106 | add rdi,r8 ; rdi = row below
107 | mov r10,16 ; block size in bytes
108 | yloop:
109 | xor rcx,rcx
110 | xloop:
111 | movdqa xmm0,[rsi+rcx]
112 | movdqu xmm1,[rsi+rcx-1]
113 | por xmm0,xmm1
114 | movdqu xmm1,[rsi+rcx+1]
115 | por xmm0,xmm1
116 | movdqa xmm1,[rax+rcx] ; current row, ANDed in last
117 | movdqu xmm2,[rdi+rcx-1]
118 | por xmm0,xmm2
119 | por xmm0,[rdi+rcx]
120 | movdqu xmm2,[rdi+rcx+1]
121 | por xmm0,xmm2 ; xmm0 = OR of the six neighbours
122 | pand xmm0,xmm1
123 | movdqa [rax+rcx],xmm0
124 |
125 | add rcx,r10
126 | cmp rcx,rdx
127 | jl xloop
128 |
129 | add rax,r8
130 | add rsi,r8
131 | add rdi,r8
132 | dec r9d ; height is only ever used as a 32-bit counter
133 | jnz yloop
134 |
135 | pop rdi
136 | pop rsi
137 | pop rbp
138 |
139 | ret
140 |
141 | andNeighborsInPlace_SSE2 endp
142 |
143 |
144 |
145 | ;absDiff_SSE2 proc srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword
146 | ; srcp1 = rcx
147 | ; srcp2 = rdx
148 | ; dstp = r8
149 | ; stride = r9d
150 |
151 | absDiff_SSE2 proc public frame
152 | ; dstp[x] = |srcp1[x] - srcp2[x]| for every byte; width_ and height are read from the caller's stack through rbp.
153 | width_ equ dword ptr [rbp+48]
154 | height equ dword ptr [rbp+56]
155 |
156 | push rbp
157 | .pushreg rbp
158 | mov rbp,rsp
159 | push rbx
160 | .pushreg rbx
161 | push rsi
162 | .pushreg rsi
163 | .endprolog
164 |
165 | mov rax,rcx ; rax = srcp1
166 | mov rsi,rdx ; rsi = srcp2
167 | mov rbx,r8 ; rbx = dstp
168 | movsxd r8,r9d ; r8 = stride, sign-extended
169 | mov edx,width_ ; a 32-bit load clears the upper half of rdx
170 | mov r9d,height ; row counter
171 |
172 |
173 |
174 |
175 | row_loop:
176 | xor ecx,ecx
177 | block_loop:
178 | movdqa xmm0,[rax+rcx]
179 | movdqa xmm1,[rsi+rcx]
180 | movdqa xmm2,xmm0
181 | pmaxub xmm0,xmm1 ; larger of the two bytes
182 | pminub xmm1,xmm2 ; smaller of the two bytes
183 | psubb xmm0,xmm1 ; max-min = absolute difference
184 | movdqa [rbx+rcx],xmm0
185 |
186 | add rcx,16
187 | cmp rcx,rdx
188 | jl block_loop
189 |
190 | add rbx,r8
191 | add rsi,r8
192 | add rax,r8
193 | dec r9d
194 | jnz row_loop
195 |
196 | pop rsi
197 | pop rbx
198 | pop rbp
199 |
200 | ret
201 |
202 | absDiff_SSE2 endp
203 |
204 |
205 |
206 | ;absDiffAndMinMask_SSE2 proc srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword
207 | ; srcp1 = rcx
208 | ; srcp2 = rdx
209 | ; dstp = r8
210 | ; stride = r9d
211 |
212 | absDiffAndMinMask_SSE2 proc public frame
213 | ; dstp[x] = min(dstp[x], |srcp1[x] - srcp2[x]|); width_ and height are read from the caller's stack through rbp.
214 | width_ equ dword ptr [rbp+48]
215 | height equ dword ptr [rbp+56]
216 |
217 | push rbp
218 | .pushreg rbp
219 | mov rbp,rsp
220 | push rbx
221 | .pushreg rbx
222 | push rsi
223 | .pushreg rsi
224 | push rdi
225 | .pushreg rdi
226 | .endprolog
227 |
228 | mov rax,rcx ; rax = srcp1
229 | mov rsi,rdx ; rsi = srcp2
230 | mov rbx,r8 ; rbx = dstp
231 | movsxd r8,r9d ; r8 = stride, sign-extended
232 | mov edx,width_ ; a 32-bit load clears the upper half of rdx
233 | mov edi,height ; row counter
234 |
235 |
236 |
237 |
238 | row_loop:
239 | xor ecx,ecx
240 | block_loop:
241 | movdqa xmm0,[rax+rcx]
242 | movdqa xmm1,[rsi+rcx]
243 | movdqa xmm2,xmm0
244 | pmaxub xmm0,xmm1
245 | pminub xmm1,xmm2
246 | psubb xmm0,xmm1 ; max-min = |srcp1-srcp2|
247 | pminub xmm0,[rbx+rcx]
248 | movdqa [rbx+rcx],xmm0
249 |
250 | add rcx,16
251 | cmp rcx,rdx
252 | jl block_loop
253 |
254 | add rbx,r8
255 | add rsi,r8
256 | add rax,r8
257 | dec edi
258 | jnz row_loop
259 |
260 | pop rdi
261 | pop rsi
262 | pop rbx
263 | pop rbp
264 |
265 | ret
266 |
267 | absDiffAndMinMask_SSE2 endp
268 |
269 |
270 |
271 | ;absDiffAndMinMaskThresh_SSE2 proc srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword
272 | ; srcp1 = rcx
273 | ; srcp2 = rdx
274 | ; dstp = r8
275 | ; stride = r9d
276 |
277 | absDiffAndMinMaskThresh_SSE2 proc public frame
278 | ; dstp[x] = 0FFh where min(dstp[x], |srcp1[x]-srcp2[x]|) < thresh, else 0.
279 | width_ equ dword ptr [rbp+48]
280 | height equ dword ptr [rbp+56]
281 | thresh equ dword ptr [rbp+64]
282 |
283 | push rbp
284 | .pushreg rbp
285 | mov rbp,rsp
286 | push rbx
287 | .pushreg rbx
288 | push rsi
289 | .pushreg rsi
290 | push rdi
291 | .pushreg rdi
292 | .endprolog
293 |
294 | mov rax,rcx ; rax = srcp1
295 | mov rsi,rdx ; rsi = srcp2
296 | mov rbx,r8 ; rbx = dstp
297 | movsxd r8,r9d ; r8 = stride, sign-extended
298 | mov edx,width_ ; a 32-bit load clears the upper half of rdx
299 | mov edi,height ; row counter
300 | ; xmm3 = low byte of (thresh-1) in all 16 lanes, xmm4 = 0
301 | dec thresh
302 | movd xmm3,thresh
303 | punpcklbw xmm3,xmm3
304 | punpcklwd xmm3,xmm3
305 | pshufd xmm3,xmm3,0
306 | pxor xmm4,xmm4
307 |
308 |
309 |
310 |
311 | row_loop:
312 | xor ecx,ecx
313 | block_loop:
314 | movdqa xmm0,[rax+rcx]
315 | movdqa xmm1,[rsi+rcx]
316 | movdqa xmm2,xmm0
317 | pmaxub xmm0,xmm1
318 | pminub xmm1,xmm2
319 | psubb xmm0,xmm1 ; |srcp1-srcp2|
320 | pminub xmm0,[rbx+rcx] ; running minimum with the value already in dstp
321 | psubusb xmm0,xmm3 ; 0 exactly where minimum <= thresh-1
322 | pcmpeqb xmm0,xmm4 ; 0FFh where minimum < thresh
323 | movdqa [rbx+rcx],xmm0
324 |
325 | add rcx,16
326 | cmp rcx,rdx
327 | jl block_loop
328 |
329 | add rbx,r8
330 | add rsi,r8
331 | add rax,r8
332 | dec edi
333 | jnz row_loop
334 |
335 | pop rdi
336 | pop rsi
337 | pop rbx
338 | pop rbp
339 |
340 | ret
341 |
342 | absDiffAndMinMaskThresh_SSE2 endp
343 |
344 |
345 |
346 | ;MinMax_SSE2 proc srcp:dword,minp:dword,maxp:dword,src_stride:dword,min_stride:dword,width_:dword,height:dword,thresh:dword
347 | ; srcp = rcx
348 | ; minp = rdx
349 | ; maxp = r8
350 | ; src_stride = r9d
351 | ; For each byte: minp = (min over rows above/current/below at offsets -1,0,+1) - thresh, maxp = (max of the same 9 bytes) + thresh, both saturated.
352 | MinMax_SSE2 proc public frame
353 | ; arguments five to eight live on the caller's stack and are addressed through rbp
354 | min_stride equ dword ptr [rbp+48]
355 | width_ equ dword ptr [rbp+56]
356 | height equ dword ptr [rbp+64]
357 | thresh equ dword ptr [rbp+72]
358 |
359 | push rbp
360 | .pushreg rbp
361 | mov rbp,rsp
362 | push rbx
363 | .pushreg rbx
364 | push rsi
365 | .pushreg rsi
366 | push rdi
367 | .pushreg rdi
368 | push r12
369 | .pushreg r12
370 | .endprolog
371 |
372 | mov rax,rcx ; rax = current source row
373 | mov rsi,rax
374 | mov rdi,rax
375 | mov rbx,rdx ; rbx = minp
376 | mov rdx,r8 ; rdx = maxp
377 | movsxd r8,r9d ; r8 = src_stride, sign-extended
378 | movsxd r9,min_stride ; r9 = stride of both output planes
379 | mov r10d,width_ ; 32-bit load, upper half of r10 is cleared
380 | mov r11d,height ; row counter
381 | mov r12,16 ; block size in bytes
382 | sub rsi,r8 ; rsi = row above
383 | add rdi,r8 ; rdi = row below
384 | ; xmm3 = low byte of thresh copied into all 16 byte lanes (thresh is not decremented here)
385 | movd xmm3,thresh
386 | punpcklbw xmm3,xmm3
387 | punpcklwd xmm3,xmm3
388 | punpckldq xmm3,xmm3
389 | punpcklqdq xmm3,xmm3
390 |
391 | yloop:
392 | xor rcx,rcx ; rcx = byte offset inside the row
393 | xloop:
394 | ; srcp-1 is aligned because the pointer passed to this function is srcp+stride+1.
395 | movdqa xmm0,[rsi+rcx-1] ; xmm0 accumulates the minimum
396 | movdqa xmm1,xmm0 ; xmm1 accumulates the maximum
397 | movdqu xmm2,[rsi+rcx]
398 | pminub xmm0,xmm2
399 | pmaxub xmm1,xmm2
400 | movdqu xmm2,[rsi+rcx+1]
401 | pminub xmm0,xmm2
402 | pmaxub xmm1,xmm2
403 | movdqa xmm2,[rax+rcx-1] ; current row, same three offsets
404 | pminub xmm0,xmm2
405 | pmaxub xmm1,xmm2
406 | movdqu xmm2,[rax+rcx]
407 | pminub xmm0,xmm2
408 | pmaxub xmm1,xmm2
409 | movdqu xmm2,[rax+rcx+1]
410 | pminub xmm0,xmm2
411 | pmaxub xmm1,xmm2
412 | movdqa xmm2,[rdi+rcx-1] ; row below, same three offsets
413 | pminub xmm0,xmm2
414 | pmaxub xmm1,xmm2
415 | movdqu xmm2,[rdi+rcx]
416 | pminub xmm0,xmm2
417 | pmaxub xmm1,xmm2
418 | movdqu xmm2,[rdi+rcx+1]
419 | pminub xmm0,xmm2
420 | pmaxub xmm1,xmm2
421 | psubusb xmm0,xmm3 ; min - thresh, clamped at 0
422 | paddusb xmm1,xmm3 ; max + thresh, clamped at 255
423 | movdqa [rbx+rcx],xmm0
424 | movdqa [rdx+rcx],xmm1
425 |
426 | add rcx,r12
427 | cmp rcx,r10
428 | jl xloop
429 | ; source rows advance by src_stride; both output planes advance by min_stride
430 | add rsi,r8
431 | add rax,r8
432 | add rdi,r8
433 | add rbx,r9
434 | add rdx,r9
435 | dec r11d
436 | jnz yloop
437 |
438 | pop r12
439 | pop rdi
440 | pop rsi
441 | pop rbx
442 | pop rbp
443 |
444 | ret
445 |
446 | MinMax_SSE2 endp
447 |
448 |
449 |
450 | ;checkOscillation5_SSE2 proc p2p:dword,p1p:dword,s1p:dword,n1p:dword,n2p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword
451 | ; p2p = rcx
452 | ; p1p = rdx
453 | ; s1p = r8
454 | ; n1p = r9
455 |
456 | checkOscillation5_SSE2 proc public frame
457 |
458 | n2p equ qword ptr [rbp+48]
459 | dstp equ qword ptr [rbp+56]
460 | stride equ dword ptr [rbp+64]
461 | width_ equ dword ptr [rbp+72]
462 | height equ dword ptr [rbp+80]
463 | thresh equ dword ptr [rbp+88]
464 |
465 | push rbp
466 | .pushreg rbp
467 | mov rbp,rsp
468 | push rbx
469 | .pushreg rbx
470 | push rsi
471 | .pushreg rsi
472 | push rdi
473 | .pushreg rdi
474 | push r12
475 | .pushreg r12
476 | sub rsp,64
477 | .allocstack 64
478 | movdqu oword ptr[rsp],xmm6
479 | .savexmm128 xmm6,0
480 | movdqu oword ptr[rsp+16],xmm7
481 | .savexmm128 xmm7,16
482 | movdqu oword ptr[rsp+32],xmm8
483 | .savexmm128 xmm8,32
484 | movdqu oword ptr[rsp+48],xmm9
485 | .savexmm128 xmm9,48
486 | .endprolog
487 |
488 | mov rax,rcx ; p2p
489 | mov rbx,rdx ; p1p
490 | mov rdx,r8 ; s1p
491 | mov rdi,r9 ; n1p
492 | mov rsi,n2p ; n2p
493 | mov r8,dstp
494 | movsxd r9,stride
495 | mov r10d,width_
496 | mov r11d,height
497 | mov r12,16
498 |
499 | pxor xmm6,xmm6
500 |
501 | ; trick:
502 | ; x x<=(thresh-1) ==> x-(thresh-1)<=0 ==> sub_sat(x,thresh-1)==0
503 | ; pcmpeqb(psubusb(x,thresh-1),zero): 0xFF where x max22) || max22 == 0 || (max31 < min22) || max31 == 0) &&
542 | ; max31 - min31 < thresh && max22 - min22 < thresh)
543 | ; No check for (max22 == 0) or (max31 == 0), like in C, sub_sat handles automatically
544 |
545 | movdqa xmm4,xmm3 ; max22
546 | movdqa xmm5,xmm1 ; max31
547 | psubusb xmm4,xmm2 ; max22-min22
548 | psubusb xmm5,xmm0 ; max31-min31
549 | ; minus (thresh-1)
550 | psubusb xmm4,xmm7 ; max22-min22 - (thresh-1)
551 | psubusb xmm5,xmm7 ; max31-min31 - (thresh-1)
552 |
553 | ; minus 1
554 | psubusb xmm2,xmm9 ; min22-1
555 | psubusb xmm0,xmm9 ; min31-1
556 |
557 | psubusb xmm1,xmm2 ; max31 - (min22-1)
558 | psubusb xmm3,xmm0 ; max22 - (min31-1)
559 |
560 | pcmpeqb xmm1,xmm6
561 | pcmpeqb xmm3,xmm6
562 | pcmpeqb xmm4,xmm6
563 | pcmpeqb xmm5,xmm6
564 | por xmm1,xmm3
565 | pand xmm4,xmm5
566 | pand xmm1,xmm4
567 | movdqa [r8+rcx],xmm1
568 |
569 | add rcx,r12
570 | cmp rcx,r10
571 | jl xloop
572 |
573 | add rax,r9
574 | add rbx,r9
575 | add rdx,r9
576 | add rdi,r9
577 | add rsi,r9
578 | add r8,r9
579 | dec r11d
580 | jnz yloop
581 |
582 | movdqu xmm9,oword ptr[rsp+48]
583 | movdqu xmm8,oword ptr[rsp+32]
584 | movdqu xmm7,oword ptr[rsp+16]
585 | movdqu xmm6,oword ptr[rsp]
586 | add rsp,64
587 | pop r12
588 | pop rdi
589 | pop rsi
590 | pop rbx
591 | pop rbp
592 |
593 | ret
594 |
595 | checkOscillation5_SSE2 endp
596 |
597 |
598 |
599 | ;calcAverages_SSE2 proc s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword
600 | ; s1p = rcx
601 | ; s2p = rdx
602 | ; dstp = r8
603 | ; stride = r9d
604 |
605 | calcAverages_SSE2 proc public frame
606 | ; dstp[x] = rounded byte average of s1p[x] and s2p[x] (pavgb); width_ and height are read from the caller's stack through rbp.
607 | width_ equ dword ptr [rbp+48]
608 | height equ dword ptr [rbp+56]
609 |
610 | push rbp
611 | .pushreg rbp
612 | mov rbp,rsp
613 | push rbx
614 | .pushreg rbx
615 | push rsi
616 | .pushreg rsi
617 | push rdi
618 | .pushreg rdi
619 | .endprolog
620 |
621 | mov rax,rcx ; rax = s1p
622 | mov rbx,rdx ; rbx = s2p
623 | mov rdx,r8 ; rdx = dstp
624 | movsxd r8,r9d ; r8 = stride, sign-extended
625 | mov esi,width_ ; a 32-bit load clears the upper half of rsi
626 | mov edi,height ; row counter
627 |
628 |
629 |
630 |
631 | row_loop:
632 | xor ecx,ecx
633 | block_loop:
634 | movdqa xmm0,[rbx+rcx]
635 | pavgb xmm0,[rax+rcx] ; the average is symmetric in its operands
636 | movdqa [rdx+rcx],xmm0
637 |
638 | add rcx,16
639 | cmp rcx,rsi
640 | jl block_loop
641 |
642 | add rdx,r8
643 | add rbx,r8
644 | add rax,r8
645 | dec edi
646 | jnz row_loop
647 |
648 | pop rdi
649 | pop rsi
650 | pop rbx
651 | pop rbp
652 |
653 | ret
654 |
655 | calcAverages_SSE2 endp
656 |
657 |
658 |
659 | ;checkAvgOscCorrelation_SSE2 proc s1p:dword,s2p:dword,s3p:dword,s4p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword
660 | ; s1p = rcx
661 | ; s2p = rdx
662 | ; s3p = r8
663 | ; s4p = r9
664 |
665 | checkAvgOscCorrelation_SSE2 proc public frame
666 | ; dstp[x] is kept where max(s1..s4) - min(s1..s4) < thresh and cleared to 0 elsewhere.
667 | dstp equ qword ptr [rbp+48]
668 | stride equ dword ptr [rbp+56]
669 | width_ equ dword ptr [rbp+64]
670 | height equ dword ptr [rbp+72]
671 | thresh equ dword ptr [rbp+80]
672 |
673 | push rbp
674 | .pushreg rbp
675 | mov rbp,rsp
676 | push rbx
677 | .pushreg rbx
678 | push rsi
679 | .pushreg rsi
680 | push rdi
681 | .pushreg rdi
682 | .endprolog
683 |
684 | mov rax,rcx ; rax = s1p
685 | mov rbx,rdx ; rbx = s2p
686 | mov rdx,r8 ; rdx = s3p
687 | mov rdi,r9 ; rdi = s4p
688 | mov rsi,dstp
689 | movsxd r8,stride ; r8 = stride, sign-extended
690 | mov r9d,width_ ; a 32-bit load clears the upper half of r9
691 | mov r10d,height ; row counter
692 |
693 | ; xmm2 = low byte of (thresh-1) in all 16 lanes, xmm3 = 0
694 | dec thresh
695 | movd xmm2,thresh
696 | punpcklbw xmm2,xmm2
697 | punpcklwd xmm2,xmm2
698 | pshufd xmm2,xmm2,0
699 | pxor xmm3,xmm3
700 |
701 |
702 |
703 |
704 | row_loop:
705 | xor ecx,ecx
706 | block_loop:
707 | movdqa xmm0,[rax+rcx] ; xmm0 = running minimum
708 | movdqa xmm1,xmm0 ; xmm1 = running maximum
709 | movdqa xmm5,[rbx+rcx]
710 | pminub xmm0,xmm5
711 | pmaxub xmm1,xmm5
712 | movdqa xmm5,[rdx+rcx]
713 | pminub xmm0,xmm5
714 | pmaxub xmm1,xmm5
715 | movdqa xmm5,[rdi+rcx]
716 | pminub xmm0,xmm5
717 | pmaxub xmm1,xmm5
718 | psubusb xmm1,xmm0 ; spread = max-min
719 | psubusb xmm1,xmm2 ; 0 exactly where spread <= thresh-1
720 | pcmpeqb xmm1,xmm3 ; 0FFh where spread < thresh
721 | pand xmm1,[rsi+rcx] ; combine with the mask already in dstp
722 | movdqa [rsi+rcx],xmm1
723 |
724 |
725 |
726 | add rcx,16
727 | cmp rcx,r9
728 | jl block_loop
729 |
730 | add rsi,r8
731 | add rdi,r8
732 | add rdx,r8
733 | add rbx,r8
734 | add rax,r8
735 | dec r10d
736 | jnz row_loop
737 |
738 | pop rdi
739 | pop rsi
740 | pop rbx
741 | pop rbp
742 |
743 | ret
744 |
745 | checkAvgOscCorrelation_SSE2 endp
746 |
747 |
748 |
749 | ;or3Masks_SSE2 proc s1p:dword,s2p:dword,s3p:dword,dstp:dword,stride:dword,width_:dword,height:dword
750 | ; s1p = rcx
751 | ; s2p = rdx
752 | ; s3p = r8
753 | ; dstp = r9
754 |
or3Masks_SSE2 proc public frame

; Stack-passed arguments, addressed through the rbp frame set up below.
stride equ dword ptr [rbp+48]
width_ equ dword ptr [rbp+56]
height equ dword ptr [rbp+64]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rbx
    .pushreg rbx
    push rsi
    .pushreg rsi
    push rdi
    .pushreg rdi
    .endprolog

    ; rax=s1p, rbx=s2p, rdx=s3p, rdi=dstp; rcx becomes the in-row byte offset
    mov rdi,r9
    mov rbx,rdx
    mov rdx,r8
    mov rax,rcx
    movsxd r8,stride            ; r8 = stride, sign-extended
    mov esi,width_              ; rsi = width (32-bit write clears the upper half)
    mov r9d,height              ; r9d = rows left to process

yloop:
    xor rcx,rcx
xloop:
    ; dst = s1 | s2 | s3, 16 bytes per iteration
    movdqa xmm0,[rax+rcx]
    por xmm0,[rbx+rcx]
    por xmm0,[rdx+rcx]
    movdqa [rdi+rcx],xmm0

    add rcx,16
    cmp rcx,rsi
    jl xloop

    add rax,r8
    add rbx,r8
    add rdx,r8
    add rdi,r8
    dec r9d
    jnz yloop

    pop rdi
    pop rsi
    pop rbx
    pop rbp

    ret

or3Masks_SSE2 endp
810 |
811 |
812 |
813 | ;orAndMasks_SSE2 proc s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword
814 | ; s1p = rcx
815 | ; s2p = rdx
816 | ; dstp = r8
817 | ; stride = r9d
818 |
orAndMasks_SSE2 proc public frame

; Stack-passed arguments, addressed through the rbp frame set up below.
width_ equ dword ptr [rbp+48]
height equ dword ptr [rbp+56]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rbx
    .pushreg rbx
    push rsi
    .pushreg rsi
    push rdi
    .pushreg rdi
    .endprolog

    ; rax=s1p, rbx=s2p, rdx=dstp; rcx becomes the in-row byte offset
    mov rax,rcx
    mov rbx,rdx
    mov rdx,r8
    movsxd r8,r9d               ; r8 = stride, sign-extended
    mov edi,width_              ; rdi = width (32-bit write clears the upper half)
    mov esi,height              ; esi = rows left to process

yloop:
    xor rcx,rcx
xloop:
    ; dst |= (s1 & s2), 16 bytes per iteration
    movdqa xmm0,[rax+rcx]
    pand xmm0,[rbx+rcx]
    por xmm0,[rdx+rcx]
    movdqa [rdx+rcx],xmm0
    add rcx,16
    cmp rcx,rdi
    jl xloop

    add rax,r8
    add rbx,r8
    add rdx,r8
    dec esi
    jnz yloop

    pop rdi
    pop rsi
    pop rbx
    pop rbp

    ret

orAndMasks_SSE2 endp
871 |
872 |
873 |
874 | ;andMasks_SSE2 proc s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword
875 | ; s1p = rcx
876 | ; s2p = rdx
877 | ; dstp = r8
878 | ; stride = r9d
879 |
andMasks_SSE2 proc public frame

; Stack-passed arguments, addressed through the rbp frame set up below.
width_ equ dword ptr [rbp+48]
height equ dword ptr [rbp+56]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rbx
    .pushreg rbx
    push rsi
    .pushreg rsi
    push rdi
    .pushreg rdi
    .endprolog

    ; rax=s1p, rbx=s2p, rdx=dstp; rcx becomes the in-row byte offset
    mov rax,rcx
    mov rbx,rdx
    mov rdx,r8
    movsxd r8,r9d               ; r8 = stride, sign-extended
    mov edi,width_              ; rdi = width (32-bit write clears the upper half)
    mov esi,height              ; esi = rows left to process

yloop:
    xor rcx,rcx
xloop:
    ; dst = s1 & s2, 16 bytes per iteration
    movdqa xmm0,[rax+rcx]
    pand xmm0,[rbx+rcx]
    movdqa [rdx+rcx],xmm0
    add rcx,16
    cmp rcx,rdi
    jl xloop

    add rax,r8
    add rbx,r8
    add rdx,r8
    dec esi
    jnz yloop

    pop rdi
    pop rsi
    pop rbx
    pop rbp

    ret

andMasks_SSE2 endp
930 |
931 |
932 |
933 | ;checkSceneChange_SSE2 proc s1p:dword,s2p:dword,stride:dword,width_:dword,height:dword,diffp:dword
934 | ; s1p = rcx
935 | ; s2p = rdx
936 | ; stride = r8d
937 | ; width_ = r9d
938 |
checkSceneChange_SSE2 proc public frame

; Stack-passed arguments, addressed through the rbp frame set up below.
height equ dword ptr [rbp+48]
diffp equ qword ptr [rbp+56]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rsi
    .pushreg rsi
    .endprolog

    ; rax=s1p, rsi=s2p; rcx becomes the in-row byte offset
    mov rax,rcx
    mov rsi,rdx
    movsxd r8,r8d               ; r8 = stride, sign-extended
    mov edx,r9d                 ; rdx = width (32-bit write clears the upper half)
    mov r9d,height              ; r9d = rows left to process

    pxor xmm1,xmm1              ; xmm1 = two 64-bit running SAD totals

yloop:
    xor rcx,rcx
xloop:
    ; psadbw yields one byte-difference sum per 64-bit half; accumulate as qwords
    movdqa xmm0,[rax+rcx]
    psadbw xmm0,[rsi+rcx]
    paddq xmm1,xmm0

    add rcx,16
    cmp rcx,rdx
    jl xloop

    add rax,r8
    add rsi,r8
    dec r9d
    jnz yloop

    ; fold the high half onto the low half and store the 64-bit total to *diffp
    movdqa xmm2,xmm1
    psrldq xmm1,8
    paddq xmm2,xmm1

    mov rax,diffp
    movd QWORD PTR [rax],xmm2

    pop rsi
    pop rbp

    ret

checkSceneChange_SSE2 endp
992 |
993 |
994 |
995 | ;VerticalBlur3_SSE2 proc srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword
996 | ; srcp = rcx
997 | ; dstp = rdx
998 | ; stride = r8d
999 | ; width_ = r9d
1000 |
VerticalBlur3_SSE2 proc public frame

; Vertical [1 2 1]/4 blur. The first and last rows are produced as the
; rounded average of the row and its single vertical neighbour.
; Expects height >= 2 (the top row reads the row below it).
height equ dword ptr [rbp+48]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rbx
    .pushreg rbx
    push rsi
    .pushreg rsi
    push rdi
    .pushreg rdi

    ; xmm6/xmm7 are used below, so they are saved here and restored before returning
    sub rsp,32
    .allocstack 32
    movdqu oword ptr[rsp],xmm6
    .savexmm128 xmm6,0
    movdqu oword ptr[rsp+16],xmm7
    .savexmm128 xmm7,16
    .endprolog

    ; rax = current row, rsi = row above, rdi = row below, rbx = dst row
    mov rax,rcx
    mov rbx,rdx
    movsxd r8,r8d               ; r8 = stride, sign-extended
    mov rsi,rax
    mov rdi,rax
    sub rsi,r8
    add rdi,r8
    mov edx,r9d                 ; rdx = width (32-bit write clears the upper half)
    mov r9d,height              ; r9d = row count
    mov r10,2
    mov r11,16

    ; 0x0002,for rounding
    pcmpeqb xmm6,xmm6
    psrlw xmm6,15
    psllw xmm6,1

    pxor xmm7,xmm7              ; zero, used to widen bytes to words

    xor rcx,rcx

toploop:
    ; first row: average of current row and the row below
    movdqa xmm0,[rax+rcx]
    pavgb xmm0,[rdi+rcx]
    movdqa [rbx+rcx],xmm0
    add rcx,r11
    cmp rcx,rdx
    jl toploop

    add rsi,r8
    add rax,r8
    add rdi,r8
    add rbx,r8

    sub r9d,r10d ; the main loop processes 2 lines fewer than the height
    ; With height <= 2 there are no interior rows. Without this guard the
    ; dec/jnz below would start from 0 (or a negative count) and wrap around.
    jle lastrow

yloop:
    xor rcx,rcx
xloop:
    movdqa xmm0,[rsi+rcx]
    movdqa xmm1,[rax+rcx]
    movdqa xmm2,[rdi+rcx]
    movdqa xmm3,xmm0
    movdqa xmm4,xmm1
    movdqa xmm5,xmm2
    ; widen: xmm0/1/2 = low 8 bytes as words, xmm3/4/5 = high 8 bytes as words
    punpcklbw xmm0,xmm7
    punpcklbw xmm1,xmm7
    punpcklbw xmm2,xmm7
    punpckhbw xmm3,xmm7
    punpckhbw xmm4,xmm7
    punpckhbw xmm5,xmm7

    ; add bottom to top
    paddw xmm0,xmm2
    paddw xmm3,xmm5

    ; multiply center by 2
    psllw xmm1,1
    psllw xmm4,1

    ; add center to sum
    paddw xmm0,xmm1
    paddw xmm3,xmm4

    ; add 2 to sum
    paddw xmm0,xmm6
    paddw xmm3,xmm6

    ; divide by 4
    psrlw xmm0,2
    psrlw xmm3,2
    packuswb xmm0,xmm3
    movdqa [rbx+rcx],xmm0

    add rcx,r11
    cmp rcx,rdx
    jl xloop

    add rsi,r8
    add rax,r8
    add rdi,r8
    add rbx,r8
    dec r9d
    jnz yloop

lastrow:
    xor rcx,rcx

bottomloop:
    ; last row: average of the row above and the current row
    movdqa xmm0,[rsi+rcx]
    pavgb xmm0,[rax+rcx]
    movdqa [rbx+rcx],xmm0
    add rcx,r11
    cmp rcx,rdx
    jl bottomloop

    movdqu xmm7,oword ptr[rsp+16]
    movdqu xmm6,oword ptr[rsp]
    add rsp,32

    pop rdi
    pop rsi
    pop rbx
    pop rbp

    ret

VerticalBlur3_SSE2 endp
1132 |
1133 |
1134 |
1135 | ;HorizontalBlur3_SSE2 proc srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword
1136 | ; srcp = rcx
1137 | ; dstp = rdx
1138 | ; stride = r8d
1139 | ; width_ = r9d
1140 |
HorizontalBlur3_SSE2 proc public frame

; Horizontal [1 2 1]/4 blur, 16 output bytes per iteration.
; Each iteration reads one byte before and one byte after the 16-byte group,
; so the caller must make those neighbouring bytes readable.
height equ dword ptr [rbp+48]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    ; xmm6/xmm7 are used below, so they are saved here and restored before returning
    sub rsp,32
    .allocstack 32
    movdqu oword ptr[rsp],xmm6
    .savexmm128 xmm6,0
    movdqu oword ptr[rsp+16],xmm7
    .savexmm128 xmm7,16
    .endprolog

    mov rax,rcx                 ; rax = srcp (rcx becomes the in-row byte offset)
    movsxd r8,r8d               ; r8 = stride, sign-extended
    ; width_ is a 32-bit argument: the upper half of r9 is not guaranteed to be
    ; zero on entry, yet the loop compares the full 64-bit register. Clear it.
    mov r9d,r9d
    mov r10d,height             ; r10d = rows left to process
    mov r11,16

    pxor xmm7,xmm7              ; zero, used to widen bytes to words

    ; 0x0002,for rounding
    pcmpeqb xmm6,xmm6
    psrlw xmm6,15
    psllw xmm6,1

yloop:
    xor rcx,rcx
xloop:
    movdqu xmm0,[rax+rcx-1]     ; left neighbours (unaligned)
    movdqa xmm1,[rax+rcx]       ; centre
    movdqu xmm2,[rax+rcx+1]     ; right neighbours (unaligned)
    movdqa xmm3,xmm0
    movdqa xmm4,xmm1
    movdqa xmm5,xmm2

    ; widen: xmm0/1/2 = low 8 bytes as words, xmm3/4/5 = high 8 bytes as words
    punpcklbw xmm0,xmm7
    punpcklbw xmm1,xmm7
    punpcklbw xmm2,xmm7
    punpckhbw xmm3,xmm7
    punpckhbw xmm4,xmm7
    punpckhbw xmm5,xmm7

    ; center * 2
    psllw xmm1,1
    psllw xmm4,1
    ; + left + right
    paddw xmm1,xmm0
    paddw xmm4,xmm3
    paddw xmm1,xmm2
    paddw xmm4,xmm5

    ; add 2 to sum
    paddw xmm1,xmm6
    paddw xmm4,xmm6

    ; divide by 4
    psrlw xmm1,2
    psrlw xmm4,2
    packuswb xmm1,xmm4
    movdqa [rdx+rcx],xmm1

    add rcx,r11
    cmp rcx,r9
    jl xloop

    add rax,r8
    add rdx,r8
    dec r10d
    jnz yloop

    movdqu xmm7,oword ptr[rsp+16]
    movdqu xmm6,oword ptr[rsp]
    add rsp,32
    pop rbp

    ret

HorizontalBlur3_SSE2 endp
1220 |
1221 |
1222 |
1223 | ;HorizontalBlur6_SSE2 proc srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword
1224 | ; srcp = rcx
1225 | ; dstp = rdx
1226 | ; stride = r8d
1227 | ; width_ = r9d
1228 |
HorizontalBlur6_SSE2 proc public frame

; Horizontal [1 4 6 4 1]/16 blur, 16 output bytes per iteration.
; Each iteration reads two bytes before and two bytes after the 16-byte group,
; so the caller must make those neighbouring bytes readable.
height equ dword ptr [rbp+48]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    ; xmm6..xmm12 are used below, so they are saved here and restored before returning
    sub rsp,112
    .allocstack 112
    movdqu oword ptr[rsp],xmm6
    .savexmm128 xmm6,0
    movdqu oword ptr[rsp+16],xmm7
    .savexmm128 xmm7,16
    movdqu oword ptr[rsp+32],xmm8
    .savexmm128 xmm8,32
    movdqu oword ptr[rsp+48],xmm9
    .savexmm128 xmm9,48
    movdqu oword ptr[rsp+64],xmm10
    .savexmm128 xmm10,64
    movdqu oword ptr[rsp+80],xmm11
    .savexmm128 xmm11,80
    movdqu oword ptr[rsp+96],xmm12
    .savexmm128 xmm12,96
    .endprolog

    mov rax,rcx                 ; rax = srcp (rcx becomes the in-row byte offset)
    movsxd r8,r8d               ; r8 = stride, sign-extended
    ; width_ is a 32-bit argument: the upper half of r9 is not guaranteed to be
    ; zero on entry, yet the loop compares the full 64-bit register. Clear it.
    mov r9d,r9d
    mov r10d,height             ; r10d = rows left to process
    mov r11,16

    pxor xmm12,xmm12            ; zero, used to widen bytes to words

    ; 0x0006
    pcmpeqb xmm11,xmm11
    psrlw xmm11,14
    psllw xmm11,1

    ; 0x0008
    pcmpeqb xmm10,xmm10
    psrlw xmm10,15
    psllw xmm10,3

yloop:
    xor rcx,rcx
xloop:
    movdqu xmm0,[rax+rcx-2]
    movdqu xmm1,[rax+rcx-1]
    movdqa xmm2,[rax+rcx]
    movdqu xmm3,[rax+rcx+1]
    movdqu xmm4,[rax+rcx+2]
    movdqa xmm5,xmm0
    movdqa xmm6,xmm1
    movdqa xmm7,xmm2
    movdqa xmm8,xmm3
    movdqa xmm9,xmm4
    ; widen: xmm0..4 = low 8 bytes as words, xmm5..9 = high 8 bytes as words
    punpcklbw xmm0,xmm12
    punpcklbw xmm1,xmm12
    punpcklbw xmm2,xmm12
    punpcklbw xmm3,xmm12
    punpcklbw xmm4,xmm12
    punpckhbw xmm5,xmm12
    punpckhbw xmm6,xmm12
    punpckhbw xmm7,xmm12
    punpckhbw xmm8,xmm12
    punpckhbw xmm9,xmm12

    ; srcp[x-2] + srcp[x+2]
    paddw xmm0,xmm4
    paddw xmm5,xmm9

    ; srcp[x-1] + srcp[x+1]
    paddw xmm1,xmm3
    paddw xmm6,xmm8

    ; (srcp[x-1] + srcp[x+1])*4
    psllw xmm1,2
    psllw xmm6,2

    ; (srcp[x-1] + srcp[x+1])*4 + srcp[x-2] + srcp[x+2]
    paddw xmm0,xmm1
    paddw xmm5,xmm6

    ; srcp[x] * 6
    pmullw xmm2,xmm11
    pmullw xmm7,xmm11
    paddw xmm0,xmm2
    paddw xmm5,xmm7

    ; add 8
    paddw xmm0,xmm10
    paddw xmm5,xmm10

    ; divide by 16
    psrlw xmm0,4
    psrlw xmm5,4
    packuswb xmm0,xmm5
    movdqa [rdx+rcx],xmm0

    add rcx,r11
    cmp rcx,r9
    jl xloop

    add rax,r8
    add rdx,r8
    dec r10d
    jnz yloop

    movdqu xmm12,oword ptr[rsp+96]
    movdqu xmm11,oword ptr[rsp+80]
    movdqu xmm10,oword ptr[rsp+64]
    movdqu xmm9,oword ptr[rsp+48]
    movdqu xmm8,oword ptr[rsp+32]
    movdqu xmm7,oword ptr[rsp+16]
    movdqu xmm6,oword ptr[rsp]
    add rsp,112
    pop rbp

    ret

HorizontalBlur6_SSE2 endp
1349 |
1350 |
1351 |
1352 | end
--------------------------------------------------------------------------------
/TComb/TComb_core.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | ** TComb v2.x for Avisynth 2.6 and Avisynth+
3 | **
4 | ** TComb is a temporal comb filter (it reduces cross-luminance (rainbowing)
5 | ** and cross-chrominance (dot crawl) artifacts in static areas of the picture).
6 | ** It will ONLY work with NTSC material, and WILL NOT work with telecined material
7 | ** where the rainbowing/dotcrawl was introduced prior to the telecine process!
8 | ** It must be used before ivtc or deinterlace.
9 | **
10 | ** Copyright (C) 2021 Ferenc Pintér
11 | **
12 | ** Copyright (C) 2015 Shane Panke
13 | **
14 | ** Copyright (C) 2005-2006 Kevin Stone
15 | **
16 | ** This program is free software; you can redistribute it and/or modify
17 | ** it under the terms of the GNU General Public License as published by
18 | ** the Free Software Foundation; either version 2 of the License, or
19 | ** (at your option) any later version.
20 | **
21 | ** This program is distributed in the hope that it will be useful,
22 | ** but WITHOUT ANY WARRANTY; without even the implied warranty of
23 | ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 | ** GNU General Public License for more details.
25 | **
26 | ** You should have received a copy of the GNU General Public License
27 | ** along with this program; if not, write to the Free Software
28 | ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 | */
30 |
31 | #include "TComb.h"
32 | #include
33 |
34 | #ifdef INTEL_INTRINSICS
35 | #include
36 | #include
37 | #endif
38 | #include
39 |
// Adds the sum of absolute pixel differences between two planes to `diff`.
// Pixels are consumed in groups of four per row, so a row covers `width`
// rounded up to the next multiple of 4. Pitches are in units of pixel_t.
// `diff` is accumulated into, not reset.
template<typename pixel_t>
void checkSceneChangePlanar_1_c(const pixel_t* srcp, const pixel_t* nxtp,
  int height, int width, int src_pitch, int nxt_pitch, uint64_t& diff)
{
  for (int row = 0; row < height; ++row, srcp += src_pitch, nxtp += nxt_pitch)
  {
    // Per-row partial sum kept in 32 bits, widened when folded into diff.
    uint32_t row_sad = 0;
    for (int x0 = 0; x0 < width; x0 += 4)
    {
      for (int k = 0; k < 4; ++k)
        row_sad += abs(srcp[x0 + k] - nxtp[x0 + k]);
    }
    diff += row_sad;
  }
}

// instantiate
template void checkSceneChangePlanar_1_c<uint8_t>(const uint8_t* srcp, const uint8_t* nxtp,
  int height, int width, int src_pitch, int nxt_pitch, uint64_t& diff);
63 |
64 | #ifdef INTEL_INTRINSICS
// SSE2 sum of absolute differences between two 8-bit planes.
// Both row pointers must be 16-byte aligned; each row is processed in
// 16-byte groups, i.e. `width` rounded up to a multiple of 16.
// The total is written to `diffp` (it replaces the previous value).
//
// _mm_sad_epu8 produces one partial sum per 64-bit half, so the halves are
// accumulated and extracted as 64-bit quantities. Accumulating them with
// 32-bit adds and reading the result back through a signed 32-bit extract
// corrupts totals of 2^31 and above.
void checkSceneChangePlanar_1_SSE2_simd(const uint8_t* prvp, const uint8_t* srcp,
  int height, int width, int prv_pitch, int src_pitch, uint64_t& diffp)
{
  __m128i sum = _mm_setzero_si128();
  for (int y = 0; y < height; ++y)
  {
    for (int x = 0; x < width; x += 16)
    {
      const __m128i src1 = _mm_load_si128(reinterpret_cast<const __m128i*>(prvp + x));
      const __m128i src2 = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp + x));
      sum = _mm_add_epi64(sum, _mm_sad_epu8(src1, src2));
    }
    prvp += prv_pitch;
    srcp += src_pitch;
  }
  // Fold the upper half onto the lower half, then store the low 64 bits.
  const __m128i total = _mm_add_epi64(sum, _mm_srli_si128(sum, 8));
  uint64_t result = 0;
  _mm_storel_epi64(reinterpret_cast<__m128i*>(&result), total);
  diffp = result;
}
83 | #endif
84 |
85 | #ifdef INTEL_INTRINSICS
// dst = s1 & s2, 16 bytes at a time. All three row pointers must be 16-byte
// aligned and share the same stride.
void andMasks_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height)
{
  for (int row = 0; row < height; ++row, s1p += stride, s2p += stride, dstp += stride)
  {
    for (int col = 0; col < width; col += 16)
    {
      const __m128i lhs = _mm_load_si128(reinterpret_cast<const __m128i*>(s1p + col));
      const __m128i rhs = _mm_load_si128(reinterpret_cast<const __m128i*>(s2p + col));
      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + col), _mm_and_si128(lhs, rhs));
    }
  }
}
103 | #endif
104 |
// Scalar reference: dst = s1 & s2 for every byte of a width x height area.
// All three planes share the same stride.
void andMasks_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height)
{
  for (int row = 0; row < height; ++row, s1p += stride, s2p += stride, dstp += stride)
  {
    int col = 0;
    while (col < width)
    {
      dstp[col] = (s1p[col] & s2p[col]);
      ++col;
    }
  }
}
117 |
118 | #ifdef INTEL_INTRINSICS
// dst |= (s1 & s2), 16 bytes at a time. All three row pointers must be
// 16-byte aligned and share the same stride.
void orAndMasks_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height)
{
  for (int row = 0; row < height; ++row, s1p += stride, s2p += stride, dstp += stride)
  {
    for (int col = 0; col < width; col += 16)
    {
      __m128i* out = reinterpret_cast<__m128i*>(dstp + col);
      const __m128i lhs = _mm_load_si128(reinterpret_cast<const __m128i*>(s1p + col));
      const __m128i rhs = _mm_load_si128(reinterpret_cast<const __m128i*>(s2p + col));
      const __m128i both = _mm_and_si128(lhs, rhs);
      _mm_store_si128(out, _mm_or_si128(_mm_load_si128(out), both));
    }
  }
}
137 | #endif
138 |
// Scalar reference: dst |= (s1 & s2) for every byte of a width x height area.
// All three planes share the same stride.
void orAndMasks_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height)
{
  for (int row = 0; row < height; ++row, s1p += stride, s2p += stride, dstp += stride)
  {
    int col = 0;
    while (col < width)
    {
      dstp[col] |= (s1p[col] & s2p[col]);
      ++col;
    }
  }
}
151 |
152 | #ifdef INTEL_INTRINSICS
// dst = s1 | s2 | s3, 16 bytes at a time. All four row pointers must be
// 16-byte aligned and share the same stride.
void or3Masks_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* s3p, uint8_t* dstp, int stride, int width, int height)
{
  for (int row = 0; row < height; ++row, s1p += stride, s2p += stride, s3p += stride, dstp += stride)
  {
    for (int col = 0; col < width; col += 16)
    {
      const __m128i m1 = _mm_load_si128(reinterpret_cast<const __m128i*>(s1p + col));
      const __m128i m2 = _mm_load_si128(reinterpret_cast<const __m128i*>(s2p + col));
      const __m128i m3 = _mm_load_si128(reinterpret_cast<const __m128i*>(s3p + col));
      const __m128i m23 = _mm_or_si128(m2, m3);
      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + col), _mm_or_si128(m1, m23));
    }
  }
}
172 | #endif
173 |
// Scalar reference: dst = s1 | s2 | s3 for every byte of a width x height
// area. All four planes share the same stride.
void or3Masks_c(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* s3p, uint8_t* dstp, int stride, int width, int height)
{
  for (int row = 0; row < height; ++row, s1p += stride, s2p += stride, s3p += stride, dstp += stride)
  {
    int col = 0;
    while (col < width)
    {
      dstp[col] = (s1p[col] | s2p[col] | s3p[col]);
      ++col;
    }
  }
}
187 |
188 | #ifdef INTEL_INTRINSICS
// dst = rounded average of s1 and s2 (per byte), 16 bytes at a time.
// All three row pointers must be 16-byte aligned and share the same stride.
void calcAverages_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height)
{
  for (int row = 0; row < height; ++row, s1p += stride, s2p += stride, dstp += stride)
  {
    for (int col = 0; col < width; col += 16)
    {
      const __m128i lhs = _mm_load_si128(reinterpret_cast<const __m128i*>(s1p + col));
      const __m128i rhs = _mm_load_si128(reinterpret_cast<const __m128i*>(s2p + col));
      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + col), _mm_avg_epu8(lhs, rhs));
    }
  }
}
206 | #endif
207 |
// Scalar reference: dst = (s1 + s2 + 1) >> 1 for every byte of a
// width x height area. All three planes share the same stride.
void calcAverages_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height)
{
  for (int row = 0; row < height; ++row, s1p += stride, s2p += stride, dstp += stride)
  {
    int col = 0;
    while (col < width)
    {
      const int sum = s1p[col] + s2p[col];
      dstp[col] = (sum + 1) >> 1;
      ++col;
    }
  }
}
220 |
221 | #ifdef INTEL_INTRINSICS
// For every pixel, computes the minimum and maximum over its 3x3
// neighbourhood and writes (min - thresh) to dstpMin and (max + thresh) to
// dstpMax, both with unsigned saturation. 16 pixels per iteration.
//
// Reads the rows directly above and below srcp and one byte to the left and
// right of each 16-byte group, so the caller must make that border readable.
// dstpMin and dstpMax are written with aligned stores and share dmin_stride.
//
// All source loads are unaligned loads: the x-1, x and x+1 addresses cannot
// all be 16-byte aligned, and an aligned load on the x-1 address faults when
// the source rows themselves are 16-byte aligned.
void MinMax_SSE2_simd(const uint8_t* srcp, uint8_t* dstpMin, uint8_t* dstpMax, int src_stride, int dmin_stride, int width, int height, int thresh)
{
  const uint8_t* srcpp = srcp - src_stride;
  const uint8_t* srcpn = srcp + src_stride;

  const auto threshp = _mm_set1_epi8(thresh);

  for (int y = 0; y < height; ++y)
  {
    for (int x = 0; x < width; x += 16)
    {
      __m128i srcpp_m_1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcpp + x - 1));
      __m128i srcpp_0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcpp + x));
      __m128i srcpp_p_1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcpp + x + 1));

      __m128i srcp_m_1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcp + x - 1));
      __m128i srcp_0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcp + x));
      __m128i srcp_p_1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcp + x + 1));

      __m128i srcpn_m_1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcpn + x - 1));
      __m128i srcpn_0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcpn + x));
      __m128i srcpn_p_1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcpn + x + 1));

      // minimum of the nine neighbours, then saturating subtract of thresh
      auto tmpmin = _mm_min_epu8(_mm_min_epu8(_mm_min_epu8(_mm_min_epu8(srcpp_m_1, srcpp_0),
        _mm_min_epu8(srcpp_p_1, srcp_m_1)),
        _mm_min_epu8(_mm_min_epu8(srcp_0, srcp_p_1),
          _mm_min_epu8(srcpn_m_1, srcpn_0))), srcpn_p_1);

      auto min = _mm_subs_epu8(tmpmin, threshp);

      _mm_store_si128(reinterpret_cast<__m128i*>(dstpMin + x), min);

      // maximum of the nine neighbours, then saturating add of thresh
      auto tmpmax = _mm_max_epu8(_mm_max_epu8(_mm_max_epu8(_mm_max_epu8(srcpp_m_1, srcpp_0),
        _mm_max_epu8(srcpp_p_1, srcp_m_1)),
        _mm_max_epu8(_mm_max_epu8(srcp_0, srcp_p_1),
          _mm_max_epu8(srcpn_m_1, srcpn_0))), srcpn_p_1);

      auto max = _mm_adds_epu8(tmpmax, threshp); // future warning: 10-14 bits

      _mm_store_si128(reinterpret_cast<__m128i*>(dstpMax + x), max);
    }

    srcpp += src_stride;
    srcp += src_stride;
    srcpn += src_stride;
    dstpMin += dmin_stride;
    dstpMax += dmin_stride;
  }
}
271 | #endif
272 |
// Scalar reference: for every pixel, takes the minimum and maximum over its
// 3x3 neighbourhood and writes max(min - thresh, 0) to dstpMin and
// min(max + thresh, 255) to dstpMax. Reads the rows above and below srcp and
// one pixel to the left and right of each position.
void MinMax_c(const uint8_t* srcp, uint8_t* dstpMin, uint8_t* dstpMax, int src_stride, int dmin_stride, int width, int height, int thresh)
{
  // rows[0] = row above, rows[1] = current row, rows[2] = row below
  const uint8_t* rows[3] = { srcp - src_stride, srcp, srcp + src_stride };

  for (int y = 0; y < height; ++y)
  {
    for (int x = 0; x < width; ++x)
    {
      int lowest = 255;
      for (int r = 0; r < 3; ++r)
        for (int dx = -1; dx <= 1; ++dx)
          lowest = std::min<int>(lowest, rows[r][x + dx]);
      dstpMin[x] = std::max(lowest - thresh, 0);

      int highest = 0;
      for (int r = 0; r < 3; ++r)
        for (int dx = -1; dx <= 1; ++dx)
          highest = std::max<int>(highest, rows[r][x + dx]);
      dstpMax[x] = std::min(highest + thresh, 255);
    }

    for (int r = 0; r < 3; ++r)
      rows[r] += src_stride;
    dstpMin += dmin_stride;
    dstpMax += dmin_stride;
  }
}
299 |
300 | #ifdef INTEL_INTRINSICS
// dst = |src1 - src2| per byte, 16 bytes at a time. All three row pointers
// must be 16-byte aligned and share the same stride.
void absDiff_SSE2_simd(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height)
{
  for (int row = 0; row < height; ++row, srcp1 += stride, srcp2 += stride, dstp += stride)
  {
    for (int col = 0; col < width; col += 16)
    {
      const __m128i a = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp1 + col));
      const __m128i b = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp2 + col));
      // One of the two saturating differences is always zero, so OR-ing
      // them yields the absolute difference.
      const __m128i absdiff = _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a));
      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + col), absdiff);
    }
  }
}
319 | #endif
320 |
// Scalar reference: dst = |src1 - src2| for every byte of a width x height
// area. All three planes share the same stride.
void absDiff_c(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height)
{
  for (int row = 0; row < height; ++row, srcp1 += stride, srcp2 += stride, dstp += stride)
  {
    int col = 0;
    while (col < width)
    {
      const int delta = srcp1[col] - srcp2[col];
      dstp[col] = abs(delta);
      ++col;
    }
  }
}
333 |
334 | #ifdef INTEL_INTRINSICS
// dst = m1 where |s1 - s2| < thresh, else 0; 16 bytes at a time.
// (For a 0xFF/0x00 mask in m1 this matches the scalar rule
//  "m1 != 0 && |s1 - s2| < thresh ? 0xFF : 0".)
// All four row pointers must be 16-byte aligned and share the same stride.
//
// thresh - 1 is clamped to the 0..255 byte range before being broadcast,
// as the other threshold routines in this file do. Without the clamp a
// threshold above 256 is truncated to its low byte and rejects differences
// that are below the threshold.
void buildFinalMask_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* m1p, uint8_t* dstp, int stride, int width, int height, int thresh)
{
  const int threshm1 = std::min(std::max(thresh - 1, 0), 255);
  auto thresh_minus1 = _mm_set1_epi8(static_cast<char>(threshm1));
  auto zero = _mm_setzero_si128();

  for (int y = 0; y < height; ++y)
  {
    for (int x = 0; x < width; x += 16)
    {
      auto src1 = _mm_load_si128(reinterpret_cast<const __m128i*>(s1p + x));
      auto src2 = _mm_load_si128(reinterpret_cast<const __m128i*>(s2p + x));
      // absolute difference: one of the saturating differences is always zero
      auto diff12 = _mm_subs_epu8(src1, src2);
      auto diff21 = _mm_subs_epu8(src2, src1);
      auto diff = _mm_or_si128(diff12, diff21);
      // diff < thresh  <=>  sub_sat(diff, thresh - 1) == 0
      auto below = _mm_cmpeq_epi8(_mm_subs_epu8(diff, thresh_minus1), zero);
      auto m1 = _mm_load_si128(reinterpret_cast<const __m128i*>(m1p + x));
      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), _mm_and_si128(below, m1));
    }

    m1p += stride;
    s1p += stride;
    s2p += stride;
    dstp += stride;
  }
}
369 | #endif
370 |
// Scalar reference: dst = 0xFF where m1 is non-zero and |s1 - s2| < thresh,
// otherwise 0. All four planes share the same stride.
void buildFinalMask_c(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* m1p, uint8_t* dstp, int stride, int width, int height, int thresh)
{
  for (int row = 0; row < height; ++row, m1p += stride, s1p += stride, s2p += stride, dstp += stride)
  {
    for (int col = 0; col < width; ++col)
    {
      // the difference is only evaluated when the mask byte is set
      const bool keep = m1p[col] && abs(s1p[col] - s2p[col]) < thresh;
      dstp[col] = keep ? 0xFF : 0;
    }
  }
}
389 |
390 | #ifdef INTEL_INTRINSICS
// Five-frame oscillation test, 16 bytes at a time. Writes 0xFF where
//   (max(p1,n1) < min(p2,s1,n2) || max(p1,n1) == 0 ||
//    max(p2,s1,n2) < min(p1,n1) || max(p2,s1,n2) == 0)
//   && max(p2,s1,n2) - min(p2,s1,n2) < thresh
//   && max(p1,n1) - min(p1,n1) < thresh
// and 0 elsewhere. All six row pointers must be 16-byte aligned and share
// the same stride.
void checkOscillation5_SSE2_simd(const uint8_t* p2p, const uint8_t* p1p, const uint8_t* s1p, const uint8_t* n1p, const uint8_t* n2p, uint8_t* dstp, int stride, int width, int height, int thresh)
{
  const int clamped = std::min(std::max(thresh - 1, 0), 255);
  const __m128i limit = _mm_set1_epi8(clamped); // thresh - 1 in every byte
  const __m128i ones = _mm_set1_epi8(1);
  const __m128i zeros = _mm_setzero_si128();

  for (int row = 0; row < height; ++row)
  {
    for (int col = 0; col < width; col += 16)
    {
      // "a < b" on unsigned bytes is evaluated as sub_sat(a, b - 1) == 0;
      // the saturating (b - 1) makes b == 0 degrade to "a == 0".
      const __m128i vp2 = _mm_load_si128(reinterpret_cast<const __m128i*>(p2p + col));
      const __m128i vs1 = _mm_load_si128(reinterpret_cast<const __m128i*>(s1p + col));
      const __m128i vn2 = _mm_load_si128(reinterpret_cast<const __m128i*>(n2p + col));
      const __m128i vp1 = _mm_load_si128(reinterpret_cast<const __m128i*>(p1p + col));
      const __m128i vn1 = _mm_load_si128(reinterpret_cast<const __m128i*>(n1p + col));

      // extremes of the {p2, s1, n2} group and of the {p1, n1} group
      const __m128i lo3 = _mm_min_epu8(_mm_min_epu8(vp2, vs1), vn2);
      const __m128i hi3 = _mm_max_epu8(_mm_max_epu8(vp2, vs1), vn2);
      const __m128i lo2 = _mm_min_epu8(vp1, vn1);
      const __m128i hi2 = _mm_max_epu8(vp1, vn1);

      const __m128i pair_below_triple = _mm_cmpeq_epi8(_mm_subs_epu8(hi2, _mm_subs_epu8(lo3, ones)), zeros);
      const __m128i triple_below_pair = _mm_cmpeq_epi8(_mm_subs_epu8(hi3, _mm_subs_epu8(lo2, ones)), zeros);
      const __m128i separated = _mm_or_si128(pair_below_triple, triple_below_pair);

      const __m128i triple_tight = _mm_cmpeq_epi8(_mm_subs_epu8(_mm_subs_epu8(hi3, lo3), limit), zeros);
      const __m128i pair_tight = _mm_cmpeq_epi8(_mm_subs_epu8(_mm_subs_epu8(hi2, lo2), limit), zeros);
      const __m128i tight = _mm_and_si128(triple_tight, pair_tight);

      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + col), _mm_and_si128(separated, tight));
    }

    p2p += stride;
    p1p += stride;
    s1p += stride;
    n1p += stride;
    n2p += stride;
    dstp += stride;
  }
}
442 | #endif
443 |
444 | void checkOscillation5_c(const uint8_t* p2p, const uint8_t* p1p, const uint8_t* s1p, const uint8_t* n1p, const uint8_t* n2p, uint8_t* dstp, int stride, int width, int height, int thresh)
445 | {
446 | for (int y = 0; y < height; ++y)
447 | {
448 | for (int x = 0; x < width; ++x)
449 | {
450 | const int min31 = min3(p2p[x], s1p[x], n2p[x]);
451 | const int max31 = max3(p2p[x], s1p[x], n2p[x]);
452 | const int min22 = std::min(p1p[x], n1p[x]);
453 | const int max22 = std::max(p1p[x], n1p[x]);
454 | if (((max22 < min31) || max22 == 0 || (max31 < min22) || max31 == 0) &&
455 | max31 - min31 < thresh && max22 - min22 < thresh)
456 | dstp[x] = 0xFF;
457 | else dstp[x] = 0;
458 | }
459 |
460 | p2p += stride;
461 | p1p += stride;
462 | s1p += stride;
463 | n1p += stride;
464 | n2p += stride;
465 | dstp += stride;
466 | }
467 | }
468 |
469 | #ifdef INTEL_INTRINSICS
// dst = (min(|src1 - src2|, dst) < thresh) ? 0xFF : 0, 16 bytes at a time.
// All three row pointers must be 16-byte aligned and share the same stride.
void absDiffAndMinMaskThresh_SSE2_simd(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height, int thresh)
{
  const int clamped = std::min(std::max(thresh - 1, 0), 255);
  const __m128i limit = _mm_set1_epi8(clamped); // thresh - 1 in every byte
  const __m128i zeros = _mm_setzero_si128();

  for (int row = 0; row < height; ++row, srcp1 += stride, srcp2 += stride, dstp += stride)
  {
    for (int col = 0; col < width; col += 16)
    {
      __m128i* out = reinterpret_cast<__m128i*>(dstp + col);
      const __m128i a = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp1 + col));
      const __m128i b = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp2 + col));
      const __m128i previous = _mm_load_si128(out);

      // absolute difference, then the smaller of it and the stored value
      const __m128i absdiff = _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a));
      const __m128i smallest = _mm_min_epu8(absdiff, previous);

      // value < thresh  <=>  sub_sat(value, thresh - 1) == 0
      _mm_store_si128(out, _mm_cmpeq_epi8(_mm_subs_epu8(smallest, limit), zeros));
    }
  }
}
506 | #endif
507 |
// Scalar reference: takes the smaller of |src1 - src2| and the value already
// in dst, then replaces dst with 0xFF if that value is below thresh, else 0.
// All three planes share the same stride.
void absDiffAndMinMaskThresh_c(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height, int thresh)
{
  for (int row = 0; row < height; ++row, srcp1 += stride, srcp2 += stride, dstp += stride)
  {
    for (int col = 0; col < width; ++col)
    {
      const int delta = abs(srcp1[col] - srcp2[col]);
      int smallest = dstp[col];
      if (delta < smallest)
        smallest = delta;
      dstp[col] = (smallest < thresh) ? 0xFF : 0;
    }
  }
}
528 |
529 | #ifdef INTEL_INTRINSICS
// dst = min(|src1 - src2|, dst) per byte, 16 bytes at a time.
// All three row pointers must be 16-byte aligned and share the same stride.
void absDiffAndMinMask_SSE2_simd(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height)
{
  for (int row = 0; row < height; ++row, srcp1 += stride, srcp2 += stride, dstp += stride)
  {
    for (int col = 0; col < width; col += 16)
    {
      __m128i* out = reinterpret_cast<__m128i*>(dstp + col);
      const __m128i a = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp1 + col));
      const __m128i b = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp2 + col));
      const __m128i previous = _mm_load_si128(out);

      // One of the two saturating differences is always zero, so OR-ing
      // them yields the absolute difference.
      const __m128i absdiff = _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a));
      _mm_store_si128(out, _mm_min_epu8(absdiff, previous));
    }
  }
}
558 | #endif
559 |
// Scalar reference: dst = min(|src1 - src2|, dst) for every byte of a
// width x height area. All three planes share the same stride.
void absDiffAndMinMask_c(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height)
{
  for (int row = 0; row < height; ++row, srcp1 += stride, srcp2 += stride, dstp += stride)
  {
    int col = 0;
    while (col < width)
    {
      const int delta = abs(srcp1[col] - srcp2[col]);
      // dst is only written when the new difference is strictly smaller
      if (delta < dstp[col])
        dstp[col] = delta;
      ++col;
    }
  }
}
576 |
577 | #ifdef INTEL_INTRINSICS
// SSE2 version of checkAvgOscCorrelation_c.
//
// For every pixel, clears dstp[x] to 0 when the spread (max - min) of the four
// source samples is >= thresh; otherwise dstp[x] is left untouched.
//
// Works on 16 bytes per iteration with aligned loads/stores, so every row
// pointer must be 16-byte aligned and each row must be accessible up to
// `width` rounded up to a multiple of 16.
//
// s1p..s4p     : the four source planes
// dstp         : in/out mask plane
// stride       : distance in bytes between rows (shared by all planes)
// width/height : processed area in pixels
// thresh       : spread at or above which the mask pixel is cleared
void checkAvgOscCorrelation_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* s3p, const uint8_t* s4p, uint8_t* dstp, int stride, int width, int height, int thresh)
{
  // "spread < thresh" is evaluated as "saturating(spread - (thresh - 1)) == 0".
  const int threshm1 = std::min(std::max(thresh - 1, 0), 255);
  const __m128i thresh_minus1 = _mm_set1_epi8(static_cast<char>(threshm1));
  const __m128i zero = _mm_setzero_si128();

  // With thresh <= 0 the scalar test "spread >= thresh" is always true, i.e.
  // every pixel is cleared. The clamped compare above cannot express that
  // (it would keep pixels whose spread is 0), so force the keep-mask to zero.
  const __m128i keep_enable = _mm_set1_epi8(thresh <= 0 ? 0 : -1);

  for (int y = 0; y < height; ++y)
  {
    for (int x = 0; x < width; x += 16)
    {
      const __m128i s1 = _mm_load_si128(reinterpret_cast<const __m128i*>(s1p + x));
      const __m128i s2 = _mm_load_si128(reinterpret_cast<const __m128i*>(s2p + x));
      const __m128i s3 = _mm_load_si128(reinterpret_cast<const __m128i*>(s3p + x));
      const __m128i s4 = _mm_load_si128(reinterpret_cast<const __m128i*>(s4p + x));

      const __m128i min = _mm_min_epu8(_mm_min_epu8(_mm_min_epu8(s1, s2), s3), s4);
      const __m128i max = _mm_max_epu8(_mm_max_epu8(_mm_max_epu8(s1, s2), s3), s4);

      const __m128i diffmaxmin = _mm_subs_epu8(max, min);
      // 0xFF where spread < thresh (pixel kept), 0x00 where it must be cleared.
      __m128i keep = _mm_cmpeq_epi8(_mm_subs_epu8(diffmaxmin, thresh_minus1), zero);
      keep = _mm_and_si128(keep, keep_enable);

      const __m128i dst = _mm_load_si128(reinterpret_cast<const __m128i*>(dstp + x));
      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), _mm_and_si128(keep, dst));
    }

    s1p += stride;
    s2p += stride;
    s3p += stride;
    s4p += stride;
    dstp += stride;
  }
}
618 | #endif
619 |
620 | void checkAvgOscCorrelation_c(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* s3p, const uint8_t* s4p, uint8_t* dstp, int stride, int width, int height, int thresh)
621 | {
622 | for (int y = 0; y < height; ++y)
623 | {
624 | for (int x = 0; x < width; ++x)
625 | {
626 | if (max4(s1p[x], s2p[x], s3p[x], s4p[x]) -
627 | min4(s1p[x], s2p[x], s3p[x], s4p[x]) >= thresh)
628 | dstp[x] = 0;
629 | }
630 |
631 | s1p += stride;
632 | s2p += stride;
633 | s3p += stride;
634 | s4p += stride;
635 | dstp += stride;
636 | }
637 | }
638 |
639 | #ifdef INTEL_INTRINSICS
// SSE2 vertical [1 2 1]/4 blur, matching VerticalBlur3_c.
//
//   first row : (cur + next + 1) >> 1
//   inner rows: (prev + 2*cur + next + 2) >> 2
//   last row  : (prev + cur + 1) >> 1
//
// A single-row plane has no vertical neighbours and is copied unchanged;
// height <= 0 is a no-op.
//
// Works on 16 bytes per iteration with aligned loads/stores, so srcp, dstp and
// stride must keep every row 16-byte aligned, and each row must be accessible
// up to `width` rounded up to a multiple of 16.
//
// srcp         : source plane
// dstp         : destination plane
// stride       : distance in bytes between rows (shared by both planes)
// width/height : processed area in pixels
void VerticalBlur3_SSE2_simd(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height)
{
  if (height <= 0)
    return;

  if (height == 1)
  {
    // No row above or below: nothing to blur with.
    for (int x = 0; x < width; x += 16) {
      const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp + x));
      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), s);
    }
    return;
  }

  const __m128i zero = _mm_setzero_si128();
  const __m128i two = _mm_set1_epi16(2);

  const uint8_t* srcpp = srcp;          // becomes the "previous" row after the top line
  const uint8_t* srcpn = srcp + stride;

  // top line: dstp[x] = (srcp[x] + srcpn[x] + 1) >> 1
  for (int x = 0; x < width; x += 16) {
    const __m128i s1 = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp + x));
    const __m128i s2 = _mm_load_si128(reinterpret_cast<const __m128i*>(srcpn + x));
    _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), _mm_avg_epu8(s1, s2));
  }

  srcp += stride;
  srcpn += stride;
  dstp += stride;

  for (int y = 1; y < height - 1; ++y)
  {
    // dstp[x] = (srcpp[x] + (srcp[x] << 1) + srcpn[x] + 2) >> 2, in 16-bit lanes
    for (int x = 0; x < width; x += 16) {
      const __m128i p = _mm_load_si128(reinterpret_cast<const __m128i*>(srcpp + x));
      const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp + x));
      const __m128i n = _mm_load_si128(reinterpret_cast<const __m128i*>(srcpn + x));

      const __m128i p_lo = _mm_unpacklo_epi8(p, zero);
      const __m128i p_hi = _mm_unpackhi_epi8(p, zero);
      const __m128i s_lo = _mm_unpacklo_epi8(s, zero);
      const __m128i s_hi = _mm_unpackhi_epi8(s, zero);
      const __m128i n_lo = _mm_unpacklo_epi8(n, zero);
      const __m128i n_hi = _mm_unpackhi_epi8(n, zero);
      __m128i res_lo = _mm_add_epi16(_mm_add_epi16(p_lo, _mm_slli_epi16(s_lo, 1)), n_lo);
      __m128i res_hi = _mm_add_epi16(_mm_add_epi16(p_hi, _mm_slli_epi16(s_hi, 1)), n_hi);
      res_lo = _mm_srli_epi16(_mm_add_epi16(res_lo, two), 2);
      res_hi = _mm_srli_epi16(_mm_add_epi16(res_hi, two), 2);
      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), _mm_packus_epi16(res_lo, res_hi));
    }

    srcpp += stride;
    srcp += stride;
    srcpn += stride;
    dstp += stride;
  }

  // bottom line: dstp[x] = (srcpp[x] + srcp[x] + 1) >> 1
  for (int x = 0; x < width; x += 16) {
    const __m128i s1 = _mm_load_si128(reinterpret_cast<const __m128i*>(srcpp + x));
    const __m128i s2 = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp + x));
    _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), _mm_avg_epu8(s1, s2));
  }
}
700 | #endif
701 |
// Scalar vertical [1 2 1]/4 blur.
//
//   first row : (cur + next + 1) >> 1
//   inner rows: (prev + 2*cur + next + 2) >> 2
//   last row  : (prev + cur + 1) >> 1
//
// A single-row plane has no vertical neighbours and is copied unchanged;
// height <= 0 is a no-op. (Without this guard the first/last row formulas
// would read and write a row that lies outside a one-row plane.)
//
// srcp         : source plane
// dstp         : destination plane
// stride       : distance in bytes between rows (shared by both planes)
// width/height : processed area in pixels
void VerticalBlur3_c(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height)
{
  if (height <= 0)
    return;

  if (height == 1)
  {
    for (int x = 0; x < width; ++x)
      dstp[x] = srcp[x];
    return;
  }

  const uint8_t* srcpp = srcp;          // becomes the "previous" row after the top line
  const uint8_t* srcpn = srcp + stride;

  // top line
  for (int x = 0; x < width; ++x)
    dstp[x] = (srcp[x] + srcpn[x] + 1) >> 1;

  srcp += stride;
  srcpn += stride;
  dstp += stride;

  for (int y = 1; y < height - 1; ++y)
  {
    for (int x = 0; x < width; ++x)
      dstp[x] = (srcpp[x] + (srcp[x] << 1) + srcpn[x] + 2) >> 2;

    srcpp += stride;
    srcp += stride;
    srcpn += stride;
    dstp += stride;
  }

  // bottom line
  for (int x = 0; x < width; ++x)
    dstp[x] = (srcpp[x] + srcp[x] + 1) >> 1;
}
729 |
730 | #ifdef INTEL_INTRINSICS
// SSE2 horizontal [1 2 1]/4 blur:
//   dstp[x] = (srcp[x - 1] + (srcp[x] << 1) + srcp[x + 1] + 2) >> 2
//
// width mod 16 and srcp alignment guaranteed (by the caller): the centre load
// and the store are aligned, the left/right neighbour loads are unaligned.
//
// Unlike HorizontalBlur3_c there is no special handling of the first and last
// column: every row reads one byte before srcp[0] and one byte after the last
// processed pixel.
// NOTE(review): presumably the caller passes an interior region and handles
// the border columns separately - confirm against the call site.
//
// srcp         : source plane
// dstp         : destination plane
// stride       : distance in bytes between rows (shared by both planes)
// width/height : processed area in pixels
void HorizontalBlur3_SSE2_simd(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height)
{
  const __m128i zero = _mm_setzero_si128();
  const __m128i two = _mm_set1_epi16(2);

  for (int y = 0; y < height; ++y)
  {
    for (int x = 0; x < width; x += 16)
    {
      const __m128i p = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcp + x - 1));
      const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp + x));
      const __m128i n = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcp + x + 1));

      // widen to 16 bits so the weighted sum cannot overflow
      const __m128i p_lo = _mm_unpacklo_epi8(p, zero);
      const __m128i p_hi = _mm_unpackhi_epi8(p, zero);
      const __m128i s_lo = _mm_unpacklo_epi8(s, zero);
      const __m128i s_hi = _mm_unpackhi_epi8(s, zero);
      const __m128i n_lo = _mm_unpacklo_epi8(n, zero);
      const __m128i n_hi = _mm_unpackhi_epi8(n, zero);
      __m128i res_lo = _mm_add_epi16(_mm_add_epi16(p_lo, _mm_slli_epi16(s_lo, 1)), n_lo);
      __m128i res_hi = _mm_add_epi16(_mm_add_epi16(p_hi, _mm_slli_epi16(s_hi, 1)), n_hi);
      res_lo = _mm_srli_epi16(_mm_add_epi16(res_lo, two), 2);
      res_hi = _mm_srli_epi16(_mm_add_epi16(res_hi, two), 2);
      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), _mm_packus_epi16(res_lo, res_hi));
    }

    srcp += stride;
    dstp += stride;
  }
}
765 | #endif
766 |
// Scalar horizontal [1 2 1]/4 blur with two-tap averaging at the row ends.
//
//   first column : (s[0] + s[1] + 1) / 2
//   inner columns: (s[x-1] + 2*s[x] + s[x+1] + 2) / 4
//   last column  : (s[w-2] + s[w-1] + 1) / 2
//
// srcp         : source plane
// dstp         : destination plane
// stride       : distance in bytes between rows (shared by both planes)
// width/height : processed area in pixels
void HorizontalBlur3_c(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height)
{
  const int last = width - 1;
  const uint8_t* in = srcp;
  uint8_t* out = dstp;

  for (int row = 0; row < height; ++row, in += stride, out += stride)
  {
    out[0] = (in[0] + in[1] + 1) / 2;

    for (int col = 1; col < last; ++col)
      out[col] = (in[col - 1] + 2 * in[col] + in[col + 1] + 2) / 4;

    out[last] = (in[last - 1] + in[last] + 1) / 2;
  }
}
782 |
783 | #ifdef INTEL_INTRINSICS
// SSE2 horizontal [1 4 6 4 1]/16 blur:
//   dstp[x] = (srcp[x - 2] + ((srcp[x - 1] + srcp[x + 1]) << 2) + srcp[x] * 6 + srcp[x + 2] + 8) >> 4
//
// Processes 16 pixels per iteration. The centre load and the store are
// aligned (srcp + x and dstp + x must be 16-byte aligned); the four neighbour
// loads are unaligned.
//
// Unlike HorizontalBlur6_c there is no special handling of the two first and
// two last columns: every row reads two bytes before srcp[0] and two bytes
// after the last processed pixel.
// NOTE(review): presumably the caller passes an interior region and handles
// the border columns separately - confirm against the call site.
//
// srcp         : source plane
// dstp         : destination plane
// stride       : distance in bytes between rows (shared by both planes)
// width/height : processed area in pixels
void HorizontalBlur6_SSE2_simd(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height)
{
  const __m128i zero = _mm_setzero_si128();
  const __m128i eight = _mm_set1_epi16(8);
  const __m128i six = _mm_set1_epi16(6);

  for (int y = 0; y < height; y++)
  {
    for (int x = 0; x < width; x += 16) {
      const __m128i pp = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcp + x - 2));
      const __m128i p = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcp + x - 1));
      const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp + x));
      const __m128i n = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcp + x + 1));
      const __m128i nn = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcp + x + 2));

      // widen to 16 bits: the weighted sum is at most 16 * 255 + 8, which fits
      const __m128i pp_lo = _mm_unpacklo_epi8(pp, zero);
      const __m128i pp_hi = _mm_unpackhi_epi8(pp, zero);
      const __m128i p_lo = _mm_unpacklo_epi8(p, zero);
      const __m128i p_hi = _mm_unpackhi_epi8(p, zero);
      const __m128i s_lo = _mm_unpacklo_epi8(s, zero);
      const __m128i s_hi = _mm_unpackhi_epi8(s, zero);
      const __m128i n_lo = _mm_unpacklo_epi8(n, zero);
      const __m128i n_hi = _mm_unpackhi_epi8(n, zero);
      const __m128i nn_lo = _mm_unpacklo_epi8(nn, zero);
      const __m128i nn_hi = _mm_unpackhi_epi8(nn, zero);

      const __m128i centermulsix_lo = _mm_mullo_epi16(s_lo, six);
      const __m128i centermulsix_hi = _mm_mullo_epi16(s_hi, six);
      __m128i res_lo = _mm_add_epi16(centermulsix_lo, _mm_add_epi16(_mm_add_epi16(pp_lo, _mm_slli_epi16(_mm_add_epi16(p_lo, n_lo), 2)), nn_lo));
      __m128i res_hi = _mm_add_epi16(centermulsix_hi, _mm_add_epi16(_mm_add_epi16(pp_hi, _mm_slli_epi16(_mm_add_epi16(p_hi, n_hi), 2)), nn_hi));

      res_lo = _mm_srli_epi16(_mm_add_epi16(res_lo, eight), 4);
      res_hi = _mm_srli_epi16(_mm_add_epi16(res_hi, eight), 4);
      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), _mm_packus_epi16(res_lo, res_hi));
    }

    srcp += stride;
    dstp += stride;
  }
}
826 | #endif
827 |
// Scalar horizontal [1 4 6 4 1]/16 blur.
//
// Inner columns use the full five-tap kernel. In the two first and two last
// columns the taps that would fall outside the row are dropped and their
// weight is added to the tap mirrored around the centre (e.g. column 0 uses
// 6*s[0] + 8*s[1] + 2*s[2]). Every result is rounded with +8 before the
// division by 16.
//
// srcp         : source plane
// dstp         : destination plane
// stride       : distance in bytes between rows (shared by both planes)
// width/height : processed area in pixels
void HorizontalBlur6_c(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height)
{
  const uint8_t* in = srcp;
  uint8_t* out = dstp;
  const int w = width;

  for (int row = 0; row < height; ++row, in += stride, out += stride)
  {
    // left border
    out[0] = (6 * in[0] + 8 * in[1] + 2 * in[2] + 8) / 16;
    out[1] = (4 * (in[0] + in[2]) + 6 * in[1] + 2 * in[3] + 8) / 16;

    // interior
    for (int col = 2; col < w - 2; ++col)
      out[col] = (in[col - 2] + 4 * (in[col - 1] + in[col + 1]) + 6 * in[col] + in[col + 2] + 8) / 16;

    // right border
    out[w - 2] = (2 * in[w - 4] + 4 * (in[w - 3] + in[w - 1]) + 6 * in[w - 2] + 8) / 16;
    out[w - 1] = (2 * in[w - 3] + 8 * in[w - 2] + 6 * in[w - 1] + 8) / 16;
  }
}
845 |
846 | #ifdef INTEL_INTRINSICS
// In-place mask refinement (SSE2): a byte keeps its bits only where at least
// one of the six neighbours in the rows directly above and below has them set.
//
//   srcp[x] &= (srcpp[x - 1] | srcpp[x] | srcpp[x + 1] |
//               srcpn[x - 1] | srcpn[x] | srcpn[x + 1]);
//
// Rows are processed top to bottom in place, so from the second processed row
// on, the "previous" row has already been modified by this function.
//
// Every processed row reads the rows at -stride and +stride, as well as one
// byte to the left and right of the processed span.
// NOTE(review): presumably the caller passes an interior region so that these
// neighbours exist - confirm against the call site.
//
// The centre load/store is aligned (srcp + x must be 16-byte aligned); the
// neighbour loads are unaligned.
//
// srcp         : in/out mask plane
// stride       : distance in bytes between rows
// width/height : processed area in pixels
void andNeighborsInPlace_SSE2_simd(uint8_t* srcp, int stride, int width, int height)
{
  uint8_t* srcpp = srcp - stride;
  uint8_t* srcpn = srcp + stride;

  for (int y = 0; y < height; y++)
  {
    for (int x = 0; x < width; x += 16) {
      const __m128i src_0 = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp + x));
      const __m128i src_p_m1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcpp + x - 1));
      const __m128i src_p = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcpp + x));
      const __m128i src_p_p1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcpp + x + 1));
      const __m128i src_n_m1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcpn + x - 1));
      const __m128i src_n = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcpn + x));
      const __m128i src_n_p1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcpn + x + 1));
      const __m128i result_p = _mm_or_si128(_mm_or_si128(src_p_m1, src_p), src_p_p1);
      const __m128i result_n = _mm_or_si128(_mm_or_si128(src_n_m1, src_n), src_n_p1);
      const __m128i result = _mm_and_si128(src_0, _mm_or_si128(result_p, result_n));
      _mm_store_si128(reinterpret_cast<__m128i*>(srcp + x), result);
    }

    srcpp += stride;
    srcp += stride;
    srcpn += stride;
  }
}
874 | #endif
875 |
--------------------------------------------------------------------------------
/TComb/avs/alignment.h:
--------------------------------------------------------------------------------
1 | // Avisynth C Interface Version 0.20
2 | // Copyright 2003 Kevin Atkinson
3 |
4 | // This program is free software; you can redistribute it and/or modify
5 | // it under the terms of the GNU General Public License as published by
6 | // the Free Software Foundation; either version 2 of the License, or
7 | // (at your option) any later version.
8 | //
9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program; if not, write to the Free Software
16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
17 | // http://www.gnu.org/copyleft/gpl.html .
18 | //
19 | // As a special exception, I give you permission to link to the
20 | // Avisynth C interface with independent modules that communicate with
21 | // the Avisynth C interface solely through the interfaces defined in
22 | // avisynth_c.h, regardless of the license terms of these independent
23 | // modules, and to copy and distribute the resulting combined work
24 | // under terms of your choice, provided that every copy of the
25 | // combined work is accompanied by a complete copy of the source code
26 | // of the Avisynth C interface and Avisynth itself (with the version
27 | // used to produce the combined work), being distributed under the
28 | // terms of the GNU General Public License plus this exception. An
29 | // independent module is a module which is not derived from or based
30 | // on Avisynth C Interface, such as 3rd-party filters, import and
31 | // export plugins, or graphical user interfaces.
32 |
33 | #ifndef AVS_ALIGNMENT_H
34 | #define AVS_ALIGNMENT_H
35 |
36 | // Functions and macros to help work with alignment requirements.
37 |
38 | // Tells if a number is a power of two.
39 | #define IS_POWER2(n) ((n) && !((n) & ((n) - 1)))
40 |
41 | // Tells if the pointer "ptr" is aligned to "align" bytes.
42 | #define IS_PTR_ALIGNED(ptr, align) (((uintptr_t)ptr & ((uintptr_t)(align-1))) == 0)
43 |
44 | // Rounds up the number "n" to the next greater multiple of "align"
45 | #define ALIGN_NUMBER(n, align) (((n) + (align)-1) & (~((align)-1)))
46 |
47 | // Rounds up the pointer address "ptr" to the next greater multiple of "align"
48 | #define ALIGN_POINTER(ptr, align) (((uintptr_t)(ptr) + (align)-1) & (~(uintptr_t)((align)-1)))
49 |
50 | #ifdef __cplusplus
51 |
52 | #include
53 | #include
54 | #include
55 | #include "config.h"
56 |
57 | #if defined(MSVC) && _MSC_VER<1400
58 | // needed for VS2013, otherwise C++11 'alignas' works
59 | #define avs_alignas(x) __declspec(align(x))
60 | #else
61 | // assumes C++11 support
62 | #define avs_alignas(x) alignas(x)
63 | #endif
64 |
65 | template
66 | static bool IsPtrAligned(T* ptr, size_t align)
67 | {
68 | assert(IS_POWER2(align));
69 | return (bool)IS_PTR_ALIGNED(ptr, align);
70 | }
71 |
// Rounds "n" up to the next multiple of "align" (returns "n" itself when it
// is already a multiple). "align" must be a power of two (checked with assert).
template<typename T>
static T AlignNumber(T n, T align)
{
  assert(IS_POWER2(align));
  return ALIGN_NUMBER(n, align);
}
78 |
79 | template
80 | static T* AlignPointer(T* ptr, size_t align)
81 | {
82 | assert(IS_POWER2(align));
83 | return (T*)ALIGN_POINTER(ptr, align);
84 | }
85 |
86 | extern "C"
87 | {
88 | #else
89 | #include
90 | #endif // __cplusplus
91 |
// Returns a new buffer that is at least the size "nbytes".
// The buffer will be aligned to "align" bytes, which must be a power of two.
// Returns NULL on error (invalid alignment, size overflow or out of memory).
// On successful allocation, the returned buffer must be freed using "avs_free".
//
// Layout: the block obtained from malloc() is over-allocated by
// sizeof(void*) + align - 1 bytes; the original malloc() pointer is stored in
// the pointer-sized slot directly in front of the returned aligned address.
inline void* avs_malloc(size_t nbytes, size_t align)
{
  // power-of-two check (zero is rejected as well)
  if (align == 0 || (align & (align - 1)) != 0)
    return NULL;

  size_t offset = sizeof(void*) + align - 1;

  // Reject requests whose padded size would wrap around size_t; otherwise a
  // much smaller block than requested would be handed out.
  if (nbytes > (size_t)-1 - offset)
    return NULL;

  void *orig = malloc(nbytes + offset);
  if (orig == NULL)
    return NULL;

  void **aligned = (void**)(((uintptr_t)orig + (uintptr_t)offset) & (~(uintptr_t)(align-1)));
  aligned[-1] = orig;
  return aligned;
}
111 |
// Releases a buffer obtained from "avs_malloc". Such buffers must never be
// passed to plain "free": the pointer that malloc() actually returned is kept
// in the pointer-sized slot directly in front of the aligned address.
inline void avs_free(void *ptr)
{
  // Like free(), passing NULL is allowed and does nothing.
  if (ptr != NULL)
  {
    void **slot = (void**)ptr - 1;
    free(*slot);
  }
}
122 |
123 | #ifdef __cplusplus
124 | } // extern "C"
125 |
126 | // The point of these undef's is to force using the template functions
127 | // if we are in C++ mode. For C, the user can rely only on the macros.
128 | #undef IS_PTR_ALIGNED
129 | #undef ALIGN_NUMBER
130 | #undef ALIGN_POINTER
131 |
132 | #endif // __cplusplus
133 |
134 | #endif //AVS_ALIGNMENT_H
135 |
--------------------------------------------------------------------------------
/TComb/avs/capi.h:
--------------------------------------------------------------------------------
1 | // Avisynth C Interface Version 0.20
2 | // Copyright 2003 Kevin Atkinson
3 |
4 | // This program is free software; you can redistribute it and/or modify
5 | // it under the terms of the GNU General Public License as published by
6 | // the Free Software Foundation; either version 2 of the License, or
7 | // (at your option) any later version.
8 | //
9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program; if not, write to the Free Software
16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
17 | // http://www.gnu.org/copyleft/gpl.html .
18 | //
19 | // As a special exception, I give you permission to link to the
20 | // Avisynth C interface with independent modules that communicate with
21 | // the Avisynth C interface solely through the interfaces defined in
22 | // avisynth_c.h, regardless of the license terms of these independent
23 | // modules, and to copy and distribute the resulting combined work
24 | // under terms of your choice, provided that every copy of the
25 | // combined work is accompanied by a complete copy of the source code
26 | // of the Avisynth C interface and Avisynth itself (with the version
27 | // used to produce the combined work), being distributed under the
28 | // terms of the GNU General Public License plus this exception. An
29 | // independent module is a module which is not derived from or based
30 | // on Avisynth C Interface, such as 3rd-party filters, import and
31 | // export plugins, or graphical user interfaces.
32 |
33 | #ifndef AVS_CAPI_H
34 | #define AVS_CAPI_H
35 |
36 | #include "config.h"
37 |
// On POSIX targets (except Haiku) __declspec(...) is turned into a no-op macro,
// so the __declspec(dllexport/dllimport) uses further down expand to nothing.
#ifdef AVS_POSIX
// this is also defined in avs/posix.h
#ifndef AVS_HAIKU
#define __declspec(x)
#endif
#endif

// EXTERN_C: extern "C" linkage specifier when compiled as C++, empty in C.
#ifdef __cplusplus
# define EXTERN_C extern "C"
#else
# define EXTERN_C
#endif

// AVSC_CC: calling convention used by the C interface functions.
//  - Windows: __stdcall when AVSC_USE_STDCALL is defined, __cdecl otherwise;
//    left empty for 32-bit GCC builds of the core and for clients that define
//    AVSC_WIN32_GCC32.
//  - everything else: empty.
#ifdef AVS_WINDOWS
#ifdef BUILDING_AVSCORE
# if defined(GCC) && defined(X86_32)
# define AVSC_CC
# else // MSVC builds and 64-bit GCC
# ifndef AVSC_USE_STDCALL
# define AVSC_CC __cdecl
# else
# define AVSC_CC __stdcall
# endif
# endif
#else // needed for programs that talk to AviSynth+
# ifndef AVSC_WIN32_GCC32 // see comment below
# ifndef AVSC_USE_STDCALL
# define AVSC_CC __cdecl
# else
# define AVSC_CC __stdcall
# endif
# else
# define AVSC_CC
# endif
#endif
# else
# define AVSC_CC
#endif
76 |
77 | // On 64-bit Windows, there's only one calling convention,
78 | // so there is no difference between MSVC and GCC. On 32-bit,
79 | // this isn't true. The convention that GCC needs to use to
80 | // even build AviSynth+ as 32-bit makes anything that uses
81 | // it incompatible with 32-bit MSVC builds of AviSynth+.
82 | // The AVSC_WIN32_GCC32 define is meant to provide a user
83 | // switchable way to make builds of FFmpeg to test 32-bit
84 | // GCC builds of AviSynth+ without having to screw around
85 | // with alternate headers, while still default to the usual
86 | // situation of using 32-bit MSVC builds of AviSynth+.
87 |
88 | // Hopefully, this situation will eventually be resolved
89 | // and a broadly compatible solution will arise so the
90 | // same 32-bit FFmpeg build can handle either MSVC or GCC
91 | // builds of AviSynth+.
92 |
93 | #define AVSC_INLINE static __inline
94 |
// AVSC_EXPORT / AVSC_API(ret, name): decorate C interface declarations.
//  - Building the core on Windows: functions are declared dllexport.
//  - Building the core elsewhere: plain extern "C" declarations.
//  - Client code (core not being built): AVSC_EXPORT marks the client's own
//    exported entry points; AVSC_API declares the function as dllimport, or,
//    when AVSC_NO_DECLSPEC is defined, only a function pointer typedef named
//    <name>_func.
#ifdef BUILDING_AVSCORE
#ifdef AVS_WINDOWS
# define AVSC_EXPORT __declspec(dllexport)
# define AVSC_API(ret, name) EXTERN_C AVSC_EXPORT ret AVSC_CC name
#else
# define AVSC_EXPORT EXTERN_C
# define AVSC_API(ret, name) EXTERN_C ret AVSC_CC name
#endif
#else
# define AVSC_EXPORT EXTERN_C __declspec(dllexport)
# ifndef AVSC_NO_DECLSPEC
# define AVSC_API(ret, name) EXTERN_C __declspec(dllimport) ret AVSC_CC name
# else
# define AVSC_API(ret, name) typedef ret (AVSC_CC *name##_func)
# endif
#endif
111 |
112 | #endif //AVS_CAPI_H
113 |
--------------------------------------------------------------------------------
/TComb/avs/config.h:
--------------------------------------------------------------------------------
1 | // Avisynth C Interface Version 0.20
2 | // Copyright 2003 Kevin Atkinson
3 |
4 | // This program is free software; you can redistribute it and/or modify
5 | // it under the terms of the GNU General Public License as published by
6 | // the Free Software Foundation; either version 2 of the License, or
7 | // (at your option) any later version.
8 | //
9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program; if not, write to the Free Software
16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
17 | // http://www.gnu.org/copyleft/gpl.html .
18 | //
19 | // As a special exception, I give you permission to link to the
20 | // Avisynth C interface with independent modules that communicate with
21 | // the Avisynth C interface solely through the interfaces defined in
22 | // avisynth_c.h, regardless of the license terms of these independent
23 | // modules, and to copy and distribute the resulting combined work
24 | // under terms of your choice, provided that every copy of the
25 | // combined work is accompanied by a complete copy of the source code
26 | // of the Avisynth C interface and Avisynth itself (with the version
27 | // used to produce the combined work), being distributed under the
28 | // terms of the GNU General Public License plus this exception. An
29 | // independent module is a module which is not derived from or based
30 | // on Avisynth C Interface, such as 3rd-party filters, import and
31 | // export plugins, or graphical user interfaces.
32 |
33 | #ifndef AVS_CONFIG_H
34 | #define AVS_CONFIG_H
35 |
36 | // Undefine this to get cdecl calling convention
37 | #define AVSC_USE_STDCALL 1
38 |
39 | // NOTE TO PLUGIN AUTHORS:
40 | // Because FRAME_ALIGN can be substantially higher than the alignment
41 | // a plugin actually needs, plugins should not use FRAME_ALIGN to check for
42 | // alignment. They should always request the exact alignment value they need.
43 | // This is to make sure that plugins work over the widest range of AviSynth
44 | // builds possible.
45 | #define FRAME_ALIGN 64
46 |
// --- CPU architecture ------------------------------------------------------
// Defines exactly one of X86_64, X86_32, ARM64, ARM32, PPC64, PPC32 from the
// compiler's predefined macros; any other architecture is a hard error.
#if defined(_M_AMD64) || defined(__x86_64)
# define X86_64
#elif defined(_M_IX86) || defined(__i386__)
# define X86_32
// VS2017 introduced _M_ARM64
#elif defined(_M_ARM64) || defined(__aarch64__)
# define ARM64
#elif defined(_M_ARM) || defined(__arm__)
# define ARM32
#elif defined(__PPC64__)
# define PPC64
#elif defined(_M_PPC) || defined(__PPC__) || defined(__POWERPC__)
# define PPC32
#else
# error Unsupported CPU architecture.
#endif

// --- Compiler --------------------------------------------------------------
// Macros defined per compiler:
//
//             VC++   LLVM-Clang-cl   MinGW-Gnu
// MSVC         x          x
// MSVC_PURE    x
// CLANG                   x
// GCC                                    x
//
// (CLANG is defined for every clang flavour; MSVC is added only when clang
// also defines _MSC_VER, i.e. clang-cl.)
// AVS_FORCEINLINE expands to the compiler's "always inline" spelling.

#if defined(__clang__)
// Check clang first. clang-cl also defines _MSC_VER
// We set MSVC because they are mostly compatible
# define CLANG
#if defined(_MSC_VER)
# define MSVC
# define AVS_FORCEINLINE __attribute__((always_inline))
#else
# define AVS_FORCEINLINE __attribute__((always_inline)) inline
#endif
#elif defined(_MSC_VER)
# define MSVC
# define MSVC_PURE
# define AVS_FORCEINLINE __forceinline
#elif defined(__GNUC__)
# define GCC
# define AVS_FORCEINLINE __attribute__((always_inline)) inline
#else
// The #error stops compilation; the fallback definitions below it only take
// effect if the #error line is removed.
# error Unsupported compiler.
# define AVS_FORCEINLINE inline
# undef __forceinline
# define __forceinline inline
#endif

// --- Operating system ------------------------------------------------------
// Defines AVS_WINDOWS, or AVS_POSIX together with one of AVS_LINUX, AVS_BSD,
// AVS_MACOS, AVS_HAIKU; any other system is a hard error.
#if defined(_WIN32)
# define AVS_WINDOWS
#elif defined(__linux__)
# define AVS_LINUX
# define AVS_POSIX
#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
# define AVS_BSD
# define AVS_POSIX
#elif defined(__APPLE__)
# define AVS_MACOS
# define AVS_POSIX
#elif defined(__HAIKU__)
# define AVS_HAIKU
# define AVS_POSIX
#else
# error Operating system unsupported.
#endif
111 |
112 | // useful warnings disabler macros for supported compilers
113 |
// Portable wrappers around the compiler-specific pragmas for locally
// suppressing warnings. Intended use:
//   DISABLE_WARNING_PUSH
//   DISABLE_WARNING_UNREFERENCED_FUNCTION
//   ... code ...
//   DISABLE_WARNING_POP
// On compilers other than MSVC / GCC / clang all of them expand to nothing.
#if defined(_MSC_VER)
// MSVC (and clang-cl): warnings are identified by number.
#define DISABLE_WARNING_PUSH __pragma(warning( push ))
#define DISABLE_WARNING_POP __pragma(warning( pop ))
#define DISABLE_WARNING(warningNumber) __pragma(warning( disable : warningNumber ))

#define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE DISABLE_WARNING(4101)
#define DISABLE_WARNING_UNREFERENCED_FUNCTION DISABLE_WARNING(4505)
// other warnings you want to deactivate...

#elif defined(__GNUC__) || defined(__clang__)
// GCC / clang: warnings are identified by their -W option name.
// DO_PRAGMA stringizes its argument so it can be passed to _Pragma.
#define DO_PRAGMA(X) _Pragma(#X)
#define DISABLE_WARNING_PUSH DO_PRAGMA(GCC diagnostic push)
#define DISABLE_WARNING_POP DO_PRAGMA(GCC diagnostic pop)
#define DISABLE_WARNING(warningName) DO_PRAGMA(GCC diagnostic ignored #warningName)

#define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE DISABLE_WARNING(-Wunused-variable)
#define DISABLE_WARNING_UNREFERENCED_FUNCTION DISABLE_WARNING(-Wunused-function)
// other warnings you want to deactivate...

#else
#define DISABLE_WARNING_PUSH
#define DISABLE_WARNING_POP
#define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE
#define DISABLE_WARNING_UNREFERENCED_FUNCTION
// other warnings you want to deactivate...

#endif
141 |
142 | #if defined(AVS_POSIX)
143 | #define NEW_AVSVALUE
144 | #else
145 | #define NEW_AVSVALUE
146 | #endif
147 |
148 | #if defined(AVS_WINDOWS)
149 | // Windows XP does not have proper initialization for
150 | // thread local variables.
151 | // Use workaround instead __declspec(thread)
152 | #define XP_TLS
153 | #endif
154 |
155 | #endif //AVS_CONFIG_H
156 |
--------------------------------------------------------------------------------
/TComb/avs/cpuid.h:
--------------------------------------------------------------------------------
1 | // This program is free software; you can redistribute it and/or modify
2 | // it under the terms of the GNU General Public License as published by
3 | // the Free Software Foundation; either version 2 of the License, or
4 | // (at your option) any later version.
5 | //
6 | // This program is distributed in the hope that it will be useful,
7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception. An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 |
32 | #ifndef AVSCORE_CPUID_H
33 | #define AVSCORE_CPUID_H
34 |
35 | // For GetCPUFlags. These are backwards-compatible with those in VirtualDub.
36 | // ending with SSE4_2
37 | // For emulation see https://software.intel.com/en-us/articles/intel-software-development-emulator
// CPU feature flags (one bit per feature unless noted otherwise).
// The trailing comment names the oldest CPU supporting the extension.
enum {
  CPUF_FORCE       = 1 << 0,   // N/A
  CPUF_FPU         = 1 << 1,   // 386/486DX
  CPUF_MMX         = 1 << 2,   // P55C, K6, PII
  CPUF_INTEGER_SSE = 1 << 3,   // PIII, Athlon
  CPUF_SSE         = 1 << 4,   // PIII, Athlon XP/MP
  CPUF_SSE2        = 1 << 5,   // PIV, K8
  CPUF_3DNOW       = 1 << 6,   // K6-2
  CPUF_3DNOW_EXT   = 1 << 7,   // Athlon
  // Hammer. Not a bit of its own: 0xA0, i.e. the SSE2 and 3DNow-ext bits
  // combined (a combination only Hammer will have anyway).
  CPUF_X86_64      = CPUF_SSE2 | CPUF_3DNOW_EXT,
  CPUF_SSE3        = 1 << 8,   // PIV+, K8 Venice
  CPUF_SSSE3       = 1 << 9,   // Core 2
  CPUF_SSE4        = 1 << 10,  // alias of CPUF_SSE4_1
  CPUF_SSE4_1      = 1 << 10,  // Penryn, Wolfdale, Yorkfield
  CPUF_AVX         = 1 << 11,  // Sandy Bridge, Bulldozer
  CPUF_SSE4_2      = 1 << 12,  // Nehalem
  // AVS+
  CPUF_AVX2        = 1 << 13,  // Haswell
  CPUF_FMA3        = 1 << 14,
  CPUF_F16C        = 1 << 15,
  CPUF_MOVBE       = 1 << 16,  // Big Endian move
  CPUF_POPCNT      = 1 << 17,
  CPUF_AES         = 1 << 18,
  CPUF_FMA4        = 1 << 19,

  CPUF_AVX512F     = 1 << 20,  // AVX-512 Foundation.
  CPUF_AVX512DQ    = 1 << 21,  // AVX-512 DQ (Double/Quad granular) Instructions
  CPUF_AVX512PF    = 1 << 22,  // AVX-512 Prefetch
  CPUF_AVX512ER    = 1 << 23,  // AVX-512 Exponential and Reciprocal
  CPUF_AVX512CD    = 1 << 24,  // AVX-512 Conflict Detection
  CPUF_AVX512BW    = 1 << 25,  // AVX-512 BW (Byte/Word granular) Instructions
  CPUF_AVX512VL    = 1 << 26,  // AVX-512 VL (128/256 Vector Length) Extensions
  CPUF_AVX512IFMA  = 1 << 27,  // AVX-512 IFMA integer 52 bit
  CPUF_AVX512VBMI  = 1 << 28,  // AVX-512 VBMI
};
75 |
76 | #ifdef BUILDING_AVSCORE
77 | int GetCPUFlags();
78 | void SetMaxCPU(int new_flags);
79 | #endif
80 |
81 | #endif // AVSCORE_CPUID_H
82 |
--------------------------------------------------------------------------------
/TComb/avs/filesystem.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | // Snippet copied from filesystem/README.md
4 |
// Selects the implementation behind the "fs" namespace alias:
//  - std::filesystem when compiling as C++17 or later and the <filesystem>
//    header is available;
//  - the ghc::filesystem drop-in replacement otherwise.
#if defined(__cplusplus) && __cplusplus >= 201703L && defined(__has_include)
#if __has_include(<filesystem>)
#define GHC_USE_STD_FS
#include <filesystem>
namespace fs = std::filesystem;
#endif
#endif
#ifndef GHC_USE_STD_FS
#include <ghc/filesystem.hpp>
namespace fs = ghc::filesystem;
#endif
16 |
--------------------------------------------------------------------------------
/TComb/avs/minmax.h:
--------------------------------------------------------------------------------
1 | // This program is free software; you can redistribute it and/or modify
2 | // it under the terms of the GNU General Public License as published by
3 | // the Free Software Foundation; either version 2 of the License, or
4 | // (at your option) any later version.
5 | //
6 | // This program is distributed in the hope that it will be useful,
7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception. An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 |
32 | #ifndef AVSCORE_MINMAX_H
33 | #define AVSCORE_MINMAX_H
34 | // Returns the smaller of v1 and v2 as ordered by operator<; v2 is returned when v1 is not smaller.
35 | template<typename T>
36 | T min(T v1, T v2)
37 | {
38 |   return v1 < v2 ? v1 : v2;
39 | }
40 | // Returns the larger of v1 and v2 as ordered by operator>; v2 is returned when v1 is not larger.
41 | template<typename T>
42 | T max(T v1, T v2)
43 | {
44 |   return v1 > v2 ? v1 : v2;
45 | }
46 | // Limits n to [min, max]: n is first capped at max, then the result is raised to min if it is below it.
47 | template<typename T>
48 | T clamp(T n, T min, T max)
49 | {
50 |   n = n > max ? max : n;
51 |   return n < min ? min : n;
52 | }
53 |
54 | #endif // AVSCORE_MINMAX_H
55 |
--------------------------------------------------------------------------------
/TComb/avs/posix.h:
--------------------------------------------------------------------------------
1 | // This program is free software; you can redistribute it and/or modify
2 | // it under the terms of the GNU General Public License as published by
3 | // the Free Software Foundation; either version 2 of the License, or
4 | // (at your option) any later version.
5 | //
6 | // This program is distributed in the hope that it will be useful,
7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception. An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 |
32 | #ifdef AVS_POSIX
33 | #ifndef AVSCORE_POSIX_H
34 | #define AVSCORE_POSIX_H
35 |
36 | #ifdef __cplusplus
37 | #include <cstring> // strlen, strcmp: targets of the lstrlen/lstrcmp mappings below
38 | #endif
39 | #include <strings.h> // strcasecmp, strncasecmp
40 | #include <unistd.h> // chdir, getcwd
41 |
42 | // MSVC-specific keywords used in Avisynth are defined away (they expand to nothing)
43 | #define __single_inheritance
44 |
45 | // Win32/MSVC names that don't exist in Linux, mapped onto their libc counterparts
46 | #if defined(AVS_HAIKU)
47 | #undef __declspec // presumably Haiku's headers already define it — TODO confirm
48 | #endif
49 | #define __declspec(x) // expands to nothing
50 | #define lstrlen strlen
51 | #define lstrcmp strcmp
52 | #define lstrcmpi strcasecmp
53 | #define _stricmp strcasecmp
54 | #define _strnicmp strncasecmp
55 | #define _strdup strdup
56 | #define SetCurrentDirectory(x) chdir(x)
57 | #define SetCurrentDirectoryW(x) chdir(x)
58 | #define GetCurrentDirectoryW(x) getcwd(x) // NOTE(review): POSIX getcwd takes (buf, size); this one-argument form looks unusable as written — confirm
59 | #define _putenv putenv
60 | #define _alloca alloca
61 |
62 | // Compatibility macros borrowed from AvxSynth, slightly modified: widening 32x32->64 multiplies and a 64-bit right shift
63 | #define UInt32x32To64(a, b) ((uint64_t)(((uint64_t)((uint32_t)(a))) * ((uint32_t)(b))))
64 | #define Int64ShrlMod32(a, b) ((uint64_t)((uint64_t)(a) >> (b)))
65 | #define Int32x32To64(a, b) ((int64_t)(((int64_t)((long)(a))) * ((long)(b)))) // NOTE(review): operands are cast to long, not int32_t — confirm this is intended where long is 64-bit
66 | // Increment/decrement through the __sync builtins; MulDiv multiplies in 64-bit and adds nDenominator/2 before dividing
67 | #define InterlockedIncrement(x) __sync_add_and_fetch((x), 1)
68 | #define InterlockedDecrement(x) __sync_sub_and_fetch((x), 1)
69 | #define MulDiv(nNumber, nNumerator, nDenominator) (int32_t) (((int64_t) (nNumber) * (int64_t) (nNumerator) + (int64_t) ((nDenominator)/2)) / (int64_t) (nDenominator))
70 | // TRUE/FALSE map to the built-in boolean literals unless something already defined them
71 | #ifndef TRUE
72 | #define TRUE true
73 | #endif
74 |
75 | #ifndef FALSE
76 | #define FALSE false
77 | #endif
78 | // Result-code helpers: FAILED() tests bit 31 (0x80000000) of the code, SUCCEEDED() is its negation
79 | #define S_FALSE (0x00000001)
80 | #define E_FAIL (0x80004005)
81 | #define FAILED(hr) ((hr) & 0x80000000)
82 | #define SUCCEEDED(hr) (!FAILED(hr))
83 |
84 | // Exception status codes, provided here as plain constants (values copied from comments in exception.cpp)
85 | #define STATUS_GUARD_PAGE_VIOLATION 0x80000001
86 | #define STATUS_DATATYPE_MISALIGNMENT 0x80000002
87 | #define STATUS_BREAKPOINT 0x80000003
88 | #define STATUS_SINGLE_STEP 0x80000004
89 | #define STATUS_ACCESS_VIOLATION 0xc0000005
90 | #define STATUS_IN_PAGE_ERROR 0xc0000006
91 | #define STATUS_INVALID_HANDLE 0xc0000008
92 | #define STATUS_NO_MEMORY 0xc0000017
93 | #define STATUS_ILLEGAL_INSTRUCTION 0xc000001d
94 | #define STATUS_NONCONTINUABLE_EXCEPTION 0xc0000025
95 | #define STATUS_INVALID_DISPOSITION 0xc0000026
96 | #define STATUS_ARRAY_BOUNDS_EXCEEDED 0xc000008c
97 | #define STATUS_FLOAT_DENORMAL_OPERAND 0xc000008d
98 | #define STATUS_FLOAT_DIVIDE_BY_ZERO 0xc000008e
99 | #define STATUS_FLOAT_INEXACT_RESULT 0xc000008f
100 | #define STATUS_FLOAT_INVALID_OPERATION 0xc0000090
101 | #define STATUS_FLOAT_OVERFLOW 0xc0000091
102 | #define STATUS_FLOAT_STACK_CHECK 0xc0000092
103 | #define STATUS_FLOAT_UNDERFLOW 0xc0000093
104 | #define STATUS_INTEGER_DIVIDE_BY_ZERO 0xc0000094
105 | #define STATUS_INTEGER_OVERFLOW 0xc0000095
106 | #define STATUS_PRIVILEGED_INSTRUCTION 0xc0000096
107 | #define STATUS_STACK_OVERFLOW 0xc00000fd
108 |
109 | // Calling-convention keywords expand to nothing here (left untouched when AVS_HAIKU is defined)
110 | #ifndef AVS_HAIKU
111 | #define __stdcall
112 | #define __cdecl
113 | #endif
114 |
115 | // PowerPC OS X is really niche these days, but simply aliasing the 64-suffixed
116 | // function/macro names used in posix_get_available_memory() to their
117 | // unsuffixed counterparts is all it takes to let it work. The G5 was 64-bit, and if 10.5 Leopard
118 | // can run in native 64-bit, it probably uses the names in that block as-is.
119 | #ifdef AVS_MACOS
120 | #ifdef PPC32
121 | #define vm_statistics64_data_t vm_statistics_data_t
122 | #define HOST_VM_INFO64_COUNT HOST_VM_INFO_COUNT
123 | #define HOST_VM_INFO64 HOST_VM_INFO
124 | #define host_statistics64 host_statistics
125 | #endif // PPC32
126 | #endif // AVS_MACOS
127 |
128 | #endif // AVSCORE_POSIX_H
129 | #endif // AVS_POSIX
130 |
--------------------------------------------------------------------------------
/TComb/avs/types.h:
--------------------------------------------------------------------------------
1 | // Avisynth C Interface Version 0.20
2 | // Copyright 2003 Kevin Atkinson
3 |
4 | // This program is free software; you can redistribute it and/or modify
5 | // it under the terms of the GNU General Public License as published by
6 | // the Free Software Foundation; either version 2 of the License, or
7 | // (at your option) any later version.
8 | //
9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program; if not, write to the Free Software
16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
17 | // http://www.gnu.org/copyleft/gpl.html .
18 | //
19 | // As a special exception, I give you permission to link to the
20 | // Avisynth C interface with independent modules that communicate with
21 | // the Avisynth C interface solely through the interfaces defined in
22 | // avisynth_c.h, regardless of the license terms of these independent
23 | // modules, and to copy and distribute the resulting combined work
24 | // under terms of your choice, provided that every copy of the
25 | // combined work is accompanied by a complete copy of the source code
26 | // of the Avisynth C interface and Avisynth itself (with the version
27 | // used to produce the combined work), being distributed under the
28 | // terms of the GNU General Public License plus this exception. An
29 | // independent module is a module which is not derived from or based
30 | // on Avisynth C Interface, such as 3rd-party filters, import and
31 | // export plugins, or graphical user interfaces.
32 |
33 | #ifndef AVS_TYPES_H
34 | #define AVS_TYPES_H
35 |
36 | // Define all types necessary for interfacing with avisynth.dll
37 | #include <stdint.h> // uint32_t / uint8_t used by the typedefs below
38 | #include <stdlib.h>
39 | #ifdef __cplusplus
40 | #include <cstddef>
41 | #include <cstdint>
42 | #else
43 | #include <stddef.h>
44 | #include <stdint.h>
45 | #endif
46 |
47 | // Raster types used by VirtualDub & Avisynth
48 | typedef uint32_t Pixel32; // unsigned 32-bit value
49 | typedef uint8_t BYTE; // unsigned 8-bit value
50 |
51 | // Audio Sample information
52 | typedef float SFLOAT; // sample held as a float
53 |
54 | #endif //AVS_TYPES_H
55 |
--------------------------------------------------------------------------------
/TComb/avs/win.h:
--------------------------------------------------------------------------------
1 | // This program is free software; you can redistribute it and/or modify
2 | // it under the terms of the GNU General Public License as published by
3 | // the Free Software Foundation; either version 2 of the License, or
4 | // (at your option) any later version.
5 | //
6 | // This program is distributed in the hope that it will be useful,
7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception. An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 |
32 | #ifndef AVSCORE_WIN_H
33 | #define AVSCORE_WIN_H
34 |
35 | // Whenever you need windows headers, start by including this file, then the rest.
36 |
37 | // Default the targeted Windows API level to 0x0502 unless the build already chose one (the original remark here questioned the resulting XP-era minimum).
38 | #if !defined(NTDDI_VERSION) && !defined(_WIN32_WINNT)
39 | #define NTDDI_VERSION 0x05020000
40 | #define _WIN32_WINNT 0x0502
41 | #endif
42 |
43 | #define WIN32_LEAN_AND_MEAN
44 | #define STRICT
45 | #if !defined(NOMINMAX)
46 | #define NOMINMAX
47 | #endif
48 | // The switches above are defined before the Windows header is included so that they take effect.
49 | #include <windows.h>
50 |
51 | // Provision for UTF-8 max 4 bytes per code point; parenthesized so it stays a single operand inside larger expressions
52 | #define AVS_MAX_PATH (MAX_PATH*4)
53 |
54 | #endif // AVSCORE_WIN_H
55 |
--------------------------------------------------------------------------------
/TComb/common.h:
--------------------------------------------------------------------------------
1 | #ifndef __COMMON_H__
2 | #define __COMMON_H__
3 |
4 | #include "avisynth.h"
5 | #include // NOTE(review): the header name is missing from this directive and must be restored
6 |
7 | #if defined(__clang__)
8 | // Check clang first. clang-cl also defines _MSC_VER
9 | // We set MSVC because they are mostly compatible
10 | # define CLANG
11 | #if defined(_MSC_VER)
12 | # define MSVC
13 | # define TC_FORCEINLINE __attribute__((always_inline)) // NOTE(review): no `inline` keyword here, unlike the other branches — confirm intended
14 | #else
15 | # define TC_FORCEINLINE __attribute__((always_inline)) inline
16 | #endif
17 | #elif defined(_MSC_VER)
18 | # define MSVC
19 | # define MSVC_PURE
20 | # define TC_FORCEINLINE __forceinline
21 | #elif defined(__GNUC__)
22 | # define GCC
23 | # define TC_FORCEINLINE __attribute__((always_inline)) inline
24 | #else
25 | # error Unsupported compiler.
26 | # define TC_FORCEINLINE inline // fallback definitions; the #error above already rejects this configuration
27 | # undef __forceinline
28 | # define __forceinline inline
29 | #endif
30 |
31 |
32 | #ifndef _WIN32
33 | #define OutputDebugString(x) // expands to nothing when _WIN32 is not defined
34 | #endif
35 |
36 | #if (defined(GCC) || defined(CLANG)) && !defined(_WIN32)
37 | #include <stdlib.h> // declares aligned_alloc() and free() used by the mappings below
38 | #define _aligned_malloc(size, alignment) aligned_alloc(alignment, size) // argument order is swapped relative to aligned_alloc
39 | #define _aligned_free(ptr) free(ptr)
40 | #endif
41 |
42 | #ifndef _WIN32
43 | #include // NOTE(review): the header name is missing from this directive and must be restored
44 | #ifdef AVS_POSIX
45 | #ifndef _POSIX_C_SOURCE
46 | #define _POSIX_C_SOURCE 1 // only set when the build has not already defined it
47 | #endif
48 | #include // NOTE(review): the header name is missing from this directive and must be restored
49 | #endif
50 | #endif
51 |
52 | #endif
53 |
--------------------------------------------------------------------------------
/TComb/resource.h:
--------------------------------------------------------------------------------
1 | //{{NO_DEPENDENCIES}}
2 | // Microsoft Visual C++ generated include file.
3 | // Used by TComb.rc
4 |
5 | // Next default values for new objects
6 | //
7 | #ifdef APSTUDIO_INVOKED
8 | #ifndef APSTUDIO_READONLY_SYMBOLS
9 | #define _APS_NEXT_RESOURCE_VALUE 101
10 | #define _APS_NEXT_COMMAND_VALUE 40001
11 | #define _APS_NEXT_CONTROL_VALUE 1001
12 | #define _APS_NEXT_SYMED_VALUE 101
13 | #endif // APSTUDIO_READONLY_SYMBOLS
14 | #endif // APSTUDIO_INVOKED
15 |
--------------------------------------------------------------------------------
/cmake_uninstall.cmake.in:
--------------------------------------------------------------------------------
1 | # Uninstall script template: deletes every path listed in install_manifest.txt.
2 | #
3 | # The at-sign placeholders (CMAKE_CURRENT_BINARY_DIR, CMAKE_COMMAND) must be
4 | # substituted before this script is run. DESTDIR is read from the environment
5 | # and prepended to every manifest entry.
6 |
7 | if(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
8 |   message(FATAL_ERROR "Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
9 | endif()
10 |
11 | # Turn the newline-separated manifest into a CMake list.
12 | file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files)
13 | string(REGEX REPLACE "\n" ";" files "${files}")
14 | foreach(file ${files})
15 |   message(STATUS "Uninstalling $ENV{DESTDIR}${file}")
16 |   # IS_SYMLINK is tested as well so that dangling symlinks, for which EXISTS is false, are still removed.
17 |   if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
18 |     # execute_process() replaces the deprecated exec_program(); each argument reaches
19 |     # the child process as-is, so paths containing spaces need no manual quoting.
20 |     execute_process(
21 |       COMMAND "@CMAKE_COMMAND@" -E remove "$ENV{DESTDIR}${file}"
22 |       OUTPUT_VARIABLE rm_out
23 |       RESULT_VARIABLE rm_retval
24 |     )
25 |     if(NOT "${rm_retval}" STREQUAL 0)
26 |       message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}")
27 |     endif()
28 |   else()
29 |     message(STATUS "File $ENV{DESTDIR}${file} does not exist.")
30 |   endif()
31 | endforeach()
32 |
--------------------------------------------------------------------------------