├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── TComb - ReadMe.txt
├── TComb
    ├── CMakeLists.txt
    ├── Files.cmake
    ├── PlanarFrame.cpp
    ├── PlanarFrame.h
    ├── TComb.cpp
    ├── TComb.h
    ├── TComb.rc
    ├── TComb.sln
    ├── TComb.vcproj
    ├── TComb.vcxproj
    ├── TComb.vcxproj.filters
    ├── TComb_asm.asm
    ├── TComb_asm_x64.asm
    ├── TComb_core.cpp
    ├── avisynth.h
    ├── avs
    │   ├── alignment.h
    │   ├── capi.h
    │   ├── config.h
    │   ├── cpuid.h
    │   ├── filesystem.h
    │   ├── minmax.h
    │   ├── posix.h
    │   ├── types.h
    │   └── win.h
    ├── common.h
    └── resource.h
└── cmake_uninstall.cmake.in


/.gitignore:
--------------------------------------------------------------------------------
  1 | CMakeCache.txt
  2 | CMakeFiles/*
  3 | 
  4 | #cmake generated files
  5 | cmake_install.cmake
  6 | cmake_uninstall.cmake
  7 | generate.stamp
  8 | generate.stamp.depend
  9 | makefile
 10 | 
 11 | #make
 12 | install_manifest.txt
 13 | 
 14 | ## Ignore Visual Studio temporary files, build results, and
 15 | ## files generated by popular Visual Studio add-ons.
 16 | 
 17 | # User-specific files
 18 | *.suo
 19 | *.user
 20 | *.userosscache
 21 | *.sln.docstates
 22 | 
 23 | # User-specific files (MonoDevelop/Xamarin Studio)
 24 | *.userprefs
 25 | 
 26 | # Build results
 27 | [Dd]ebug/
 28 | [Dd]ebugPublic/
 29 | [Rr]elease/
 30 | [Rr]eleases/
 31 | x64/
 32 | x86/
 33 | build/
 34 | bld/
 35 | [Bb]in/
 36 | [Oo]bj/
 37 | 
 38 | # Visual Studio 2015 cache/options directory
 39 | .vs/
 40 | 
 41 | # MSTest test Results
 42 | [Tt]est[Rr]esult*/
 43 | [Bb]uild[Ll]og.*
 44 | 
 45 | # NUNIT
 46 | *.VisualState.xml
 47 | TestResult.xml
 48 | 
 49 | # Build Results of an ATL Project
 50 | [Dd]ebugPS/
 51 | [Rr]eleasePS/
 52 | dlldata.c
 53 | 
 54 | *_i.c
 55 | *_p.c
 56 | *_i.h
 57 | *.ilk
 58 | *.meta
 59 | *.obj
 60 | *.pch
 61 | *.pdb
 62 | *.pgc
 63 | *.pgd
 64 | *.rsp
 65 | *.sbr
 66 | *.tlb
 67 | *.tli
 68 | *.tlh
 69 | *.tmp
 70 | *.tmp_proj
 71 | *.log
 72 | *.vspscc
 73 | *.vssscc
 74 | .builds
 75 | *.pidb
 76 | *.svclog
 77 | *.scc
 78 | 
 79 | # Chutzpah Test files
 80 | _Chutzpah*
 81 | 
 82 | # Visual C++ cache files
 83 | ipch/
 84 | *.aps
 85 | *.ncb
 86 | *.opensdf
 87 | *.sdf
 88 | *.cachefile
 89 | 
 90 | # Visual Studio profiler
 91 | *.psess
 92 | *.vsp
 93 | *.vspx
 94 | 
 95 | # TFS 2012 Local Workspace
 96 | $tf/
 97 | 
 98 | # Guidance Automation Toolkit
 99 | *.gpState
100 | 
101 | # ReSharper is a .NET coding add-in
102 | _ReSharper*/
103 | *.[Rr]e[Ss]harper
104 | *.DotSettings.user
105 | 
106 | # JustCode is a .NET coding add-in
107 | .JustCode
108 | 
109 | # TeamCity is a build add-in
110 | _TeamCity*
111 | 
112 | # DotCover is a Code Coverage Tool
113 | *.dotCover
114 | 
115 | # NCrunch
116 | _NCrunch_*
117 | .*crunch*.local.xml
118 | 
119 | # MightyMoose
120 | *.mm.*
121 | AutoTest.Net/
122 | 
123 | # Web workbench (sass)
124 | .sass-cache/
125 | 
126 | # Installshield output folder
127 | [Ee]xpress/
128 | 
129 | # DocProject is a documentation generator add-in
130 | DocProject/buildhelp/
131 | DocProject/Help/*.HxT
132 | DocProject/Help/*.HxC
133 | DocProject/Help/*.hhc
134 | DocProject/Help/*.hhk
135 | DocProject/Help/*.hhp
136 | DocProject/Help/Html2
137 | DocProject/Help/html
138 | 
139 | # Click-Once directory
140 | publish/
141 | 
142 | # Publish Web Output
143 | *.[Pp]ublish.xml
144 | *.azurePubxml
145 | # TODO: Comment the next line if you want to checkin your web deploy settings 
146 | # but database connection strings (with potential passwords) will be unencrypted
147 | *.pubxml
148 | *.publishproj
149 | 
150 | # NuGet Packages
151 | *.nupkg
152 | # The packages folder can be ignored because of Package Restore
153 | **/packages/*
154 | # except build/, which is used as an MSBuild target.
155 | !**/packages/build/
156 | # Uncomment if necessary however generally it will be regenerated when needed
157 | #!**/packages/repositories.config
158 | 
159 | # Windows Azure Build Output
160 | csx/
161 | *.build.csdef
162 | 
163 | # Windows Store app package directory
164 | AppPackages/
165 | 
166 | # Others
167 | *.[Cc]ache
168 | ClientBin/
169 | [Ss]tyle[Cc]op.*
170 | ~$*
171 | *~
172 | *.dbmdl
173 | *.dbproj.schemaview
174 | *.pfx
175 | *.publishsettings
176 | node_modules/
177 | bower_components/
178 | 
179 | # RIA/Silverlight projects
180 | Generated_Code/
181 | 
182 | # Backup & report files from converting an old project file
183 | # to a newer Visual Studio version. Backup files are not needed,
184 | # because we have git ;-)
185 | _UpgradeReport_Files/
186 | Backup*/
187 | UpgradeLog*.XML
188 | UpgradeLog*.htm
189 | 
190 | # SQL Server files
191 | *.mdf
192 | *.ldf
193 | 
194 | # Business Intelligence projects
195 | *.rdl.data
196 | *.bim.layout
197 | *.bim_*.settings
198 | 
199 | # Microsoft Fakes
200 | FakesAssemblies/
201 | 
202 | # Node.js Tools for Visual Studio
203 | .ntvs_analysis.dat
204 | 
205 | # Visual Studio 6 build log
206 | *.plg
207 | 
208 | # Visual Studio 6 workspace options file
209 | *.opt
210 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | # We need CMake 3.8 at least, because we require
  2 | # CMAKE_CXX_STANDARD to be set to C++17.
  3 | # Visual Studio 2019 is supported from CMake 3.14.1
  4 | # Possible generators:
  5 | # "MinGW Makefiles": MSYS2/Mingw32 GCC 8.3 build
  6 | # "Visual Studio 15 2017" optional platform generator Win32 and x64
  7 | 
  8 | # "Visual Studio 16 2019" optional platform generator Win32 and x64
  9 | # "Visual Studio 16 2019" + LLVM 8.0 (clang) optional platform generator Win32 and x64
 10 | cmake_minimum_required(VERSION 3.8.2)
 11 | 
 12 | project(TComb LANGUAGES CXX)
 13 | include(GNUInstallDirs)
 14 | 
 15 | # Force the default link-library lists to be empty so no unused library gets linked.
 16 | set(CMAKE_CXX_STANDARD_LIBRARIES "" CACHE STRING "" FORCE)
 17 | set(CMAKE_C_STANDARD_LIBRARIES "" CACHE STRING "" FORCE)
 18 | set(CMAKE_STANDARD_LIBRARIES "" CACHE STRING "" FORCE)
 19 | 
 20 | # C++17 is mandatory: the standard is required and compiler extensions are disabled.
 21 | set(CMAKE_CXX_EXTENSIONS FALSE)
 22 | set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
 23 | set(CMAKE_CXX_STANDARD 17)
 24 | 
 25 | # Choose the default for ENABLE_INTEL_SIMD from the (lower-cased) target processor.
 26 | message("-- Detected target processor as: ${CMAKE_SYSTEM_PROCESSOR}")
 27 | string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" ARCHID)
 28 | 
 29 | # Processor names for which Intel SIMD is switched on by default.
 30 | set(tcomb_intel_arch_ids x86 x64 i686 amd64 x86_64)
 31 | set(INTEL_SIMD "OFF")
 32 | if(ARCHID IN_LIST tcomb_intel_arch_ids)
 33 |   set(INTEL_SIMD "ON")
 34 | endif()
 35 | unset(tcomb_intel_arch_ids)
 36 | 
 37 | # INTEL_SIMD only supplies the initial value of the option.
 38 | option(ENABLE_INTEL_SIMD "Enable SIMD intrinsics for Intel processors" "${INTEL_SIMD}")
 39 | 
 40 | # Only when CMAKE_CONFIGURATION_TYPES is already populated: force it to our three configurations.
 41 | if(CMAKE_CONFIGURATION_TYPES)
 42 |   set(CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo" CACHE STRING "Reset the configurations to what we need" FORCE)
 43 | endif()
 44 | 
 45 | IF( MSVC )  # Check for Visual Studio
 46 |   # We do not allow creating Visual Studio solutions, existing .sln file contains
 47 |   # all x86/x64 versions of MSVC and LLVM builds.
 48 |   MESSAGE(FATAL_ERROR "Please use the existing sln file both for MS VC and also for LLVM toolset in VS")
 49 |   # FATAL_ERROR stops processing, so the remainder of this MSVC branch (up to
 50 |   # the matching ELSE()) never runs. It is kept for reference only and is
 51 |   # ** not tested **.
 52 | 
 53 | 
 54 |   # MSVC_VERSION values referenced below:
 55 |   #1910-1919 = VS 15.0 (v141 toolset) Visual Studio 2017
 56 |   #1920      = VS 16.0 (v142 toolset) Visual Studio 2019
 57 | 
 58 |   IF( MSVC_VERSION VERSION_LESS 1910 )
 59 |     MESSAGE(FATAL_ERROR "Visual C++ 2017 or newer required.")
 60 |   ENDIF()
 61 | 
 62 |   IF(MSVC_IDE)
 63 |     message("Reported CMAKE_GENERATOR_TOOLSET is: ${CMAKE_GENERATOR_TOOLSET}")
 64 | 
 65 |       # For LLVM Clang installed separately, specify llvm or LLVM
 66 |       # Since Visual Studio 2019 v16.4, LLVM 9.0 is integrated, for this use Toolset: ClangCL
 67 |       IF(CMAKE_GENERATOR_TOOLSET STREQUAL "LLVM" OR CMAKE_GENERATOR_TOOLSET STREQUAL "llvm" OR CMAKE_GENERATOR_TOOLSET STREQUAL "ClangCL")
 68 |         if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")  # hope: always; quoted so an empty ID cannot break the if()
 69 |           message("LLVM toolset was specified via -T. Compiler ID is: ${CMAKE_CXX_COMPILER_ID}; CMAKE_CXX_COMPILER_VERSION is: ${CMAKE_CXX_COMPILER_VERSION}")
 70 |           # Clang; 9.0.0
 71 |           # These are probably not supported when clang is downloaded as a ready-made binary: CLANG_VERSION_MAJOR CLANG_VERSION_MINOR CLANG_VERSION_STRING
 72 |           # string (REGEX REPLACE ".*clang version ([0-9]+\\.[0-9]+).*" "\\1" CLANG_VERSION_STRING ${clang_full_version_string})
 73 |           if( CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0.1 )
 74 |             MESSAGE(FATAL_ERROR "Clang 7.0.1 or newer required") # as of 2019.december actually we are using 9.0
 75 |           endif()
 76 |         endif()
 77 |         set(CLANG_IN_VS "1")
 78 |       ELSEIF(CMAKE_GENERATOR_TOOLSET STREQUAL "v141_clang_c2")
 79 |          #1900 is reported
 80 |         message("v141_clang_c2 toolset was specified via -T. Reported MSVC_VERSION is: ${MSVC_VERSION}")
 81 |         message("May not work, try LLVM")
 82 |         set(CLANG_IN_VS "1")
 83 |       ENDIF()
 84 | 
 85 |       option(WINXP_SUPPORT "Make binaries compatible with Windows XP and Vista" OFF)
 86 |       if(WINXP_SUPPORT)
 87 |         # We want our project to also run on Windows XP
 88 |         # Not for LLVM: Clang stopped XP support in 2016
 89 |         # 1900 (VS2015) is not supported but we leave here
 90 |         IF(MSVC_VERSION VERSION_LESS 1910 )
 91 |           IF(NOT CLANG_IN_VS STREQUAL "1")
 92 |             set(CMAKE_GENERATOR_TOOLSET "v140_xp" CACHE STRING "The compiler toolset to use for Visual Studio." FORCE) # VS2015
 93 |             # https://connect.microsoft.com/VisualStudio/feedback/details/1789709/visual-c-2015-runtime-broken-on-windows-server-2003-c-11-magic-statics
 94 |             message("CMAKE_GENERATOR_TOOLSET is forced to: ${CMAKE_GENERATOR_TOOLSET}")
 95 |             add_definitions("/Zc:threadSafeInit-")
 96 |           ENDIF()
 97 |         ELSE()
 98 |           IF(NOT CLANG_IN_VS STREQUAL "1")
 99 |             set(CMAKE_GENERATOR_TOOLSET "v141_xp" CACHE STRING "The compiler toolset to use for Visual Studio." FORCE) # VS2017, also choosable for VS2019
100 |             # https://connect.microsoft.com/VisualStudio/feedback/details/1789709/visual-c-2015-runtime-broken-on-windows-server-2003-c-11-magic-statics
101 |             message("CMAKE_GENERATOR_TOOLSET is forced to: ${CMAKE_GENERATOR_TOOLSET}")
102 |             add_definitions("/Zc:threadSafeInit-")
103 |           ENDIF()
104 |         ENDIF()
105 |       endif()
106 |   ENDIF()
107 | 
108 |   IF(CLANG_IN_VS STREQUAL "1")
109 |       #these are unknown
110 |       #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexceptions")
111 |       #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
112 |       STRING( REPLACE "/EHsc" "/EHa" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
113 |       STRING( REPLACE "/EHsc" "/EHa" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
114 |       set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-inconsistent-missing-override")
115 |       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-inconsistent-missing-override")
116 |   ELSE()
117 |       # Enable C++ with SEH exceptions
118 |       # Avoid an obnoxious 'overriding /EHsc with /EHa' warning when
119 |       # using something other than MSBuild
120 |       STRING( REPLACE "/EHsc" "/EHa" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
121 |       STRING( REPLACE "/EHsc" "/EHa" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
122 |   ENDIF()
123 |   # Prevent VC++ from complaining about not using MS-specific functions
124 |   add_definitions("/D _CRT_SECURE_NO_WARNINGS /D _SECURE_SCL=0")
125 | 
126 |   # Enable CRT heap debugging - only effective in debug builds
127 |   add_definitions("/D _CRTDBG_MAP_ALLOC")
128 | 
129 |   # Set additional optimization flags
130 |   set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Oy /Ot /GS- /Oi")
131 |   set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oy /Ot /GS- /Oi")
132 | 
133 |   # CPU_ARCH can be overridden with the corresponding values when using MSVC:
134 |   # IA32 (disabled),
135 |   # SSE (Pentium III and higher, 1999),
136 |   # SSE2 (Pentium 4 and higher, 2000/2001),
137 |   # AVX (Sandy Bridge and higher, 2011),
138 |   # AVX2 (Haswell and higher, 2013)
139 |   set(MSVC_CPU_ARCH "SSE2" CACHE STRING "Set MSVC architecture optimization level (default: SSE2)")
140 | 
141 |   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:${MSVC_CPU_ARCH}")
142 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:${MSVC_CPU_ARCH}")
143 | 
144 |   if(CMAKE_SIZEOF_VOID_P EQUAL 8)
145 |   # MSVC doesn't allow 64-bit builds to have their /arch set to SSE2 (no-op) or below
146 |     if("${MSVC_CPU_ARCH}" MATCHES "(IA32|SSE|SSE2)")
147 |       set(DELETE_THIS "/arch:${MSVC_CPU_ARCH}")
148 |       message("MSVC doesn't allow x86-64 builds to define /arch:${MSVC_CPU_ARCH}. Setting will be ignored.")
149 |       STRING( REPLACE "${DELETE_THIS}" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
150 |       STRING( REPLACE "${DELETE_THIS}" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
151 |     endif()
152 |   endif()
153 | 
154 |   IF(CLANG_IN_VS STREQUAL "1")
155 |       # suppress other frequent but harmless/unavoidable warnings
156 |       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-function")
157 |       set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function")
158 |       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-reorder")
159 |       set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-reorder")
160 |       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-value")
161 |       set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-value")
162 |       # allow per-function attributes like __attribute__((__target__("sse4.1")))
163 |       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-gcc-compat")
164 |       set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-gcc-compat")
165 |   ENDIF()
166 | 
167 |   # Set C++17 flag
168 |   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /std:c++17")
169 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /std:c++17")
170 | 
171 |   # Enable standards-conformance mode for MSVC compilers that support this
172 |   # flag (Visual C++ 2017 and later).
173 |   if (NOT (MSVC_VERSION LESS 1910))
174 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /permissive-")
175 |     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /permissive-")
176 |   endif()
177 | 
178 |   if(ENABLE_INTEL_SIMD)
179 |     add_definitions("/D INTEL_INTRINSICS")
180 |   endif()
181 | 
182 | ELSE()
183 |   # Every compiler for which MSVC is not set ends up here.
184 |   if(ENABLE_INTEL_SIMD)
185 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2 -DINTEL_INTRINSICS")
186 |   endif()
187 | 
188 |   if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
189 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-security")
190 |   endif()
191 | 
192 |   # Always append to CMAKE_SHARED_LINKER_FLAGS so flags that the user or a
193 |   # toolchain file already provided are kept on every platform.
194 |   if(WIN32)
195 |     set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--enable-stdcall-fixup")
196 |   elseif(APPLE)
197 |     # macOS uses Clang's linker, doesn't like --no-undefined
198 |     set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-undefined,error")
199 |   else()
200 |     if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
201 |       # make sure there are no undefined symbols
202 |       set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined")
203 |     endif()
204 |   endif()
205 | ENDIF()
206 | 
207 | # Report the SIMD setting in effect, then process the TComb subdirectory.
208 | if(NOT ENABLE_INTEL_SIMD)
209 |   message("Intel SIMD disabled")
210 | else()
211 |   message("Intel SIMD enabled")
212 | endif()
213 | add_subdirectory(TComb)
214 | 
215 | # uninstall target: generated from the template; skipped if an "uninstall" target already exists
216 | if(NOT TARGET uninstall)
217 |   configure_file(
218 |     "${CMAKE_CURRENT_SOURCE_DIR}/cmake_uninstall.cmake.in"
219 |     "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" @ONLY)
220 |   add_custom_target(uninstall
221 |     COMMAND "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" VERBATIM)
222 | endif()
223 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     {description}
294 |     Copyright (C) {year}  {fullname}
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   {signature of Ty Coon}, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 
341 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # TComb
 2 | 
 3 | This is an update to tritical's TComb v2.0 Beta 2 moving it from beta to release as it encompasses all the changes in tritical's To-Do-List.
 4 | 
 5 | ### Requirements
 6 | 
 7 | This filter requires AviSynth 2.6.0 or AviSynth+ as well as the Visual C++ Redistributable Package for Visual Studio 2015-19.
 8 | 
 9 | ### Syntax and Parameters
10 | 
11 | The syntax and parameters are identical to the original TComb with the exception of the "opt" parameter. To see a list refer to this [link](http://avisynth.nl/index.php/TComb).
12 | 
13 | ### Changes
14 | 
15 | In 2015 Elegant made many changes when updating TComb in order to improve speed (see full changelog for more details):
16 | 
17 | * Removed buffering of frames/info that weren't actually used
18 | * Switched to AVS 2.6 API
19 | * Added x64 support which also utilizes SSE2
20 | * Restructured debug and error messages
21 | * Removed MMX/ISSE support
22 | * Removed/changed "opt" parameter
23 | 
24 | In 2021 pinterf published a general bugfix release.
25 | It added a Linux port and the missing 8-bit Y and YUV formats, and rewrote the external assembler code as SIMD intrinsics.
26 | 
27 | ### Programmer Notes
28 | 
29 | This program was compiled using Visual Studio 2019 and falls under the GNU General Public License.
30 | 
31 | I (Elegant) would like to thank jpsdr and dubhater for their work on nnedi3 and the VapourSynth version of TComb (respectively). Their work led to the port of this project.
32 | I'd also like to thank the masm32 community who were very helpful as I explored assembly.
33 | 
34 | Build instructions
35 | ==================
36 | VS2019: 
37 |   use IDE
38 | 
39 | Windows GCC (mingw installed by msys2):
40 |   from the 'build' folder under project root:
41 | 
42 |   del ..\CMakeCache.txt
43 |   cmake .. -G "MinGW Makefiles" -DENABLE_INTEL_SIMD:bool=on
44 |   @rem test: cmake .. -G "MinGW Makefiles" -DENABLE_INTEL_SIMD:bool=off
45 |   cmake --build . --config Release  
46 | 
47 | Linux
48 |   note: ENABLE_INTEL_SIMD is automatically off for non-x86 architectures
49 |   
50 | * Clone repo and build
51 |     
52 |         git clone https://github.com/pinterf/TComb
53 |         cd TComb
54 |         cmake -B build -S .
55 |         cmake --build build
56 | 
57 |   Useful hints:        
58 |    build after clean:
59 | 
60 |         cmake --build build --clean-first
61 | 
62 |    Force no asm support
63 | 
64 |         cmake -B build -S . -DENABLE_INTEL_SIMD:bool=off
65 | 
66 |    delete cmake cache
67 | 
68 |         rm build/CMakeCache.txt
69 | 
70 | * Find binaries at
71 |     
72 |         build/TComb/libtcomb.so
73 | 
74 | * Install binaries
75 | 
76 |         cd build
77 |         sudo make install
78 |   
79 | 


--------------------------------------------------------------------------------
/TComb - ReadMe.txt:
--------------------------------------------------------------------------------
  1 |                                                                                                       |
  2 |                                          TComb for AviSynth                                           |
  3 |                                        v2.3 (24 February 2021)                                        |
  4 |                                            by tritical                                                |
  5 |                                    modified by Elegant (v2.0; 17 July 2015)                           |
  6 |                                        additional work by pinterf                                     |
  7 |                                                                                                       |
  8 |                                             HELP FILE                                                 |
  9 | -------------------------------------------------------------------------------------------------------
 10 | -------------------------------------------------------------------------------------------------------
 11 | 
 12 | 
 13 | INFO:
 14 | 
 15 | 
 16 |       TComb is a temporal comb filter (it reduces cross-luminance (rainbowing) and cross-chrominance
 17 |    (dot crawl) artifacts in static areas of the picture).  It will ONLY work with NTSC material, and
 18 |    WILL NOT work with telecined material where the rainbowing/dotcrawl was introduced prior to the
 19 |    telecine process!  It must be used before ivtc or deinterlace in order to work.  In terms of what
 20 |    it does it is similar to guavacomb/dedot.
 21 | 
 22 |       TComb currently supports Y8, YV12, YV16, YV24, YV411 and YUY2 colorspaces.
 23 | 
 24 |       TComb does support seeking... that is, jumping to a random frame will produce the same result
 25 |    as if you had linearly run up to that frame.  For dot crawl removal tcomb requires at least 3
 26 |    static fields of the same parity and for rainbow removal tcomb requires at least 5 static fields
 27 |    of the same parity.
 28 | 
 29 | 
 30 |    Syntax =>
 31 | 
 32 |       TComb(int mode, int fthreshL, int fthreshC, int othreshL, int othreshC, bool map,
 33 |               double scthresh, bool debug, int opt)
 34 | 
 35 | 
 36 | 
 37 | PARAMETERS:
 38 | 
 39 | 
 40 |    mode - (limit processing to luma or chroma only)
 41 | 
 42 |        Controls whether both luma/chroma are processed or only one or the other.  Possible settings:
 43 | 
 44 |            0 - process luma only    (dot crawl removal)
 45 |            1 - process chroma only  (rainbow removal)
 46 |            2 - process both
 47 | 
 48 |        For greyscale clips mode=0 is used regardless of this setting
 49 | 
 50 |        default:  2  (int)
 51 | 
 52 | 
 53 |    fthreshL/fthreshC - (filtered pixel correlation thresholds)
 54 | 
 55 |        One of the things TComb checks for is correlation between filtered values over the length
 56 |        of the filtering window.  If all values differ by less than fthreshL (for luma) or fthreshC
 57 |        (for chroma) then the filtered values are considered to be correlated.  Larger values will
 58 |        allow more filtering (will be more effective at removing rainbowing/dot crawl), but will also
 59 |        create more artifacts.  Smaller values will produce fewer artifacts, but will be less effective
 60 |        in removing rainbowing/dot crawl. A good range of values is between 4 and 7.
 61 | 
 62 |        default:  fthreshL -> 4  (int)
 63 |                  fthreshC -> 5
 64 | 
 65 | 
 66 |    othreshL/othreshC - (original pixel correlation thresholds)
 67 | 
 68 |        One of the things TComb checks for is correlation between original pixel values from every
 69 |        other field of the same parity.  Due to the oscillation period, these values should be equal
 70 |        or very similar in static areas containing dot crawl or rainbowing.  If the pixel values
 71 |        differ by less than othreshL (for luma) or othreshC (for chroma) then the pixels are considered
 72 |        to be correlated.  Larger values will allow more filtering (will be more effective at removing
 73 |        rainbowing/dotcrawl), but will also create more artifacts.  Smaller values will produce fewer
 74 |        artifacts, but will be less effective in removing rainbowing/dotcrawl. A good range of values
 75 |        is between 4 and 8.
 76 | 
 77 |        default:  othreshL -> 5  (int)
 78 |                  othreshC -> 6
 79 | 
 80 | 
 81 |    map -
 82 | 
 83 |        Identifies pixels that are being replaced with filtered values.  Each pixel in the output
 84 |        frame will have one of the following values indicating how it is being filtered:
 85 | 
 86 |             0 - not being filtered
 87 |            85 - [1 2 1] average of (n,n+1,n+2)
 88 |           170 - [1 2 1] average of (n-2,n-1,n)
 89 |           255 - [1 2 1] average of (n-1,n,n+1)
 90 | 
 91 |            ** n = current frame
 92 | 
 93 |        default:  false  (bool)
 94 | 
 95 | 
 96 |    scthresh - (scenechange threshold)
 97 | 
 98 |        Sets the scenechange detection threshold as a percentage of maximum change on the luma
 99 |        plane.  Use the debug output to see which frames are detected as scenechanges and the
100 |        scenechange statistics.
101 | 
102 |        default:  12.0  (float)
103 | 
104 | 
105 |    debug -
106 | 
107 |        Will enable debug output.  The only thing it shows are the scenechange stats.  The info
108 |        is output via OutputDebugString().  You can use the utility "DebugView" from sysinternals
109 |        to view the output.  The frame numbers in the debug output correspond to the input clip
110 |        after a separatefields() call.  TComb internally invokes separatefields() before itself
111 |        and weave() after itself.
112 | 
113 |        default:  false  (bool)
114 | 
115 |    opt - (another debug parameter: CPU)
116 | 
117 |        0: C only (no assembly at all)
118 |        other: automatically choose SSE2 or C
119 | 
120 |        For development use: opt parameters can appear/disappear/change their meaning between versions
121 | 
122 |        default:  -1 (int)
123 | 
124 | 
125 | BASIC SETUP/USAGE:
126 | 
127 | 
128 |    Setting up TComb is pretty simple.  The only values that would ever really need adjusting
129 |    are fthreshL/fthreshC, othreshL/othreshC, and mode.
130 | 
131 |    Set mode to 0 if you want to do dot crawl removal only, set it to 1 if you want to
132 |    do rainbow removal only, or set it to 2 to do both.
133 | 
134 |    Dot Crawl Removal Tweaking (fthreshL/othreshL):
135 | 
136 |       To find good values for fthreshL/othreshL, start with the following line:
137 | 
138 |              tcomb(mode=0,fthreshL=255,othreshL=255)
139 | 
140 |       Now, keep othreshL at 255 but set fthreshL down to 1.  Keep increasing fthreshL
141 |       in steps of 1 to 2 until you find the point at which all dot crawl is removed.
142 |       Remember that value.  Next, set fthreshL back to 255, and set othreshL to 1.
143 |       Now, increase othreshL in steps of 1 or 2 until you find the point at which all
144 |       dot crawl is removed.  You've now got values for fthreshL/othreshL.
145 | 
146 |    Rainbowing Removal Tweaking (fthreshC/othreshC):
147 | 
148 |       To find good values for fthreshC/othreshC, start with the following line:
149 | 
150 |              tcomb(mode=1,fthreshC=255,othreshC=255)
151 | 
152 |       Now, keep othreshC at 255 but set fthreshC down to 1.  Keep increasing fthreshC
153 |       in steps of 1 to 2 until you find the point at which all (or most) rainbowing is
154 |       removed.  Remember that value.  Next, set fthreshC back to 255, and set othreshC
155 |       to 1. Now, increase othreshC in steps of 1 or 2 until you find the point at which
156 |       all (or most) rainbowing is removed.  You've now got values for fthreshC/othreshC.
157 | 
158 |    Once you've got values for mode, fthreshL/fthreshC, and othreshL/othreshC, add the
159 |    necessary tcomb() line into your script and run through part of it.  If you see any
160 |    artifacts try lowering your fthresh/othresh values.
161 | 
162 | 
163 | 
164 | CHANGE LIST:
165 | 
166 | ** v2.3 (20210224 pinterf)**
167 | - Y8, YV16, YV24, YV411 support
168 | 
169 | ** v2.2 (20210223 pinterf)**
170 | - Fix: unsaved registers in the x64 assembler code caused artifacts
171 | - Drop all external asm
172 | - Rewrite assembler in SIMD intrinsics (old stuff is not removed yet, only conditionally ignored)
173 | - Add CMake build system
174 | - Add MinGW/gcc support 
175 | - Add linux support (with ENABLE_INTEL_SIMD=off option as well)
176 | - Add build instructions to README.md
177 | 
178 | ** v2.1 (20210222 pinterf)**
179 | - project forked to https://github.com/pinterf/TComb/ 
180 | - param 'opt' is back for debug. 0 means pure C code
181 | - Fix bug in x64 assembler buildFinalMask_SSE2
182 | - Fix crash in 32bit version of VerticalBlur3_SSE2
183 | - Fix: scenechange SSE2 did not work
184 | - Fix: x64 assembler HorizontalBlur6_SSE2
185 | - Fix: HorizontalBlur6: C only did top 2 lines. SSE2 bad top 2 lines
186 | - Fix: HorizontalBlur3_SSE2 artifacts (both x86 and x64)
187 | - Fix: HorizontalBlur3_SSE2 missing rounder (both x86 and x64) (now C and SSE2 give identical results)
188 | - Code:
189 |   - Update to Visual Studio 2019
190 |   - update to actual Avisynth+ headers
191 |   - clang-friendly code
192 |   - removed memcpy and bitblt variants
193 |   - replaced planarframes module with the one I updated in tivtc project for Avisynth+ and hbd preparation    
194 |   - Fix debug build configuration in VS project settings
195 | 
196 | ** v2.0.0.1 (20150726 Elegant)**
197 | - Corrected the masks used in HorizontalBlur6 for x64.
198 | 
199 | ** v2.0 (20150717 Elegant)**
200 | 
201 | - Removed buffering of frames/info that weren't actually used (was there for
202 |   development/testing purposes). Should save a lot of RAM usage.
203 | - Switched to AVS 2.6 API since AviSynth 2.6.0 was released.
204 | - Added x64 support which also utilizes SSE2. This also includes some missing SSE2 functions (andNeighborsInPlace_SSE2).
205 | - Restructured debug and error messages so that it was apparent that TComb was responsible.
206 | - Removed MMX/ISSE support as times have changed and the support was not going to be carried over to x64.
207 | - Removed "opt" parameter. TComb will now use SSE2 if available and will fall back to C++ if it is not supported.
208 | 
209 | End of tritical version history
210 | ------------------------------------------------------------------------
211 | 
212 |    05/16/2006  v2.0 Beta 2
213 | 
214 |        + Stricter checking of othreshL/othreshC when looking for oscillation
215 |        + For dot crawl detection require at least one vertical neighbor (y-1/y+1, x-1/x/x+1)
216 |        - fixed possible crash with yuy2 input (sse2 planar<->packed conversions)
217 | 
218 | 
219 |    03/31/2006  v2.0 Beta 1
220 | 
221 |        - complete rewrite
222 | 
223 | 
224 |    06/24/2005  v0.9.0
225 | 
226 |        - Initial Release
227 | 	   
228 | 
229 | 
230 | contact:    GitHub (@Elegant996)
231 | 


--------------------------------------------------------------------------------
/TComb/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | # Build script for the TComb plugin library.
  2 | # Visual Studio 2019 is supported from CMake 3.14.1. Tested generators:
  3 | #   "MinGW Makefiles"       - MSYS2/Mingw32 GCC 8.3 build
  4 | #   "Visual Studio 16 2019" - optional platform generator Win32 and x64
  5 | #   "Visual Studio 16 2019" - with LLVM 8.0 (clang), optional platform generator Win32 and x64
  6 | cmake_minimum_required(VERSION 3.8.2)
  7 | 
  8 | # The plugin (and project) is named "TComb" on Windows and "tcomb" everywhere else.
  9 | if (WIN32)
 10 |   set(PluginName "TComb")
 11 | else()
 12 |   set(PluginName "tcomb")
 13 | endif()
 14 | set(ProjectName "${PluginName}")
 15 | project(${ProjectName} LANGUAGES CXX)
 16 | 
 17 | include("Files.cmake")  # provides TComb_Sources
 18 | 
 19 | add_library(${PluginName} SHARED ${TComb_Sources})
 20 | 
 21 | set_target_properties(${PluginName} PROPERTIES OUTPUT_NAME "${PluginName}")
 22 | if (MINGW)
 23 |   # no file-name prefix on the DLL or on its import library
 24 |   set_target_properties(${PluginName} PROPERTIES PREFIX "" IMPORT_PREFIX "")
 25 | endif()
 26 | 
 27 | if(ENABLE_INTEL_SIMD)
 28 |   # Turn on the intrinsics code path and require SSE2. -msse2 is a GCC/Clang switch that cl.exe does not know, so MSVC is skipped.
 29 |   target_compile_definitions(${PluginName} PRIVATE INTEL_INTRINSICS)
 30 |   target_compile_options(${PluginName} PRIVATE "$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-msse2>")
 31 | endif()
 32 | # Mirror the directory layout of the sources as IDE source groups
 33 | foreach(src_file ${TComb_Sources})
 34 |   get_filename_component(PARENT_DIR "${src_file}" DIRECTORY)
 35 |   # drop "./" components, then use backslashes as the sub-group separator
 36 |   string(REGEX REPLACE "(\\./)" "" GROUP "${PARENT_DIR}")
 37 |   string(REPLACE "/" "\\" GROUP "${GROUP}")
 38 | 
 39 |   # pick the top-level folder by file type; any other file keeps the bare directory group
 40 |   if ("${src_file}" MATCHES ".*\\.cpp")
 41 |     string(CONCAT GROUP "Source Files\\" "${GROUP}")
 42 |   elseif("${src_file}" MATCHES ".*\\.h")
 43 |     string(CONCAT GROUP "Header Files\\" "${GROUP}")
 44 |   elseif("${src_file}" MATCHES ".*\\.asm")
 45 |     string(CONCAT GROUP "Assembler Files\\" "${GROUP}")
 46 |   endif()
 47 | 
 48 |   source_group("${GROUP}" FILES "${src_file}")
 49 | endforeach()
 50 | 
 51 | # Per-file instruction-set flags, selected by file-name suffix.
 52 | # tcomb_isa_flags(<out_var> <suffix> <flags>) globs *_<suffix>.cpp below the current
 53 | # source directory into <out_var> and sets COMPILE_FLAGS of the matches to <flags>.
 54 | # It is a macro (not a function) so that <out_var> stays set in the calling scope.
 55 | macro(tcomb_isa_flags out_var suffix flags)
 56 |   file(GLOB_RECURSE ${out_var} "*_${suffix}.cpp")
 57 |   set_source_files_properties(${${out_var}} PROPERTIES COMPILE_FLAGS "${flags}")
 58 | endmacro()
 59 | 
 60 | if (MSVC_IDE AND NOT (CLANG_IN_VS STREQUAL "1"))
 61 |   # Visual Studio IDE without clang: /arch switches, listed for AVX and above only
 62 |   tcomb_isa_flags(SRCS_AVX    "avx"    " /arch:AVX ")
 63 |   tcomb_isa_flags(SRCS_AVX2   "avx2"   " /arch:AVX2 ")
 64 |   tcomb_isa_flags(SRCS_AVX512 "avx512" " /arch:AVX512 ")
 65 | else()
 66 |   # clang inside Visual Studio (CLANG_IN_VS is "1") and every non-IDE build: -m switches
 67 |   tcomb_isa_flags(SRCS_SSSE3  "ssse3"  " -mssse3 ")
 68 |   tcomb_isa_flags(SRCS_SSE41  "sse41"  " -msse4.1 ")
 69 |   tcomb_isa_flags(SRCS_AVX    "avx"    " -mavx ")
 70 |   tcomb_isa_flags(SRCS_AVX2   "avx2"   " -mavx2 -mfma ")
 71 |   tcomb_isa_flags(SRCS_AVX512 "avx512" " -mavx512f -mavx512bw ")
 72 | endif()
106 | 
107 | 
108 | # Specify include directories
109 | target_include_directories(${ProjectName} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
110 | # dedicated include dir for avisynth.h (not used at present)
111 | #target_include_directories(${ProjectName} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
112 | 
113 | # Windows DLL dependencies; keyword signature (PRIVATE) instead of the legacy plain one
114 | if (MSVC OR MINGW)
115 |   target_link_libraries(${ProjectName} PRIVATE
116 |     "uuid" "winmm" "vfw32" "msacm32" "gdi32" "user32" "advapi32" "ole32" "imagehlp")
117 | else()
118 |   # non Windows: nothing extra is linked at present
119 |   # (candidates should they become necessary: "pthread" "dl")
120 | endif()
121 | 
122 | include(GNUInstallDirs)
123 | # LIBRARY covers the shared object on non-DLL platforms; RUNTIME/ARCHIVE (DLL + import library) are spelled out because CMake < 3.14 has no defaults for them.
124 | install(TARGETS ${ProjectName}
125 |         LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/avisynth"
126 |         RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")


--------------------------------------------------------------------------------
/TComb/Files.cmake:
--------------------------------------------------------------------------------
 1 | # Source list for the plugin: TComb_Sources, with paths relative to this directory.
 2 | file(GLOB TComb_Sources
 3 |   RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
 4 |   "*.c" "*.cpp"
 5 |   "*.hpp" "*.h"
 6 |   # headers kept in the avs/ subdirectory
 7 |   "avs/*.h"
 8 | )
 9 | 
10 | if(MSVC OR MINGW)
11 |     # Export definitions in general are not needed on x64 and only cause warnings;
12 |     # a .def file would still be needed for some COM functions.
13 |     # This plugin has NO C interface, so none is added:
14 |     # if(CMAKE_SIZEOF_VOID_P EQUAL 8)
15 |     #  LIST(APPEND TComb_Sources "TComb64.def")
16 |     # else()
17 |     #  LIST(APPEND TComb_Sources "TComb.def")
18 |     # endif()
19 | endif()
20 | 
21 | if(MSVC_IDE)
22 |     # Resource script is added for the Visual Studio IDE only; Ninja seems to have some issues with using rc.exe
23 |     list(APPEND TComb_Sources "TComb.rc")
24 | endif()
25 | 


--------------------------------------------------------------------------------
/TComb/PlanarFrame.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | **   My PlanarFrame class... fast mmx/sse2 YUY2 packed to planar and planar
  3 | **   to packed conversions, and always gives 16 bit alignment for all
  4 | **   planes.  Supports YV12/YUY2 frames from avisynth, can do any planar format
  5 | **   internally.
  6 | **
  7 | **   Copyright (C) 2005-2006 Kevin Stone
  8 | **
  9 | **   This program is free software; you can redistribute it and/or modify
 10 | **   it under the terms of the GNU General Public License as published by
 11 | **   the Free Software Foundation; either version 2 of the License, or
 12 | **   (at your option) any later version.
 13 | **
 14 | **   This program is distributed in the hope that it will be useful,
 15 | **   but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 | **   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17 | **   GNU General Public License for more details.
 18 | **
 19 | **   You should have received a copy of the GNU General Public License
 20 | **   along with this program; if not, write to the Free Software
 21 | **   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 22 | */
 23 | 
 24 | #include "PlanarFrame.h"
 25 | #include "avs/cpuid.h"
 26 | #include "common.h"
 27 | #include <stdint.h>
 28 | #ifdef INTEL_INTRINSICS
 29 | #include <emmintrin.h>
 30 | #endif
 31 | 
 32 | // 8 bits only!!!
 33 | 
 34 | PlanarFrame::PlanarFrame(int cpuFlags)
 35 | {
 36 |   // Empty frame: stores the CPU flags and leaves every plane unallocated.
 37 |   cpu = cpuFlags;
 38 |   packed = false;
 39 |   useSIMD = true;
 40 |   y = u = v = NULL;
 41 |   ypitch = ywidth = yheight = 0;
 42 |   uvpitch = uvwidth = uvheight = 0;
 43 | }
 44 | 
 45 | PlanarFrame::PlanarFrame(VideoInfo &viInfo, int cpuFlags)
 46 | {
 47 |   // Non-packed frame whose plane buffers are sized from viInfo immediately.
 48 |   cpu = cpuFlags;
 49 |   packed = false;
 50 |   useSIMD = true;
 51 |   y = u = v = NULL;                  // allocSpace() frees any plane pointer that is not NULL
 52 |   ypitch = ywidth = yheight = 0;
 53 |   uvpitch = uvwidth = uvheight = 0;
 54 |   allocSpace(viInfo);                // its bool result is not checked here
 55 | }
 56 | 
 57 | PlanarFrame::PlanarFrame(VideoInfo &viInfo, bool _packed, int cpuFlags)
 58 | {
 59 |   // Like the (viInfo, cpuFlags) constructor, but the caller chooses the packed flag.
 60 |   cpu = cpuFlags;
 61 |   packed = _packed;                  // read by allocSpace() on the YUY2 path
 62 |   useSIMD = true;
 63 |   y = u = v = NULL;                  // allocSpace() frees any plane pointer that is not NULL
 64 |   ypitch = ywidth = yheight = 0;
 65 |   uvpitch = uvwidth = uvheight = 0;
 66 |   allocSpace(viInfo);                // its bool result is not checked here
 67 | }
 68 | 
 69 | PlanarFrame::~PlanarFrame()
 70 | {
 71 |   if (v) { _aligned_free(v); v = NULL; }
 72 |   if (u) { _aligned_free(u); u = NULL; }
 73 |   if (y) { _aligned_free(y - debug_padding); y = NULL; } // step back debug_padding bytes to the address that was allocated
 74 | }
 75 | 
 76 | void PlanarFrame::FillMemDebug()
 77 | {
 78 |   // When debug is set: stamp 0xDEADBEEF into MIN_ALIGNMENT bytes before and after the luma data.
 79 |   if (!debug) return;
 80 |   const int guardWords = (int)(MIN_ALIGNMENT / sizeof(uint32_t));
 81 |   uint32_t* const head = (uint32_t*)(y);                                     // guard before the data
 82 |   uint32_t* const tail = (uint32_t*)(y + MIN_ALIGNMENT + ypitch * yheight);  // guard after the data
 83 |   for (int w = 0; w < guardWords; w++) {
 84 |     head[w] = 0xDEADBEEF;
 85 |     tail[w] = 0xDEADBEEF;
 86 |   }
 87 |   y += MIN_ALIGNMENT; // from here on y is the real data pointer, just past the leading guard
 88 | }
 89 | 
 90 | bool PlanarFrame::allocSpace(VideoInfo &viInfo)
 91 | {
 92 |   // (Re)allocates the plane buffers for viInfo; returns false for an unsupported format or a failed allocation.
 93 |   if (y != NULL) { _aligned_free(y - debug_padding); y = NULL; } // step back over the leading guard area, if any
 94 |   if (u != NULL) { _aligned_free(u); u = NULL; }
 95 |   if (v != NULL) { _aligned_free(v); v = NULL; }
 96 |   int height = viInfo.height, width = viInfo.width;
 97 |   if (viInfo.IsPlanar())
 98 |   {
 99 |     ypitch = width + ((width%MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT)); // width rounded up to a multiple of MIN_ALIGNMENT
100 |     ywidth = width;
101 |     yheight = height;
102 |     // when debug is set, the luma buffer carries MIN_ALIGNMENT guard bytes on both sides
103 |     debug_padding = debug ? MIN_ALIGNMENT : 0;
104 |     y = (uint8_t*)_aligned_malloc(ypitch * yheight + 2 * debug_padding, MIN_ALIGNMENT);
105 |     if (y == NULL) return false;
106 |     FillMemDebug(); // writes the guards and advances y past the leading one
107 | 
108 |     if (viInfo.IsY()) {
109 |       uvpitch = uvwidth = uvheight = 0; // greyscale: clear chroma geometry left over from an earlier allocation
110 |     } else {
111 |       uvwidth = width >> viInfo.GetPlaneWidthSubsampling(PLANAR_U);
112 |       uvheight = height >> viInfo.GetPlaneHeightSubsampling(PLANAR_U);
113 |       uvpitch = uvwidth + ((uvwidth % MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (uvwidth % MIN_ALIGNMENT));
114 |       u = (uint8_t*)_aligned_malloc(uvpitch * uvheight, MIN_ALIGNMENT);
115 |       if (u == NULL) return false;
116 |       v = (uint8_t*)_aligned_malloc(uvpitch * uvheight, MIN_ALIGNMENT);
117 |       if (v == NULL) return false;
118 |     }
119 |     return true;
120 |   }
121 |   else if (viInfo.IsYUY2())
122 |   {
123 |     debug_padding = 0; // no guard areas on the YUY2 paths
124 | 
125 |     if (!packed)
126 |     {
127 |       // YUY2 split into three planes: chroma is half width, full height
128 |       ypitch = width + ((width%MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT));
129 |       ywidth = width; yheight = height;
130 |       width >>= 1;
131 |       uvpitch = width + ((width%MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT));
132 |       uvwidth = width;
133 |       uvheight = height;
134 |       y = (uint8_t*)_aligned_malloc(ypitch*yheight, MIN_ALIGNMENT);
135 |       if (y == NULL) return false;
136 |       u = (uint8_t*)_aligned_malloc(uvpitch*uvheight, MIN_ALIGNMENT);
137 |       if (u == NULL) return false;
138 |       v = (uint8_t*)_aligned_malloc(uvpitch*uvheight, MIN_ALIGNMENT);
139 |       if (v == NULL) return false;
140 |       return true;
141 |     }
142 |     else
143 |     {
144 |       width *= 2; // packed: one buffer, two bytes per pixel
145 |       ypitch = width + ((width%MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT));
146 |       ywidth = width;
147 |       yheight = height;
148 |       y = (uint8_t*)_aligned_malloc(ypitch*yheight, MIN_ALIGNMENT);
149 |       if (y == NULL) return false;
150 |       uvpitch = uvwidth = uvheight = 0;
151 |       u = v = NULL;
152 |       return true;
153 |     }
154 |   }
155 |   return false; // neither planar nor YUY2
156 | }
157 | 
158 | bool PlanarFrame::allocSpace(int specs[4])
159 | {
160 |   // specs = { luma height, chroma height, luma width, chroma width }; returns false if an allocation fails.
161 |   if (y != NULL) { _aligned_free(y - debug_padding); y = NULL; } // step back over the leading guard area, if any
162 |   if (u != NULL) { _aligned_free(u); u = NULL; }
163 |   if (v != NULL) { _aligned_free(v); v = NULL; }
164 |   int height = specs[0], width = specs[2];
165 |   ypitch = width + ((width%MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT)); // width rounded up to a multiple of MIN_ALIGNMENT
166 |   ywidth = width;
167 |   yheight = height;
168 |   height = specs[1];
169 |   width = specs[3];
170 |   uvpitch = width + ((width%MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT));
171 |   uvwidth = width;
172 |   uvheight = height;
173 |   // Record the padding in the member: the free paths subtract debug_padding from y, so it must match this allocation.
174 |   debug_padding = debug ? MIN_ALIGNMENT : 0;
175 |   y = (uint8_t*)_aligned_malloc(ypitch * yheight + 2 * debug_padding, MIN_ALIGNMENT);
176 |   if (y == NULL) return false;
177 |   FillMemDebug(); // writes the guards and advances y past the leading one
178 | 
179 |   if (uvpitch) { // a zero chroma pitch means there are no chroma planes to allocate
180 |     u = (uint8_t*)_aligned_malloc(uvpitch * uvheight, MIN_ALIGNMENT);
181 |     if (u == NULL) return false;
182 |     v = (uint8_t*)_aligned_malloc(uvpitch * uvheight, MIN_ALIGNMENT);
183 |     if (v == NULL) return false;
184 |   }
185 |   return true;
186 | }
187 | 
188 | // Allocates planes with explicit luma/chroma sizes; note the parameters shadow the same-named members.
189 | void PlanarFrame::createPlanar(int yheight, int uvheight, int ywidth, int uvwidth)
190 | {
191 |   int specs[4] = { yheight, uvheight, ywidth, uvwidth }; // element order expected by allocSpace(int[4])
192 |   allocSpace(specs); // result is discarded; after a failed allocation the affected plane pointer is NULL
193 | }
194 | // Allocates planes for a luma size plus a PLANAR_* chroma layout; an unknown layout leaves the frame empty.
195 | void PlanarFrame::createPlanar(int height, int width, int chroma_format)
196 | {
197 |   // specs = { yheight, uvheight, ywidth, uvwidth }; chroma starts at 0, which is already right for PLANAR_400.
198 |   int specs[4] = { height, 0, width, 0 };
199 |   if (chroma_format <= PLANAR_420) // 420 (as before, every value below PLANAR_420 is treated as 420 too)
200 |   {
201 |     specs[1] = height >> 1; // chroma halved vertically
202 |     specs[3] = width >> 1;  // and horizontally
203 |   }
204 |   else if (chroma_format == PLANAR_422) // 422
205 |   {
206 |     specs[1] = height;
207 |     specs[3] = width >> 1;
208 |   }
209 |   else if (chroma_format == PLANAR_444) // 444
210 |   {
211 |     specs[1] = height;
212 |     specs[3] = width;
213 |   }
214 |   else if (chroma_format == PLANAR_411) // 411
215 |   {
216 |     specs[1] = height;
217 |     specs[3] = width >> 2;
218 |   }
219 |   else if (chroma_format != PLANAR_400) // unknown layout
220 |   {
221 |     freePlanar(); return; // the old code fell through and allocated from an uninitialized specs array
222 |   }
223 |   allocSpace(specs); // result is discarded; after a failed allocation the affected plane pointer is NULL
224 | }
225 | // Allocates planes sized from viInfo via allocSpace(VideoInfo&); the allocation result is not reported.
226 | void PlanarFrame::createFromProfile(VideoInfo &viInfo)
227 | {
228 |   allocSpace(viInfo);
229 | }
230 | // Allocates planes sized from viInfo, then fills them from the given Avisynth frame.
231 | void PlanarFrame::createFromFrame(PVideoFrame &frame, VideoInfo &viInfo)
232 | {
233 |   allocSpace(viInfo);
234 |   copyInternalFrom(frame, viInfo); // returns immediately when y is still NULL after a failed allocation
235 | }
236 | // Allocates planes with the same widths/heights as 'frame', then copies its contents.
237 | void PlanarFrame::createFromPlanar(PlanarFrame &frame)
238 | {
239 |   int specs[4] = { frame.yheight, frame.uvheight, frame.ywidth, frame.uvwidth }; // order used by allocSpace(int[4])
240 |   allocSpace(specs);
241 |   copyInternalFrom(frame); // returns immediately when y is still NULL after a failed allocation
242 | }
243 | // Public entry point: fills the already-allocated planes from an Avisynth frame.
244 | void PlanarFrame::copyFrom(PVideoFrame &frame, VideoInfo &viInfo)
245 | {
246 |   copyInternalFrom(frame, viInfo);
247 | }
248 | // Public entry point: fills the already-allocated planes from another PlanarFrame.
249 | void PlanarFrame::copyFrom(PlanarFrame &frame)
250 | {
251 |   copyInternalFrom(frame);
252 | }
253 | // Public entry point: writes the planes into an Avisynth frame (planar or YUY2, per viInfo).
254 | void PlanarFrame::copyTo(PVideoFrame &frame, VideoInfo &viInfo)
255 | {
256 |   copyInternalTo(frame, viInfo);
257 | }
258 | // Public entry point: writes the planes into another PlanarFrame.
259 | void PlanarFrame::copyTo(PlanarFrame &frame)
260 | {
261 |   copyInternalTo(frame);
262 | }
263 | // Public entry point: copies one plane (0 = y, 1 = u, 2 = v) into 'frame'; other indices do nothing.
264 | void PlanarFrame::copyPlaneTo(PlanarFrame &frame, int plane)
265 | {
266 |   copyInternalPlaneTo(frame, plane);
267 | }
268 | // Returns the base pointer of a plane (may be NULL when that plane is not allocated).
269 | uint8_t* PlanarFrame::GetPtr(int plane)
270 | {
271 |   // plane 0 is luma, plane 1 is U; every other index yields V
272 |   uint8_t* const chroma = (plane == 1) ? u : v;
273 |   return (plane == 0) ? y : chroma;
274 | }
275 | // Returns 3 when chroma is present, otherwise 1.
276 | int PlanarFrame::NumComponents() {
277 |   // a zero chroma pitch is what marks a luma-only frame
278 |   const bool hasChroma = (uvpitch != 0);
279 |   return hasChroma ? 3 : 1;
280 | }
281 | // Returns the width of a plane.
282 | int PlanarFrame::GetWidth(int plane)
283 | {
284 |   // every non-zero plane index reports the chroma width
285 |   return (plane == 0) ? ywidth : uvwidth;
286 | }
287 | // Returns the height (in rows) of a plane.
288 | int PlanarFrame::GetHeight(int plane)
289 | {
290 |   // every non-zero plane index reports the chroma height
291 |   return (plane == 0) ? yheight : uvheight;
292 | }
293 | // Returns the pitch (bytes between consecutive rows) of a plane.
294 | int PlanarFrame::GetPitch(int plane)
295 | {
296 |   // every non-zero plane index reports the chroma pitch
297 |   return (plane == 0) ? ypitch : uvpitch;
298 | }
299 | // Releases all three planes and zeroes the stored pitch/width/height values.
300 | void PlanarFrame::freePlanar()
301 | {
302 |   // y is released at (y - debug_padding), presumably the start of its allocation; u and v are freed as-is
303 |   if (y != NULL) _aligned_free(y - debug_padding);
304 |   if (u != NULL) _aligned_free(u);
305 |   if (v != NULL) _aligned_free(v);
306 |   y = u = v = NULL;
307 |   ypitch = uvpitch = ywidth = uvwidth = yheight = uvheight = 0;
308 | }
309 | // Loads an Avisynth frame into the planes: planar input is copied plane by plane, YUY2 input is unpacked.
310 | void PlanarFrame::copyInternalFrom(PVideoFrame &frame, VideoInfo &viInfo)
311 | {
312 |   if (y == NULL) return; // nothing allocated
313 |   if (!viInfo.IsPlanar())
314 |   {
315 |     // packed input: only YUY2 is handled, any other format is ignored
316 |     if (viInfo.IsYUY2())
317 |       convYUY2to422(frame->GetReadPtr(), y, u, v, frame->GetPitch(), ypitch, uvpitch,
318 |         viInfo.width, viInfo.height);
319 |     return;
320 |   }
321 |   const int planes[3] = { PLANAR_Y, PLANAR_U, PLANAR_V };
322 |   uint8_t* const dstPtr[3] = { y, u, v };
323 |   for (int i = 0; i < 3; ++i) {
324 |     if (i == 1 && (u == NULL || v == NULL)) return; // luma-only storage: stop after Y
325 |     BitBlt(dstPtr[i], i == 0 ? ypitch : uvpitch, frame->GetReadPtr(planes[i]), frame->GetPitch(planes[i]),
326 |       frame->GetRowSize(planes[i]), frame->GetHeight(planes[i]));
327 |   }
328 | }
329 | // Copies the planes of 'frame' into this object, using the SOURCE frame's widths and heights.
330 | void PlanarFrame::copyInternalFrom(PlanarFrame &frame)
331 | {
332 |   if (y == NULL || frame.y == NULL) return; // either side unallocated (the source was not checked before)
333 |   BitBlt(y, ypitch, frame.y, frame.ypitch, frame.ywidth, frame.yheight);
334 |   if (u == NULL || v == NULL || frame.u == NULL || frame.v == NULL) return; // chroma missing on one side
335 |   BitBlt(u, uvpitch, frame.u, frame.uvpitch, frame.uvwidth, frame.uvheight);
336 |   BitBlt(v, uvpitch, frame.v, frame.uvpitch, frame.uvwidth, frame.uvheight);
337 | }
338 | // Writes the planes into an Avisynth frame: plane by plane for planar output, interleaved for YUY2.
339 | void PlanarFrame::copyInternalTo(PVideoFrame &frame, VideoInfo &viInfo)
340 | {
341 |   if (y == NULL) return; // nothing allocated
342 |   if (!viInfo.IsPlanar())
343 |   {
344 |     // packed output: only YUY2 is produced, any other format is left untouched
345 |     if (viInfo.IsYUY2())
346 |       conv422toYUY2(y, u, v, frame->GetWritePtr(), ypitch, uvpitch, frame->GetPitch(), ywidth, yheight);
347 |     return;
348 |   }
349 |   BitBlt(frame->GetWritePtr(PLANAR_Y), frame->GetPitch(PLANAR_Y), y, ypitch, ywidth, yheight);
350 |   if (u == NULL || v == NULL) return; // luma-only storage: stop after Y
351 |   BitBlt(frame->GetWritePtr(PLANAR_U), frame->GetPitch(PLANAR_U), u, uvpitch, uvwidth, uvheight);
352 |   BitBlt(frame->GetWritePtr(PLANAR_V), frame->GetPitch(PLANAR_V), v, uvpitch, uvwidth, uvheight);
353 | }
354 | // Copies this object's planes into 'frame', using THIS frame's widths and heights.
355 | void PlanarFrame::copyInternalTo(PlanarFrame &frame)
356 | {
357 |   if (y == NULL || frame.y == NULL) return; // either side unallocated (the destination was not checked before)
358 |   BitBlt(frame.y, frame.ypitch, y, ypitch, ywidth, yheight);
359 |   if (u == NULL || v == NULL || frame.u == NULL || frame.v == NULL) return; // chroma missing on one side
360 |   BitBlt(frame.u, frame.uvpitch, u, uvpitch, uvwidth, uvheight);
361 |   BitBlt(frame.v, frame.uvpitch, v, uvpitch, uvwidth, uvheight);
362 | }
363 | // Copies one plane (0 = y, 1 = u, 2 = v) into 'frame'; skipped unless the plane exists on BOTH sides.
364 | void PlanarFrame::copyInternalPlaneTo(PlanarFrame &frame, int plane)
365 | {
366 |   if (plane == 0 && y != NULL && frame.y != NULL) // destination pointers were not checked before
367 |     BitBlt(frame.y, frame.ypitch, y, ypitch, ywidth, yheight);
368 |   else if (plane == 1 && u != NULL && frame.u != NULL)
369 |     BitBlt(frame.u, frame.uvpitch, u, uvpitch, uvwidth, uvheight);
370 |   else if (plane == 2 && v != NULL && frame.v != NULL)
371 |     BitBlt(frame.v, frame.uvpitch, v, uvpitch, uvwidth, uvheight);
372 | }
373 | // Copies the U and V planes into 'dst', sized by the DESTINATION's chroma width/height.
374 | void PlanarFrame::copyChromaTo(PlanarFrame &dst) {
375 |   if (u == NULL || v == NULL || dst.u == NULL || dst.v == NULL) return; // no chroma on one side: nothing to copy
376 |   BitBlt(dst.u, dst.uvpitch, u, uvpitch, dst.uvwidth, dst.uvheight);
377 |   BitBlt(dst.v, dst.uvpitch, v, uvpitch, dst.uvwidth, dst.uvheight);
378 | }
379 | // Writes the frame into dst's first plane with rows padded to a multiple of 4 bytes (planar: Y, then V, then U).
380 | void PlanarFrame::copyToForBMP(PVideoFrame &dst, VideoInfo &viInfo)
381 | {
382 |   uint8_t *out = dst->GetWritePtr(PLANAR_Y);
383 |   if (!viInfo.IsPlanar())
384 |   {
385 |     const int packedPitch = (dst->GetRowSize(PLANAR_Y) + 3) & -4; // row size rounded up to a multiple of 4
386 |     conv422toYUY2(y, u, v, out, ypitch, uvpitch, packedPitch, viInfo.width, viInfo.height);
387 |     return;
388 |   }
389 |   const int lumaPitch = (ywidth + 3) & -4; // luma width rounded up to a multiple of 4
390 |   const int chromaPitch = lumaPitch >> 1;
391 |   uint8_t *vOut = out + (lumaPitch*yheight); // V starts right after the luma rows, U right after V
392 |   BitBlt(out, lumaPitch, y, ypitch, ywidth, yheight);
393 |   BitBlt(vOut, chromaPitch, v, uvpitch, uvwidth, uvheight);
394 |   BitBlt(vOut + (chromaPitch*uvheight), chromaPitch, u, uvpitch, uvwidth, uvheight);
395 | }
396 | // Deep-copies ob2's planes into this frame, reallocating whenever the existing buffers do not match its layout.
397 | PlanarFrame& PlanarFrame::operator=(PlanarFrame &ob2)
398 | {
399 |   if (this == &ob2) return *this; // self-assignment: nothing to do (memcpy onto the same buffer is undefined)
400 |   cpu = ob2.cpu;
401 |   if (ob2.y == NULL) { freePlanar(); return *this; } // empty source -> empty copy
402 |   // The old code overwrote pitch/width/height with ob2's values but kept the existing buffers, so a
403 |   // smaller destination was overrun and an unallocated one silently received no data.
404 |   const bool sameLayout = y != NULL && (u != NULL) == (ob2.u != NULL) && (v != NULL) == (ob2.v != NULL) &&
405 |     ywidth == ob2.ywidth && yheight == ob2.yheight && uvwidth == ob2.uvwidth && uvheight == ob2.uvheight;
406 |   if (sameLayout) copyFrom(ob2); else createFromPlanar(ob2); // reuse the buffers only when they fit exactly
407 |   return *this;
408 | }
409 | // Splits packed YUY2 rows into separate Y, U and V planes; 'width' is in pixels, pitches are in bytes.
410 | void PlanarFrame::convYUY2to422(const uint8_t *src, uint8_t *py, uint8_t *pu,
411 |   uint8_t *pv, int pitch1, int pitch2Y, int pitch2UV, int width, int height)
412 | {
413 | #ifdef INTEL_INTRINSICS
414 |   if ((cpu&CPUF_SSE2) && useSIMD)
415 |   {
416 |     convYUY2to422_SSE2(src, py, pu, pv, pitch1, pitch2Y, pitch2UV, width, height);
417 |     return;
418 |   }
419 | #endif
420 |   // scalar path: each group of 4 source bytes (Y0 U Y1 V) yields two luma samples and one U/V pair
421 |   const int pairs = width >> 1;
422 |   for (int row = 0; row < height; ++row)
423 |   {
424 |     const uint8_t *in = src;
425 |     for (int i = 0; i < pairs; ++i, in += 4)
426 |     {
427 |       py[2 * i] = in[0];
428 |       pu[i] = in[1];
429 |       py[2 * i + 1] = in[2];
430 |       pv[i] = in[3];
431 |     }
432 |     py += pitch2Y; pu += pitch2UV; pv += pitch2UV;
433 |     src += pitch1;
434 |   }
435 | }
436 | 
437 | // SSE2 YUY2 -> planar split; 'width' is in pixels. SIMD handles 8 pixels per step, a scalar loop does the rest.
438 | #ifdef INTEL_INTRINSICS
439 | void PlanarFrame::convYUY2to422_SSE2(const uint8_t *src, uint8_t *py, uint8_t *pu,
440 |   uint8_t *pv, int pitch1, int pitch2Y, int pitch2UV, int width, int height)
441 | {
442 |   const int pairs = width >> 1, simdPairs = pairs & ~3; // U/V samples per row; the part done 4 at a time
443 |   __m128i Ymask = _mm_set1_epi16(0x00FF);
444 |   for (int y = 0; y < height; y++) {
445 |     for (int x = 0; x < simdPairs; x += 4) { // unaligned load below: src alignment is up to the caller
446 |       __m128i fullsrc = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + x * 4)); // VYUYVYUYVYUYVYUY
447 |       __m128i yy = _mm_and_si128(fullsrc, Ymask); // 0Y0Y0Y0Y0Y0Y0Y0Y
448 |       __m128i uvuv = _mm_srli_epi16(fullsrc, 8); // 0V0U0V0U0V0U0V0U
449 |       yy = _mm_packus_epi16(yy, yy); // xxxxxxxxYYYYYYYY
450 |       uvuv = _mm_packus_epi16(uvuv, uvuv); // xxxxxxxxVUVUVUVU
451 |       __m128i uu = _mm_and_si128(uvuv, Ymask); // xxxxxxxx0U0U0U0U
452 |       __m128i vv = _mm_srli_epi16(uvuv, 8); // xxxxxxxx0V0V0V0V
453 |       uu = _mm_packus_epi16(uu, uu); // xxxxxxxxxxxxUUUU
454 |       vv = _mm_packus_epi16(vv, vv); // xxxxxxxxxxxxVVVV
455 |       _mm_storel_epi64(reinterpret_cast<__m128i *>(py + x * 2), yy); // store y
456 |       *(uint32_t *)(pu + x) = _mm_cvtsi128_si32(uu); // store u
457 |       *(uint32_t *)(pv + x) = _mm_cvtsi128_si32(vv); // store v
458 |     }
459 |     for (int x = simdPairs; x < pairs; ++x) { // scalar tail: keeps reads and writes inside the row
460 |       py[2 * x] = src[4 * x]; pu[x] = src[4 * x + 1]; py[2 * x + 1] = src[4 * x + 2]; pv[x] = src[4 * x + 3];
461 |     }
462 |     src += pitch1; py += pitch2Y; pu += pitch2UV; pv += pitch2UV;
463 |   }
464 | }
465 | #endif
466 | // Interleaves separate Y, U and V planes into packed YUY2 rows; 'width' is in pixels, pitches are in bytes.
467 | void PlanarFrame::conv422toYUY2(uint8_t *py, uint8_t *pu, uint8_t *pv,
468 |   uint8_t *dst, int pitch1Y, int pitch1UV, int pitch2, int width, int height)
469 | {
470 | #ifdef INTEL_INTRINSICS
471 |   if ((cpu&CPUF_SSE2) && useSIMD)
472 |   {
473 |     conv422toYUY2_SSE2(py, pu, pv, dst, pitch1Y, pitch1UV, pitch2, width, height);
474 |     return;
475 |   }
476 | #endif
477 |   // scalar path: two luma samples and one U/V pair become 4 output bytes (Y0 U Y1 V)
478 |   const int pairs = width >> 1;
479 |   for (int row = 0; row < height; ++row)
480 |   {
481 |     uint8_t *out = dst;
482 |     for (int i = 0; i < pairs; ++i, out += 4)
483 |     {
484 |       out[0] = py[2 * i];
485 |       out[1] = pu[i];
486 |       out[2] = py[2 * i + 1];
487 |       out[3] = pv[i];
488 |     }
489 |     py += pitch1Y; pu += pitch1UV; pv += pitch1UV;
490 |     dst += pitch2;
491 |   }
492 | }
493 | 
494 | // SSE2 planar -> YUY2 interleave; 'width' is in pixels. SIMD handles 8 pixels per step, a scalar loop does the rest.
495 | #ifdef INTEL_INTRINSICS
496 | void PlanarFrame::conv422toYUY2_SSE2(uint8_t *py, uint8_t *pu, uint8_t *pv,
497 |   uint8_t *dst, int pitch1Y, int pitch1UV, int pitch2, int width, int height)
498 | {
499 |   const int pairs = width >> 1, simdPairs = pairs & ~3; // U/V samples per row; the part done 4 at a time
500 |   for (int y = 0; y < height; y++) {
501 |     for (int x = 0; x < simdPairs; x += 4) {
502 |       __m128i yy = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(py + x * 2)); // YYYYYYYY
503 |       __m128i uu = _mm_castps_si128(_mm_load_ss(reinterpret_cast<float *>(pu + x))); // 000000000000UUUU
504 |       __m128i vv = _mm_castps_si128(_mm_load_ss(reinterpret_cast<float *>(pv + x))); // 000000000000VVVV
505 |       __m128i uvuv = _mm_unpacklo_epi8(uu, vv); // 00000000VUVUVUVU
506 |       __m128i yuyv = _mm_unpacklo_epi8(yy,uvuv); // VYUYVYUYVYUYVYUY
507 |       // unaligned store: copyToForBMP passes a pitch that is only a multiple of 4, so rows may not be 16-byte aligned
508 |       _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + x * 4), yuyv);
509 |     }
510 |     for (int x = simdPairs; x < pairs; ++x) { // scalar tail: never writes past the end of the row
511 |       dst[4 * x] = py[2 * x]; dst[4 * x + 1] = pu[x]; dst[4 * x + 2] = py[2 * x + 1]; dst[4 * x + 3] = pv[x];
512 |     }
513 |     dst += pitch2; py += pitch1Y; pu += pitch1UV; pv += pitch1UV;
514 |   }
515 | }
516 | #endif
516 | 
517 | // Avisynth v2.5.  Copyright 2002 Ben Rudiak-Gould et al.
518 | // http://www.avisynth.org
519 | 
520 | // This program is free software; you can redistribute it and/or modify
521 | // it under the terms of the GNU General Public License as published by
522 | // the Free Software Foundation; either version 2 of the License, or
523 | // (at your option) any later version.
524 | //
525 | // This program is distributed in the hope that it will be useful,
526 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
527 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
528 | // GNU General Public License for more details.
529 | //
530 | // You should have received a copy of the GNU General Public License
531 | // along with this program; if not, write to the Free Software
532 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
533 | // http://www.gnu.org/copyleft/gpl.html .
534 | //
535 | // Linking Avisynth statically or dynamically with other modules is making a
536 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
537 | // General Public License cover the whole combination.
538 | //
539 | // As a special exception, the copyright holders of Avisynth give you
540 | // permission to link Avisynth with independent modules that communicate with
541 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
542 | // terms of these independent modules, and to copy and distribute the
543 | // resulting combined work under terms of your choice, provided that
544 | // every copy of the combined work is accompanied by a complete copy of
545 | // the source code of Avisynth (the version of Avisynth used to produce the
546 | // combined work), being distributed under the terms of the GNU General
547 | // Public License plus this exception.  An independent module is a module
548 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
549 | // import and export plugins, or graphical user interfaces.
550 | 
551 | // from Avisynth 2.55 source...
552 | // copied so we don't need an
553 | // IScriptEnvironment pointer 
554 | // to call it
555 | 
556 | #include "avisynth.h"
557 | #include <memory.h>
558 | // Copies 'height' rows of 'row_size' bytes between two buffers whose pitches (bytes per row) may differ.
559 | void PlanarFrame::BitBlt(uint8_t* dstp, int dst_pitch, const uint8_t* srcp,
560 |   int src_pitch, int row_size, int height)
561 | {
562 |   if (height <= 0 || row_size <= 0) return; // nothing to copy; also rejects negative sizes
563 |   if (height == 1 || (dst_pitch == src_pitch && src_pitch == row_size))
564 |     memcpy(dstp, srcp, (size_t)row_size * height); // one contiguous run; was src_pitch * height, which copied padding when height == 1
565 |   else
566 |   {
567 |     for (int y = height; y > 0; --y)
568 |     {
569 |       memcpy(dstp, srcp, row_size);
570 |       dstp += dst_pitch;
571 |       srcp += src_pitch;
572 |     }
573 |   }
574 | }
575 | // Debug aid: returns 1 if any 32-bit word in the MIN_ALIGNMENT bytes just before y is not 0xDEADBEEF, else 0.
576 | int PlanarFrame::CheckMemory() 
577 | {
578 |   if (!debug) return 0;
579 |   if (!y) return 0;
580 |   // No padding in front of y (e.g. the YUY2 allocation path sets debug_padding = 0): reading there is out of bounds.
581 |   if (debug_padding < MIN_ALIGNMENT) return 0;
582 |   const uint32_t* guard = (const uint32_t*)(y - MIN_ALIGNMENT); // this detects writes *before* the plane
583 |   for (size_t i = 0; i < MIN_ALIGNMENT / sizeof(uint32_t); i++)
584 |     if (guard[i] != 0xDEADBEEF) return 1;
585 |   return 0;
586 | }
587 | 
588 | 


--------------------------------------------------------------------------------
/TComb/PlanarFrame.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | **   My PlanarFrame class... fast mmx/sse2 YUY2 packed to planar and planar
 3 | **   to packed conversions, and always gives MIN_ALIGNMENT (64) byte alignment for all
 4 | **   planes.  Supports YV12/YUY2 frames from avisynth, can do any planar format
 5 | **   internally.
 6 | **
 7 | **   Copyright (C) 2005-2006 Kevin Stone
 8 | **
 9 | **   This program is free software; you can redistribute it and/or modify
10 | **   it under the terms of the GNU General Public License as published by
11 | **   the Free Software Foundation; either version 2 of the License, or
12 | **   (at your option) any later version.
13 | **
14 | **   This program is distributed in the hope that it will be useful,
15 | **   but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | **   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | **   GNU General Public License for more details.
18 | **
19 | **   You should have received a copy of the GNU General Public License
20 | **   along with this program; if not, write to the Free Software
21 | **   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22 | */
23 | 
24 | #ifndef __PlanarFrame_H__
25 | #define __PlanarFrame_H__
26 | 
27 | #include <stdlib.h>
28 | #include "avisynth.h"
29 | 
30 | #define MIN_ALIGNMENT 64
31 | 
32 | #define PLANAR_420 1
33 | #define PLANAR_422 2
34 | #define PLANAR_444 3
35 | #define PLANAR_411 4
36 | #define PLANAR_400 5
37 | // Owns up to three MIN_ALIGNMENT-aligned byte planes (y, u, v) together with their pitch/width/height.
38 | class PlanarFrame
39 | {
40 | private:
41 |   int cpu; // CPU feature flags; tested against CPUF_SSE2 by the YUY2 converters
42 |   bool useSIMD, packed; // useSIMD gates the SSE2 paths; packed is read by allocSpace(VideoInfo&) for YUY2
43 |   int ypitch, uvpitch; // bytes between consecutive rows
44 |   int ywidth, uvwidth;
45 |   int yheight, uvheight;
46 |   bool debug = false; // when set, allocSpace(int[4]) over-allocates the luma plane and CheckMemory() is active
47 |   int debug_padding = 0; // the luma plane is freed at (y - debug_padding)
48 |   uint8_t *y, *u, *v; // plane base pointers; set to NULL when freed
49 |   bool allocSpace(VideoInfo &viInfo);
50 |   bool allocSpace(int specs[4]); // specs = { yheight, uvheight, ywidth, uvwidth }
51 |   void copyInternalFrom(PVideoFrame &frame, VideoInfo &viInfo);
52 |   void copyInternalFrom(PlanarFrame &frame);
53 |   void copyInternalTo(PVideoFrame &frame, VideoInfo &viInfo);
54 |   void copyInternalTo(PlanarFrame &frame);
55 |   void copyInternalPlaneTo(PlanarFrame &frame, int plane);
56 |   void convYUY2to422(const uint8_t *src, uint8_t *py, uint8_t *pu,
57 |     uint8_t *pv, int pitch1, int pitch2Y, int pitch2UV, int width, int height);
58 |   void conv422toYUY2(uint8_t *py, uint8_t *pu, uint8_t *pv,
59 |     uint8_t *dst, int pitch1Y, int pitch1UV, int pitch2, int width, int height);
60 | #ifdef INTEL_INTRINSICS
61 |   void convYUY2to422_SSE2(const uint8_t* src, uint8_t* py, uint8_t* pu,
62 |     uint8_t* pv, int pitch1, int pitch2Y, int pitch2UV, int width, int height);
63 |   void conv422toYUY2_SSE2(uint8_t *py, uint8_t *pu, uint8_t *pv,
64 |     uint8_t *dst, int pitch1Y, int pitch1UV, int pitch2, int width, int height);
65 | #endif
66 | 
67 | public:
68 |   PlanarFrame(int cpuInfo);
69 |   PlanarFrame(VideoInfo &viInfo, int cpuInfo);
70 |   PlanarFrame(VideoInfo &viInfo, bool _packed, int cpuInfo);
71 |   ~PlanarFrame();
72 |   void createPlanar(int yheight, int uvheight, int ywidth, int uvwidth);
73 |   void createPlanar(int height, int width, int chroma_format); // chroma_format is one of the PLANAR_* constants above
74 |   void createFromProfile(VideoInfo &viInfo);
75 |   void createFromFrame(PVideoFrame &frame, VideoInfo &viInfo);
76 |   void createFromPlanar(PlanarFrame &frame);
77 |   void copyFrom(PVideoFrame &frame, VideoInfo &viInfo);
78 |   void copyTo(PVideoFrame &frame, VideoInfo &viInfo);
79 |   void copyFrom(PlanarFrame &frame);
80 |   void copyTo(PlanarFrame &frame);
81 |   void copyChromaTo(PlanarFrame &dst);
82 |   void copyToForBMP(PVideoFrame &dst, VideoInfo &viInfo);
83 |   void copyPlaneTo(PlanarFrame &dst, int plane);
84 |   void freePlanar();
85 |   uint8_t* GetPtr(int plane = 0); // 0 = y, 1 = u, anything else = v
86 |   int NumComponents(); // 3 when uvpitch is non-zero, otherwise 1
87 |   int GetWidth(int plane = 0); // plane 0 = luma, any other index = chroma (same for GetHeight/GetPitch)
88 |   int GetHeight(int plane = 0);
89 |   int GetPitch(int plane = 0);
90 |   void BitBlt(uint8_t* dstp, int dst_pitch, const uint8_t* srcp,
91 |     int src_pitch, int row_size, int height);
92 |   int CheckMemory(); // returns 1 when the words just before y no longer hold 0xDEADBEEF (debug mode only)
93 |   void FillMemDebug();
94 |   PlanarFrame& operator=(PlanarFrame &ob2);
95 | };
96 | 
97 | #endif


--------------------------------------------------------------------------------
/TComb/TComb.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | **                    TComb v2.x for Avisynth 2.6 and Avisynth+
  3 | **
  4 | **   TComb is a temporal comb filter (it reduces cross-luminance (rainbowing)
  5 | **   and cross-chrominance (dot crawl) artifacts in static areas of the picture).
  6 | **   It will ONLY work with NTSC material, and WILL NOT work with telecined material
  7 | **   where the rainbowing/dotcrawl was introduced prior to the telecine process!
  8 | **   It must be used before ivtc or deinterlace.
  9 | **
 10 | **   Copyright (C) 2021 Ferenc Pintér
 11 | **
 12 | **   Copyright (C) 2015 Shane Panke
 13 | **
 14 | **   Copyright (C) 2005-2006 Kevin Stone
 15 | **
 16 | **   This program is free software; you can redistribute it and/or modify
 17 | **   it under the terms of the GNU General Public License as published by
 18 | **   the Free Software Foundation; either version 2 of the License, or
 19 | **   (at your option) any later version.
 20 | **
 21 | **   This program is distributed in the hope that it will be useful,
 22 | **   but WITHOUT ANY WARRANTY; without even the implied warranty of
 23 | **   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 24 | **   GNU General Public License for more details.
 25 | **
 26 | **   You should have received a copy of the GNU General Public License
 27 | **   along with this program; if not, write to the Free Software
 28 | **   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 29 | */
 30 | 
 31 | #if defined(_WIN32) && !defined(INTEL_INTRINSICS)
 32 | #error Forgot to set INTEL_INTRINSICS? Comment out this line if not
 33 | #endif
 34 | 
 35 | #include "avisynth.h"
 36 | #include "common.h"
 37 | #include <stdint.h>
 38 | #include <stdio.h>
 39 | #include "PlanarFrame.h"
 40 | 
 41 | // version appears in .rc as well
 42 | #define VERSION "v2.3"
 43 | 
 44 | //#define OLD_ASM
 45 | // Three- and four-operand minimum/maximum helpers built from nested std::min / std::max calls.
 46 | #define min3(a,b,c) std::min(std::min(a,b),c)
 47 | #define max3(a,b,c) std::max(std::max(a,b),c)
 48 | #define min4(a,b,c,d) std::min(std::min(a,b),std::min(c,d))
 49 | #define max4(a,b,c,d) std::max(std::max(a,b),std::max(c,d))
 50 | // Per-frame record: a frame number, flags and PlanarFrame buffers. NOTE(review): member semantics live in TComb.cpp -- confirm there.
 51 | class TCombFrame
 52 | {
 53 | public:
 54 |   int fnum; // presumably the source frame number this record holds (see setFNum) -- TODO confirm
 55 |   bool sc; // presumably a scene-change flag -- TODO confirm
 56 |   bool isValid[11];
 57 |   PlanarFrame* orig, * msk1, * msk2;
 58 |   PlanarFrame** b, * avg, * omsk; // b is an array of PlanarFrame pointers; its length is not visible in this header
 59 |   TCombFrame();
 60 |   TCombFrame(VideoInfo& vi, int cpuFlags);
 61 |   ~TCombFrame();
 62 |   void setFNum(int i);
 63 | };
 64 | // Array of TCombFrame pointers plus a start position and size. NOTE(review): looks like a frame cache -- confirm in TComb.cpp.
 65 | class TCombCache
 66 | {
 67 | public:
 68 |   TCombFrame** frames;
 69 |   int start_pos, size;
 70 |   TCombCache();
 71 |   TCombCache(int _size, VideoInfo& vi, int cpuFlags);
 72 |   ~TCombCache();
 73 |   void resetCacheStart(int first, int last);
 74 |   int getCachePos(int n);
 75 | };
 76 | // The TComb filter class, derived from Avisynth's GenericVideoFilter; the file header describes what the filter does.
 77 | class TComb : public GenericVideoFilter
 78 | {
 79 | public:
 80 |   PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env);
 81 |   TComb(PClip _child, int _mode, int _fthreshL, int _fthreshC, int _othreshL,
 82 |     int othreshC, bool _map, double _scthresh, bool _debug, int _opt, IScriptEnvironment* env);
 83 |   ~TComb();
 84 | private:
 85 |   bool map, debug;
 86 |   int fthreshL, fthreshC;
 87 |   int othreshL, othreshC;
 88 |   int mode, opt;
 89 |   unsigned long diffmaxsc;
 90 |   double scthresh;
 91 |   PlanarFrame* dstPF, * tmpPF;
 92 |   PlanarFrame* minPF, * maxPF;
 93 |   PlanarFrame* padPF;
 94 |   TCombCache* tdc;
 95 |   char buf[256];
 96 |   int mapn(int n);
 97 |   // NOTE(review): in the helpers below 'lc' presumably selects luma and/or chroma processing -- confirm in TComb.cpp.
 98 |   void getAverages(int lc, IScriptEnvironment* env);
 99 |   void buildOscillationMasks(int lc, IScriptEnvironment* env);
100 |   void getFinalMasks(int lc, IScriptEnvironment* env);
101 |   void insertFrame(PVideoFrame& src, int pos, int fnum, int lc, IScriptEnvironment* env);
102 |   void buildDiffMask(TCombFrame* tf1, TCombFrame* tf2, int lc, IScriptEnvironment* env);
103 |   void buildDiffMasks(int lc, IScriptEnvironment* env);
104 |   void absDiff(PlanarFrame* src1, PlanarFrame* src2, PlanarFrame* dst,
105 |     int lc, IScriptEnvironment* env);
106 |   void absDiffAndMinMask(PlanarFrame* src1, PlanarFrame* src2, PlanarFrame* dst,
107 |     int lc, IScriptEnvironment* env);
108 |   void VerticalBlur3(PlanarFrame* src, PlanarFrame* dst, int lc, IScriptEnvironment* env);
109 |   void HorizontalBlur3(PlanarFrame* src, PlanarFrame* dst, int lc, IScriptEnvironment* env);
110 |   void getStartStop(int lc, int& start, int& stop);
111 |   void buildFinalFrame(PlanarFrame* p2, PlanarFrame* p1, PlanarFrame* src,
112 |     PlanarFrame* n1, PlanarFrame* n2, PlanarFrame* m1, PlanarFrame* m2, PlanarFrame* m3,
113 |     PlanarFrame* dst, int lc, IScriptEnvironment* env);
114 |   void copyPad(PlanarFrame* src, PlanarFrame* dst, int lc, IScriptEnvironment* env);
115 |   void MinMax(PlanarFrame* src, PlanarFrame* dmin, PlanarFrame* dmax, int lc,
116 |     IScriptEnvironment* env);
117 |   void HorizontalBlur6(PlanarFrame* src, PlanarFrame* dst, int lc, IScriptEnvironment* env);
118 |   void absDiffAndMinMaskThresh(PlanarFrame* src1, PlanarFrame* src2, PlanarFrame* dst,
119 |     int lc, IScriptEnvironment* env);
120 |   void buildFinalMask(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* m1,
121 |     PlanarFrame* dst, int lc, IScriptEnvironment* env);
122 |   void calcAverages(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* dst, int lc, IScriptEnvironment* env);
123 |   void checkOscillation5(PlanarFrame* p2, PlanarFrame* p1, PlanarFrame* s1,
124 |     PlanarFrame* n1, PlanarFrame* n2, PlanarFrame* dst, int lc, IScriptEnvironment* env);
125 |   void checkAvgOscCorrelation(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* s3,
126 |     PlanarFrame* s4, PlanarFrame* dst, int lc, IScriptEnvironment* env);
127 |   void or3Masks(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* s3,
128 |     PlanarFrame* dst, int lc, IScriptEnvironment* env);
129 |   void orAndMasks(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* dst, int lc, IScriptEnvironment* env);
130 |   void andMasks(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* dst, int lc, IScriptEnvironment* env);
131 |   bool checkSceneChange(PlanarFrame* s1, PlanarFrame* s2, int n, IScriptEnvironment* env);
132 |   void andNeighborsInPlace(PlanarFrame* src, int lc, IScriptEnvironment* env);
133 | };
133 | 
134 | void checkSceneChangePlanar_1_SSE2_simd(const uint8_t* prvp, const uint8_t* srcp,
135 |   int height, int width, int prv_pitch, int src_pitch, uint64_t& diffp);
136 | 
137 | template<typename pixel_t>
138 | void checkSceneChangePlanar_1_c(const pixel_t* prvp, const pixel_t* srcp,
139 |   int height, int width, int prv_pitch, int src_pitch, uint64_t& diffp);
140 | 
141 | void andMasks_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height);
142 | void andMasks_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height);
143 | 
144 | void orAndMasks_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height);
145 | void orAndMasks_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height);
146 | 
147 | void or3Masks_SSE2_simd(const uint8_t * s1p, const uint8_t * s2p, const uint8_t * s3p, uint8_t * dstp, int stride, int width, int height);
148 | void or3Masks_c(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* s3p, uint8_t* dstp, int stride, int width, int height);
149 | 
150 | void calcAverages_SSE2_simd(const uint8_t * s1p, const uint8_t * s2p, uint8_t * dstp, int stride, int width, int height);
151 | void calcAverages_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height);
152 | 
153 | void MinMax_SSE2_simd(const uint8_t * srcp, uint8_t * dstpMin, uint8_t * dstpMax, int src_stride, int dmin_stride, int width, int height, int thresh);
154 | void MinMax_c(const uint8_t* srcp, uint8_t* dstpMin, uint8_t* dstpMax, int src_stride, int dmin_stride, int width, int height, int thresh);
155 | 
156 | void absDiff_SSE2_simd(const uint8_t * srcp1, const uint8_t * srcp2, uint8_t * dstp, int stride, int width, int height);
157 | void absDiff_c(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height);
158 | 
159 | void buildFinalMask_SSE2_simd(const uint8_t * s1p, const uint8_t * s2p, const uint8_t * m1p, uint8_t * dstp, int stride, int width, int height, int thresh);
160 | void buildFinalMask_c(const uint8_t * s1p, const uint8_t * s2p, const uint8_t * m1p, uint8_t * dstp, int stride, int width, int height, int thresh);
161 | 
162 | void checkOscillation5_SSE2_simd(const uint8_t * p2p, const uint8_t * p1p, const uint8_t * s1p, const uint8_t * n1p, const uint8_t * n2p, uint8_t * dstp, int stride, int width, int height, int thresh);
163 | void checkOscillation5_c(const uint8_t * p2p, const uint8_t * p1p, const uint8_t * s1p, const uint8_t * n1p, const uint8_t * n2p, uint8_t * dstp, int stride, int width, int height, int thresh);
164 | 
165 | void absDiffAndMinMaskThresh_SSE2_simd(const uint8_t * srcp1, const uint8_t * srcp2, uint8_t * dstp, int stride, int width, int height, int thresh);
166 | void absDiffAndMinMaskThresh_c(const uint8_t * srcp1, const uint8_t * srcp2, uint8_t * dstp, int stride, int width, int height, int thresh);
167 | 
168 | void absDiffAndMinMask_SSE2_simd(const uint8_t * srcp1, const uint8_t * srcp2, uint8_t * dstp, int stride, int width, int height);
169 | void absDiffAndMinMask_c(const uint8_t * srcp1, const uint8_t * srcp2, uint8_t * dstp, int stride, int width, int height);
170 | 
171 | void checkAvgOscCorrelation_SSE2_simd(const uint8_t * s1p, const uint8_t * s2p, const uint8_t * s3p, const uint8_t * s4p, uint8_t * dstp, int stride, int width, int height, int thresh);
172 | void checkAvgOscCorrelation_c(const uint8_t * s1p, const uint8_t * s2p, const uint8_t * s3p, const uint8_t * s4p, uint8_t * dstp, int stride, int width, int height, int thresh);
173 | 
174 | void VerticalBlur3_SSE2_simd(const uint8_t * srcp, uint8_t * dstp, int stride, int width, int height);
175 | void VerticalBlur3_c(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height);
176 | 
177 | void HorizontalBlur3_SSE2_simd(const uint8_t * srcp, uint8_t * dstp, int stride, int width, int height);
178 | void HorizontalBlur3_c(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height);
179 | 
180 | void HorizontalBlur6_SSE2_simd(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height);
181 | void HorizontalBlur6_c(const uint8_t * srcp, uint8_t * dstp, int stride, int width, int height);
182 | 
183 | void andNeighborsInPlace_SSE2_simd(uint8_t * srcp, int stride, int width, int height);
184 | // no distinct C here
185 | 
186 | 


--------------------------------------------------------------------------------
/TComb/TComb.rc:
--------------------------------------------------------------------------------
 1 | // Microsoft Visual C++ generated resource script.
 2 | //
 3 | #include "resource.h"
 4 | 
 5 | #define APSTUDIO_READONLY_SYMBOLS
 6 | /////////////////////////////////////////////////////////////////////////////
 7 | //
 8 | // Generated from the TEXTINCLUDE 2 resource.
 9 | //
10 | #include "winres.h"
11 | 
12 | /////////////////////////////////////////////////////////////////////////////
13 | #undef APSTUDIO_READONLY_SYMBOLS
14 | 
15 | /////////////////////////////////////////////////////////////////////////////
16 | // English (United States) resources
17 | 
18 | #if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
19 | LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
20 | #pragma code_page(1252)
21 | 
22 | #ifdef APSTUDIO_INVOKED
23 | /////////////////////////////////////////////////////////////////////////////
24 | //
25 | // TEXTINCLUDE
26 | //
27 | 
28 | 1 TEXTINCLUDE 
29 | BEGIN
30 |     "resource.h\0"
31 | END
32 | 
33 | 2 TEXTINCLUDE 
34 | BEGIN
35 |     "#include ""winres.h""\r\n"
36 |     "\0"
37 | END
38 | 
39 | 3 TEXTINCLUDE 
40 | BEGIN
41 |     "\r\n"
42 |     "\0"
43 | END
44 | 
45 | #endif    // APSTUDIO_INVOKED
46 | 
47 | 
48 | /////////////////////////////////////////////////////////////////////////////
49 | //
50 | // Version
51 | //
52 | 
53 | VS_VERSION_INFO VERSIONINFO
54 |  FILEVERSION 2,3,0,0
55 |  PRODUCTVERSION 2,3,0,0
56 |  FILEFLAGSMASK 0x17L
57 | #ifdef _DEBUG
58 |  FILEFLAGS 0x1L
59 | #else
60 |  FILEFLAGS 0x0L
61 | #endif
62 |  FILEOS 0x4L
63 |  FILETYPE 0x2L
64 |  FILESUBTYPE 0x0L
65 | BEGIN
66 |     BLOCK "StringFileInfo"
67 |     BEGIN
68 |         BLOCK "040904b0"
69 |         BEGIN
70 |             VALUE "FileDescription", "TComb for Avisynth 2.6 and Avisynth+"
71 |             VALUE "FileVersion", "2.3.0.0"
72 |             VALUE "LegalCopyright", "Copyright (C) 2005-2006 Kevin Stone 2015- et al."
73 |             VALUE "OriginalFilename", "TComb.dll"
74 |             VALUE "ProductVersion", "2.3.0.0"
75 |         END
76 |     END
77 |     BLOCK "VarFileInfo"
78 |     BEGIN
79 |         VALUE "Translation", 0x409, 1200
80 |     END
81 | END
82 | 
83 | #endif    // English (United States) resources
84 | /////////////////////////////////////////////////////////////////////////////
85 | 
86 | 
87 | 
88 | #ifndef APSTUDIO_INVOKED
89 | /////////////////////////////////////////////////////////////////////////////
90 | //
91 | // Generated from the TEXTINCLUDE 3 resource.
92 | //
93 | 
94 | 
95 | /////////////////////////////////////////////////////////////////////////////
96 | #endif    // not APSTUDIO_INVOKED
97 | 
98 | 


--------------------------------------------------------------------------------
/TComb/TComb.sln:
--------------------------------------------------------------------------------
 1 | Microsoft Visual Studio Solution File, Format Version 12.00
 2 | # Visual Studio 2013
 3 | VisualStudioVersion = 12.0.30501.0
 4 | MinimumVisualStudioVersion = 10.0.40219.1
 5 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TComb", "TComb.vcxproj", "{B4188B7A-C76E-4E35-946F-3477273D0A44}"
 6 | EndProject
 7 | Global
 8 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 9 | 		Debug|Win32 = Debug|Win32
10 | 		Debug|x64 = Debug|x64
11 | 		Release|Win32 = Release|Win32
12 | 		Release|x64 = Release|x64
13 | 	EndGlobalSection
14 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
15 | 		{B4188B7A-C76E-4E35-946F-3477273D0A44}.Debug|Win32.ActiveCfg = Debug|Win32
16 | 		{B4188B7A-C76E-4E35-946F-3477273D0A44}.Debug|Win32.Build.0 = Debug|Win32
17 | 		{B4188B7A-C76E-4E35-946F-3477273D0A44}.Debug|x64.ActiveCfg = Debug|x64
18 | 		{B4188B7A-C76E-4E35-946F-3477273D0A44}.Debug|x64.Build.0 = Debug|x64
19 | 		{B4188B7A-C76E-4E35-946F-3477273D0A44}.Release|Win32.ActiveCfg = Release|Win32
20 | 		{B4188B7A-C76E-4E35-946F-3477273D0A44}.Release|Win32.Build.0 = Release|Win32
21 | 		{B4188B7A-C76E-4E35-946F-3477273D0A44}.Release|x64.ActiveCfg = Release|x64
22 | 		{B4188B7A-C76E-4E35-946F-3477273D0A44}.Release|x64.Build.0 = Release|x64
23 | 	EndGlobalSection
24 | 	GlobalSection(SolutionProperties) = preSolution
25 | 		HideSolutionNode = FALSE
26 | 	EndGlobalSection
27 | EndGlobal
28 | 


--------------------------------------------------------------------------------
/TComb/TComb.vcproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="Windows-1252"?>
  2 | <VisualStudioProject
  3 | 	ProjectType="Visual C++"
  4 | 	Version="7.10"
  5 | 	Name="TComb"
  6 | 	ProjectGUID="{B4188B7A-C76E-4E35-946F-3477273D0A44}"
  7 | 	Keyword="Win32Proj">
  8 | 	<Platforms>
  9 | 		<Platform
 10 | 			Name="Win32"/>
 11 | 	</Platforms>
 12 | 	<Configurations>
 13 | 		<Configuration
 14 | 			Name="Debug|Win32"
 15 | 			OutputDirectory="Debug"
 16 | 			IntermediateDirectory="Debug"
 17 | 			ConfigurationType="2"
 18 | 			CharacterSet="2">
 19 | 			<Tool
 20 | 				Name="VCCLCompilerTool"
 21 | 				Optimization="0"
 22 | 				PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;TCOMB_EXPORTS"
 23 | 				MinimalRebuild="TRUE"
 24 | 				BasicRuntimeChecks="3"
 25 | 				RuntimeLibrary="1"
 26 | 				BufferSecurityCheck="TRUE"
 27 | 				EnableFunctionLevelLinking="TRUE"
 28 | 				UsePrecompiledHeader="0"
 29 | 				WarningLevel="3"
 30 | 				Detect64BitPortabilityProblems="TRUE"
 31 | 				DebugInformationFormat="4"/>
 32 | 			<Tool
 33 | 				Name="VCCustomBuildTool"/>
 34 | 			<Tool
 35 | 				Name="VCLinkerTool"
 36 | 				OutputFile="$(OutDir)/TComb.dll"
 37 | 				LinkIncremental="2"
 38 | 				GenerateDebugInformation="TRUE"
 39 | 				ProgramDatabaseFile="$(OutDir)/TComb.pdb"
 40 | 				SubSystem="2"
 41 | 				OptimizeReferences="1"
 42 | 				EnableCOMDATFolding="1"
 43 | 				ImportLibrary="$(OutDir)/TComb.lib"
 44 | 				TargetMachine="1"/>
 45 | 			<Tool
 46 | 				Name="VCMIDLTool"/>
 47 | 			<Tool
 48 | 				Name="VCPostBuildEventTool"/>
 49 | 			<Tool
 50 | 				Name="VCPreBuildEventTool"/>
 51 | 			<Tool
 52 | 				Name="VCPreLinkEventTool"/>
 53 | 			<Tool
 54 | 				Name="VCResourceCompilerTool"/>
 55 | 			<Tool
 56 | 				Name="VCWebServiceProxyGeneratorTool"/>
 57 | 			<Tool
 58 | 				Name="VCXMLDataGeneratorTool"/>
 59 | 			<Tool
 60 | 				Name="VCWebDeploymentTool"/>
 61 | 			<Tool
 62 | 				Name="VCManagedWrapperGeneratorTool"/>
 63 | 			<Tool
 64 | 				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
 65 | 		</Configuration>
 66 | 		<Configuration
 67 | 			Name="Release|Win32"
 68 | 			OutputDirectory="Release"
 69 | 			IntermediateDirectory="Release"
 70 | 			ConfigurationType="2"
 71 | 			CharacterSet="2"
 72 | 			WholeProgramOptimization="FALSE">
 73 | 			<Tool
 74 | 				Name="VCCLCompilerTool"
 75 | 				Optimization="3"
 76 | 				GlobalOptimizations="TRUE"
 77 | 				InlineFunctionExpansion="2"
 78 | 				EnableIntrinsicFunctions="TRUE"
 79 | 				ImproveFloatingPointConsistency="FALSE"
 80 | 				FavorSizeOrSpeed="1"
 81 | 				OmitFramePointers="TRUE"
 82 | 				EnableFiberSafeOptimizations="FALSE"
 83 | 				OptimizeForProcessor="3"
 84 | 				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;TCOMB_EXPORTS"
 85 | 				RuntimeLibrary="0"
 86 | 				BufferSecurityCheck="FALSE"
 87 | 				EnableFunctionLevelLinking="TRUE"
 88 | 				UsePrecompiledHeader="0"
 89 | 				WarningLevel="3"
 90 | 				Detect64BitPortabilityProblems="TRUE"
 91 | 				DebugInformationFormat="3"/>
 92 | 			<Tool
 93 | 				Name="VCCustomBuildTool"/>
 94 | 			<Tool
 95 | 				Name="VCLinkerTool"
 96 | 				OutputFile="$(OutDir)/TComb.dll"
 97 | 				LinkIncremental="1"
 98 | 				GenerateDebugInformation="TRUE"
 99 | 				SubSystem="2"
100 | 				OptimizeReferences="2"
101 | 				EnableCOMDATFolding="2"
102 | 				ImportLibrary="$(OutDir)/TComb.lib"
103 | 				TargetMachine="1"/>
104 | 			<Tool
105 | 				Name="VCMIDLTool"/>
106 | 			<Tool
107 | 				Name="VCPostBuildEventTool"/>
108 | 			<Tool
109 | 				Name="VCPreBuildEventTool"/>
110 | 			<Tool
111 | 				Name="VCPreLinkEventTool"/>
112 | 			<Tool
113 | 				Name="VCResourceCompilerTool"/>
114 | 			<Tool
115 | 				Name="VCWebServiceProxyGeneratorTool"/>
116 | 			<Tool
117 | 				Name="VCXMLDataGeneratorTool"/>
118 | 			<Tool
119 | 				Name="VCWebDeploymentTool"/>
120 | 			<Tool
121 | 				Name="VCManagedWrapperGeneratorTool"/>
122 | 			<Tool
123 | 				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
124 | 		</Configuration>
125 | 	</Configurations>
126 | 	<References>
127 | 	</References>
128 | 	<Files>
129 | 		<Filter
130 | 			Name="Source Files"
131 | 			Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
132 | 			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
133 | 			<File
134 | 				RelativePath=".\memcpy_amd.cpp">
135 | 			</File>
136 | 			<File
137 | 				RelativePath=".\PlanarFrame.cpp">
138 | 			</File>
139 | 			<File
140 | 				RelativePath=".\TComb.cpp">
141 | 			</File>
142 | 		</Filter>
143 | 		<Filter
144 | 			Name="Header Files"
145 | 			Filter="h;hpp;hxx;hm;inl;inc;xsd"
146 | 			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
147 | 			<File
148 | 				RelativePath=".\avisynth.h">
149 | 			</File>
150 | 			<File
151 | 				RelativePath=".\internal.h">
152 | 			</File>
153 | 			<File
154 | 				RelativePath=".\memcpy_amd.h">
155 | 			</File>
156 | 			<File
157 | 				RelativePath=".\PlanarFrame.h">
158 | 			</File>
159 | 			<File
160 | 				RelativePath=".\resource.h">
161 | 			</File>
162 | 			<File
163 | 				RelativePath=".\TComb.h">
164 | 			</File>
165 | 		</Filter>
166 | 		<Filter
167 | 			Name="Resource Files"
168 | 			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
169 | 			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
170 | 			<File
171 | 				RelativePath=".\TComb.rc">
172 | 			</File>
173 | 		</Filter>
174 | 	</Files>
175 | 	<Globals>
176 | 	</Globals>
177 | </VisualStudioProject>
178 | 


--------------------------------------------------------------------------------
/TComb/TComb.vcxproj:
--------------------------------------------------------------------------------
  1 | ﻿<?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Debug|x64">
  9 |       <Configuration>Debug</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Release|Win32">
 13 |       <Configuration>Release</Configuration>
 14 |       <Platform>Win32</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <PropertyGroup Label="Globals">
 22 |     <ProjectGuid>{B4188B7A-C76E-4E35-946F-3477273D0A44}</ProjectGuid>
 23 |     <Keyword>Win32Proj</Keyword>
 24 |     <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
 25 |   </PropertyGroup>
 26 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 27 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 28 |     <ConfigurationType>DynamicLibrary</ConfigurationType>
 29 |     <PlatformToolset>v142</PlatformToolset>
 30 |     <CharacterSet>MultiByte</CharacterSet>
 31 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 32 |   </PropertyGroup>
 33 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 34 |     <ConfigurationType>DynamicLibrary</ConfigurationType>
 35 |     <PlatformToolset>v142</PlatformToolset>
 36 |     <CharacterSet>MultiByte</CharacterSet>
 37 |   </PropertyGroup>
 38 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 39 |     <ConfigurationType>DynamicLibrary</ConfigurationType>
 40 |     <PlatformToolset>v142</PlatformToolset>
 41 |     <CharacterSet>MultiByte</CharacterSet>
 42 |   </PropertyGroup>
 43 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 44 |     <ConfigurationType>DynamicLibrary</ConfigurationType>
 45 |     <PlatformToolset>v142</PlatformToolset>
 46 |     <CharacterSet>MultiByte</CharacterSet>
 47 |   </PropertyGroup>
 48 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 49 |   <ImportGroup Label="ExtensionSettings">
 50 |     <Import Project="$(VCTargetsPath)\BuildCustomizations\masm.props" />
 51 |   </ImportGroup>
 52 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
 53 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 54 |   </ImportGroup>
 55 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
 56 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 57 |   </ImportGroup>
 58 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
 59 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 60 |   </ImportGroup>
 61 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
 62 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 63 |   </ImportGroup>
 64 |   <PropertyGroup Label="UserMacros" />
 65 |   <PropertyGroup>
 66 |     <_ProjectFileVersion>12.0.30501.0</_ProjectFileVersion>
 67 |   </PropertyGroup>
 68 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 69 |     <OutDir>Debug\</OutDir>
 70 |     <IntDir>Debug\</IntDir>
 71 |     <LinkIncremental>true</LinkIncremental>
 72 |   </PropertyGroup>
 73 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 74 |     <LinkIncremental>true</LinkIncremental>
 75 |   </PropertyGroup>
 76 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 77 |     <OutDir>Release\</OutDir>
 78 |     <IntDir>Release\</IntDir>
 79 |     <LinkIncremental>false</LinkIncremental>
 80 |   </PropertyGroup>
 81 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 82 |     <LinkIncremental>false</LinkIncremental>
 83 |   </PropertyGroup>
 84 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Compiler and linker settings for the 32-bit Debug build -->
 85 |     <ClCompile>
 86 |       <Optimization>Disabled</Optimization>
 87 |       <PreprocessorDefinitions>INTEL_INTRINSICS;WIN32;_DEBUG;_WINDOWS;_USRDLL;TCOMB_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 88 |       <MinimalRebuild>false</MinimalRebuild> <!-- /Gm is deprecated in current MSVC toolsets (warning D9035); the Debug|x64 configuration does not enable it either -->
 89 |       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
 90 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
 91 |       <BufferSecurityCheck>true</BufferSecurityCheck>
 92 |       <FunctionLevelLinking>true</FunctionLevelLinking>
 93 |       <PrecompiledHeader />
 94 |       <WarningLevel>Level3</WarningLevel>
 95 |       <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
 96 |       <AssemblerOutput>NoListing</AssemblerOutput>
 97 |     </ClCompile>
 98 |     <Link>
 99 |       <OutputFile>$(OutDir)TComb.dll</OutputFile>
100 |       <GenerateDebugInformation>true</GenerateDebugInformation>
101 |       <ProgramDatabaseFile>$(OutDir)TComb.pdb</ProgramDatabaseFile>
102 |       <SubSystem>Windows</SubSystem>
103 |       <OptimizeReferences>false</OptimizeReferences>
104 |       <EnableCOMDATFolding>false</EnableCOMDATFolding>
105 |       <ImportLibrary>$(OutDir)TComb.lib</ImportLibrary>
106 |       <TargetMachine>MachineX86</TargetMachine>
107 |       <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers> <!-- NOTE(review): presumably needed because the MASM object carries no SafeSEH table; confirm -->
108 |     </Link>
109 |   </ItemDefinitionGroup>
110 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
111 |     <ClCompile>
112 |       <Optimization>Disabled</Optimization>
113 |       <PreprocessorDefinitions>INTEL_INTRINSICS;WIN32;_DEBUG;_WINDOWS;_USRDLL;TCOMB_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
114 |       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
115 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
116 |       <BufferSecurityCheck>true</BufferSecurityCheck>
117 |       <FunctionLevelLinking>true</FunctionLevelLinking>
118 |       <PrecompiledHeader>
119 |       </PrecompiledHeader>
120 |       <WarningLevel>Level3</WarningLevel>
121 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
122 |     </ClCompile>
123 |     <Link>
124 |       <OutputFile>$(OutDir)TComb.dll</OutputFile>
125 |       <GenerateDebugInformation>true</GenerateDebugInformation>
126 |       <ProgramDatabaseFile>$(OutDir)TComb.pdb</ProgramDatabaseFile>
127 |       <SubSystem>Windows</SubSystem>
128 |       <OptimizeReferences>false</OptimizeReferences>
129 |       <EnableCOMDATFolding>false</EnableCOMDATFolding>
130 |       <ImportLibrary>$(OutDir)TComb.lib</ImportLibrary>
131 |     </Link>
132 |   </ItemDefinitionGroup>
133 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
134 |     <ClCompile>
135 |       <Optimization>Full</Optimization>
136 |       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
137 |       <IntrinsicFunctions>true</IntrinsicFunctions>
138 |       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
139 |       <OmitFramePointers>true</OmitFramePointers>
140 |       <EnableFiberSafeOptimizations>false</EnableFiberSafeOptimizations>
141 |       <PreprocessorDefinitions>INTEL_INTRINSICS;WIN32;NDEBUG;_WINDOWS;_USRDLL;TCOMB_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
142 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
143 |       <BufferSecurityCheck>false</BufferSecurityCheck>
144 |       <FunctionLevelLinking>true</FunctionLevelLinking>
145 |       <PrecompiledHeader />
146 |       <WarningLevel>Level3</WarningLevel>
147 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
148 |       <MultiProcessorCompilation>true</MultiProcessorCompilation>
149 |       <AssemblerOutput>NoListing</AssemblerOutput>
150 |     </ClCompile>
151 |     <Link>
152 |       <OutputFile>$(OutDir)TComb.dll</OutputFile>
153 |       <GenerateDebugInformation>false</GenerateDebugInformation>
154 |       <SubSystem>Windows</SubSystem>
155 |       <OptimizeReferences>true</OptimizeReferences>
156 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
157 |       <ImportLibrary>$(OutDir)TComb.lib</ImportLibrary>
158 |       <TargetMachine>MachineX86</TargetMachine>
159 |       <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
160 |       <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
161 |     </Link>
162 |   </ItemDefinitionGroup>
163 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
164 |     <ClCompile>
165 |       <Optimization>MaxSpeed</Optimization>
166 |       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
167 |       <IntrinsicFunctions>true</IntrinsicFunctions>
168 |       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
169 |       <OmitFramePointers>true</OmitFramePointers>
170 |       <EnableFiberSafeOptimizations>false</EnableFiberSafeOptimizations>
171 |       <PreprocessorDefinitions>INTEL_INTRINSICS;WIN32;NDEBUG;_WINDOWS;_USRDLL;TCOMB_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
172 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
173 |       <BufferSecurityCheck>false</BufferSecurityCheck>
174 |       <FunctionLevelLinking>true</FunctionLevelLinking>
175 |       <PrecompiledHeader>
176 |       </PrecompiledHeader>
177 |       <WarningLevel>Level3</WarningLevel>
178 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
179 |       <AssemblerOutput>AssemblyAndSourceCode</AssemblerOutput>
180 |     </ClCompile>
181 |     <Link>
182 |       <OutputFile>$(OutDir)TComb.dll</OutputFile>
183 |       <GenerateDebugInformation>true</GenerateDebugInformation>
184 |       <SubSystem>Windows</SubSystem>
185 |       <OptimizeReferences>true</OptimizeReferences>
186 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
187 |       <ImportLibrary>$(OutDir)TComb.lib</ImportLibrary>
188 |     </Link>
189 |   </ItemDefinitionGroup>
190 |   <ItemGroup>
191 |     <ClCompile Include="PlanarFrame.cpp" />
192 |     <ClCompile Include="TComb.cpp" />
193 |     <ClCompile Include="TComb_core.cpp" />
194 |   </ItemGroup>
195 |   <ItemGroup>
196 |     <ClInclude Include="avisynth.h" />
197 |     <ClInclude Include="avs\alignment.h" />
198 |     <ClInclude Include="avs\capi.h" />
199 |     <ClInclude Include="avs\config.h" />
200 |     <ClInclude Include="avs\cpuid.h" />
201 |     <ClInclude Include="avs\minmax.h" />
202 |     <ClInclude Include="avs\types.h" />
203 |     <ClInclude Include="avs\win.h" />
204 |     <ClInclude Include="common.h" />
205 |     <ClInclude Include="PlanarFrame.h" />
206 |     <ClInclude Include="resource.h" />
207 |     <ClInclude Include="TComb.h" />
208 |   </ItemGroup>
209 |   <ItemGroup>
210 |     <ResourceCompile Include="TComb.rc" />
211 |   </ItemGroup>
212 |   <ItemGroup>
213 |     <MASM Include="TComb_asm.asm">
214 |       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
215 |       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
216 |     </MASM>
217 |     <MASM Include="TComb_asm_x64.asm">
218 |       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
219 |       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
220 |     </MASM>
221 |   </ItemGroup>
222 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
223 |   <ImportGroup Label="ExtensionTargets">
224 |     <Import Project="$(VCTargetsPath)\BuildCustomizations\masm.targets" />
225 |   </ImportGroup>
226 | </Project>


--------------------------------------------------------------------------------
/TComb/TComb.vcxproj.filters:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ItemGroup>
 4 |     <Filter Include="Source Files">
 5 |       <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
 6 |       <Extensions>cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
 7 |     </Filter>
 8 |     <Filter Include="Header Files">
 9 |       <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
10 |       <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
11 |     </Filter>
12 |     <Filter Include="Resource Files">
13 |       <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
14 |       <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx</Extensions>
15 |     </Filter>
16 |   </ItemGroup>
17 |   <ItemGroup>
18 |     <ClCompile Include="PlanarFrame.cpp">
19 |       <Filter>Source Files</Filter>
20 |     </ClCompile>
21 |     <ClCompile Include="TComb.cpp">
22 |       <Filter>Source Files</Filter>
23 |     </ClCompile>
24 |     <ClCompile Include="TComb_core.cpp">
25 |       <Filter>Source Files</Filter>
26 |     </ClCompile>
27 |   </ItemGroup>
28 |   <ItemGroup>
29 |     <ClInclude Include="avisynth.h">
30 |       <Filter>Header Files</Filter>
31 |     </ClInclude>
32 |     <ClInclude Include="PlanarFrame.h">
33 |       <Filter>Header Files</Filter>
34 |     </ClInclude>
35 |     <ClInclude Include="resource.h">
36 |       <Filter>Header Files</Filter>
37 |     </ClInclude>
38 |     <ClInclude Include="TComb.h">
39 |       <Filter>Header Files</Filter>
40 |     </ClInclude>
41 |     <ClInclude Include="avs\alignment.h">
42 |       <Filter>Header Files</Filter>
43 |     </ClInclude>
44 |     <ClInclude Include="avs\capi.h">
45 |       <Filter>Header Files</Filter>
46 |     </ClInclude>
47 |     <ClInclude Include="avs\config.h">
48 |       <Filter>Header Files</Filter>
49 |     </ClInclude>
50 |     <ClInclude Include="avs\cpuid.h">
51 |       <Filter>Header Files</Filter>
52 |     </ClInclude>
53 |     <ClInclude Include="avs\minmax.h">
54 |       <Filter>Header Files</Filter>
55 |     </ClInclude>
56 |     <ClInclude Include="avs\types.h">
57 |       <Filter>Header Files</Filter>
58 |     </ClInclude>
59 |     <ClInclude Include="avs\win.h">
60 |       <Filter>Header Files</Filter>
61 |     </ClInclude>
62 |     <ClInclude Include="common.h">
63 |       <Filter>Header Files</Filter>
64 |     </ClInclude>
65 |   </ItemGroup>
66 |   <ItemGroup>
67 |     <ResourceCompile Include="TComb.rc">
68 |       <Filter>Resource Files</Filter>
69 |     </ResourceCompile>
70 |   </ItemGroup>
71 |   <ItemGroup>
72 |     <MASM Include="TComb_asm.asm">
73 |       <Filter>Source Files</Filter>
74 |     </MASM>
75 |     <MASM Include="TComb_asm_x64.asm">
76 |       <Filter>Source Files</Filter>
77 |     </MASM>
78 |   </ItemGroup>
79 | </Project>


--------------------------------------------------------------------------------
/TComb/TComb_asm.asm:
--------------------------------------------------------------------------------
  1 | .xmm	; enable assembly of XMM (SSE/SSE2) instructions
  2 | .model flat,c	; flat memory model, C language type for the procedures below
  3 | 
  4 | .data
  5 | ; 16-byte constants; aligned so they can be used directly as oword memory operands.
  6 | align 16
  7 | 
  8 | onesByte qword 2 dup(0101010101010101h)	; 16 bytes, each 01h
  9 | sixsMask_W qword 2 dup(0006000600060006h)	; 8 words, each 0006h
 10 | eightsMask_W qword 2 dup(0008000800080008h)	; 8 words, each 0008h
 11 | 
 12 | .code
 13 | 
 14 | buildFinalMask_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,m1p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword
 15 | ; Per byte: dstp = m1p where |s1p - s2p| <= thresh-1, else 0. 16 bytes per step; all four rows advance by the same stride.
 16 | 	mov eax,s1p
 17 | 	mov ebx,s2p
 18 | 	mov edx,m1p
 19 | 	mov esi,dstp
 20 | 	mov edi,width_
 21 | 	; Broadcast the low byte of (thresh-1) into all 16 bytes of xmm4. Assumes thresh-1 fits in a byte - TODO confirm callers.
 22 | 	dec thresh	; modifies the argument's stack slot
 23 | 	movd xmm4,thresh
 24 | 	punpcklbw xmm4, xmm4
 25 | 	punpcklwd xmm4, xmm4
 26 | 	punpckldq xmm4, xmm4
 27 | 	punpcklqdq xmm4, xmm4
 28 | 	
 29 | 	pxor xmm5,xmm5	; xmm5 = 0, comparison constant
 30 | 	
 31 | yloop:
 32 | 	xor ecx,ecx	; ecx = byte offset within the row
 33 | 	align 16
 34 | xloop:
 35 | 	movdqa xmm0,[eax+ecx]	; movdqa: address must be 16-byte aligned
 36 | 	movdqa xmm1,[ebx+ecx]
 37 | 	movdqa xmm2,xmm0
 38 | 	psubusb xmm0,xmm1	; sat(s1 - s2)
 39 | 	psubusb xmm1,xmm2	; sat(s2 - s1)
 40 | 	por xmm0,xmm1	; |s1 - s2| (one of the two saturated differences is 0)
 41 | 	psubusb xmm0,xmm4	; 0 where |s1 - s2| <= thresh-1
 42 | 	pcmpeqb xmm0,xmm5	; FFh where the difference is below thresh
 43 | 	pand xmm0,[edx+ecx]	; keep only bytes also set in m1p
 44 | 	movdqa [esi+ecx],xmm0
 45 | 	
 46 | 	add ecx,16
 47 | 	cmp ecx,edi
 48 | 	jl xloop	; signed compare; the body always runs at least once per row
 49 | 	
 50 | 	add eax,stride
 51 | 	add ebx,stride
 52 | 	add edx,stride
 53 | 	add esi,stride
 54 | 	dec height	; row counter lives in the argument's stack slot
 55 | 	jnz yloop
 56 | 
 57 | 	ret
 58 | 
 59 | buildFinalMask_SSE2 endp
 60 | 
 61 | 
 62 | 
 63 | andNeighborsInPlace_SSE2 proc public uses esi edi srcp:dword,stride:dword,width_:dword,height:dword
 64 | ; In place, per byte: cur = cur AND (OR of the bytes at x-1, x, x+1 in the row above and in the row below).
 65 | 	mov eax,srcp	; eax = current row
 66 | 	mov edx,width_
 67 | 	mov esi,eax
 68 | 	sub esi,stride	; esi = row above (srcp - stride)
 69 | 	mov edi,eax
 70 | 	add edi,stride	; edi = row below (srcp + stride)
 71 | 	
 72 | yloop:
 73 | 	xor ecx,ecx
 74 | 	align 16
 75 | xloop:
 76 | 	movdqa xmm0,[esi+ecx]	; above, x
 77 | 	movdqu xmm1,[esi+ecx-1]	; above, x-1 (unaligned; at ecx=0 this reads 1 byte before the row start)
 78 | 	por xmm0,xmm1
 79 | 	movdqu xmm1,[esi+ecx+1]	; above, x+1 (reads 1 byte past the 16-byte group)
 80 | 	por xmm0,xmm1
 81 | 	movdqa xmm1,[eax+ecx]	; current row, x
 82 | 	movdqu xmm2,[edi+ecx-1]	; below, x-1
 83 | 	por xmm0,xmm2
 84 | 	por xmm0,[edi+ecx]	; below, x (memory operand must be 16-byte aligned)
 85 | 	movdqu xmm2,[edi+ecx+1]	; below, x+1
 86 | 	por xmm0,xmm2
 87 | 	pand xmm0,xmm1
 88 | 	movdqa [eax+ecx],xmm0	; written back in place
 89 | 	
 90 | 	add ecx,16
 91 | 	cmp ecx,edx
 92 | 	jl xloop
 93 | 	; After this advance esi points at the row just written, so the next row's "above" input is the already-modified data.
 94 | 	add eax,stride
 95 | 	add esi,stride
 96 | 	add edi,stride
 97 | 	dec height
 98 | 	jnz yloop
 99 | 
100 | 	ret
101 | 
102 | andNeighborsInPlace_SSE2 endp
103 | 
104 | 
105 | 
106 | absDiff_SSE2 proc public uses ebx esi srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword
107 | 	; Per byte: dstp = |srcp1 - srcp2|. 16 bytes per step; all three rows advance by the same stride.
108 | 	mov eax,srcp1
109 | 	mov esi,srcp2
110 | 	mov ebx,dstp
111 | 	mov edx,width_
112 | 	
113 | yloop:
114 | 	xor ecx,ecx	; ecx = byte offset within the row
115 | 	align 16
116 | xloop:
117 | 	movdqa xmm0,[eax+ecx]	; movdqa: address must be 16-byte aligned
118 | 	movdqa xmm1,[esi+ecx]
119 | 	movdqa xmm2,xmm0
120 | 	psubusb xmm0,xmm1	; sat(a - b)
121 | 	psubusb xmm1,xmm2	; sat(b - a)
122 | 	por xmm0,xmm1	; |a - b|
123 | 	movdqa [ebx+ecx],xmm0
124 | 	
125 | 	add ecx,16
126 | 	cmp ecx,edx
127 | 	jl xloop	; signed compare; the body always runs at least once per row
128 | 	
129 | 	add eax,stride
130 | 	add esi,stride
131 | 	add ebx,stride
132 | 	dec height	; row counter lives in the argument's stack slot
133 | 	jnz yloop
134 | 
135 | 	ret
136 | 	
137 | absDiff_SSE2 endp
138 | 
139 | 
140 | 
141 | absDiffAndMinMask_SSE2 proc public uses ebx esi edi srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword
142 | 	; Per byte: dstp = min(dstp, |srcp1 - srcp2|) (unsigned). dstp is both read and written.
143 | 	mov eax,srcp1
144 | 	mov esi,srcp2
145 | 	mov ebx,dstp
146 | 	mov edx,width_
147 | 	mov edi,height	; row counter kept in a register here
148 | 	
149 | yloop:
150 | 	xor ecx,ecx
151 | 	align 16
152 | xloop:
153 | 	movdqa xmm0,[eax+ecx]	; movdqa: address must be 16-byte aligned
154 | 	movdqa xmm1,[esi+ecx]
155 | 	movdqa xmm2,xmm0
156 | 	psubusb xmm0,xmm1	; sat(a - b)
157 | 	psubusb xmm1,xmm2	; sat(b - a)
158 | 	por xmm0,xmm1	; |a - b|
159 | 	pminub xmm0,[ebx+ecx]	; unsigned minimum with the value already in dstp
160 | 	movdqa [ebx+ecx],xmm0
161 | 	
162 | 	add ecx,16
163 | 	cmp ecx,edx
164 | 	jl xloop
165 | 	
166 | 	add eax,stride
167 | 	add esi,stride
168 | 	add ebx,stride
169 | 	dec edi
170 | 	jnz yloop
171 | 
172 | 	ret
173 | 
174 | absDiffAndMinMask_SSE2 endp
175 | 
176 | 
177 | 
178 | absDiffAndMinMaskThresh_SSE2 proc public uses ebx esi edi srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword
179 | 	; Per byte: dstp = FFh where min(dstp, |srcp1 - srcp2|) <= thresh-1, else 00h. dstp is both read and written.
180 | 	mov eax,srcp1
181 | 	mov esi,srcp2
182 | 	mov ebx,dstp
183 | 	mov edx,width_
184 | 	mov edi,height
185 | 	; Broadcast the low byte of (thresh-1) into all 16 bytes of xmm3. Assumes thresh-1 fits in a byte - TODO confirm callers.
186 | 	dec thresh	; modifies the argument's stack slot
187 | 	movd xmm3,thresh
188 | 	punpcklbw xmm3,xmm3
189 | 	punpcklwd xmm3,xmm3
190 | 	punpckldq xmm3,xmm3
191 | 	punpcklqdq xmm3,xmm3
192 | 	
193 | 	pxor xmm4,xmm4	; xmm4 = 0, comparison constant
194 | 	
195 | yloop:
196 | 	xor ecx,ecx
197 | 	align 16
198 | xloop:
199 | 	movdqa xmm0,[eax+ecx]	; movdqa: address must be 16-byte aligned
200 | 	movdqa xmm1,[esi+ecx]
201 | 	movdqa xmm2,xmm0
202 | 	psubusb xmm0,xmm1	; sat(a - b)
203 | 	psubusb xmm1,xmm2	; sat(b - a)
204 | 	por xmm0,xmm1	; |a - b|
205 | 	pminub xmm0,[ebx+ecx]	; unsigned minimum with the value already in dstp
206 | 	psubusb xmm0,xmm3	; 0 where the minimum <= thresh-1
207 | 	pcmpeqb xmm0,xmm4	; turn that into a FFh/00h mask
208 | 	movdqa [ebx+ecx],xmm0
209 | 	
210 | 	add ecx,16
211 | 	cmp ecx,edx
212 | 	jl xloop
213 | 	
214 | 	add eax,stride
215 | 	add esi,stride
216 | 	add ebx,stride
217 | 	dec edi
218 | 	jnz yloop
219 | 
220 | 	ret
221 | 	
222 | absDiffAndMinMaskThresh_SSE2 endp
223 | 
224 | 
225 | 
226 | MinMax_SSE2 proc public uses ebx esi edi srcp:dword,minp:dword,maxp:dword,src_stride:dword,min_stride:dword,width_:dword,height:dword,thresh:dword
227 | 	; Per byte: minp = sat(min - thresh), maxp = sat(max + thresh), min/max taken over nine loads (offsets -1, 0, +1 in three rows).
228 | 	mov eax,srcp	; eax = middle row
229 | 	mov esi,eax
230 | 	sub esi,src_stride	; esi = row above
231 | 	mov edi,eax
232 | 	add edi,src_stride	; edi = row below
233 | 	mov ebx,minp
234 | 	mov edx,maxp
235 | 	; Broadcast the low byte of thresh (not decremented here) into all 16 bytes of xmm3.
236 | 	movd xmm3,thresh
237 | 	punpcklbw xmm3,xmm3
238 | 	punpcklwd xmm3,xmm3
239 | 	punpckldq xmm3,xmm3
240 | 	punpcklqdq xmm3,xmm3
241 | 	
242 | yloop:
243 | 	xor ecx,ecx
244 | 	align 16
245 | xloop:
246 | 	; srcp-1 is aligned because the pointer passed to this function is srcp+stride+1.
247 | 	movdqa xmm0,[esi+ecx-1]	; xmm0 accumulates the running minimum
248 | 	movdqa xmm1,xmm0	; xmm1 accumulates the running maximum
249 | 	movdqu xmm2,[esi+ecx]
250 | 	pminub xmm0,xmm2
251 | 	pmaxub xmm1,xmm2
252 | 	movdqu xmm2,[esi+ecx+1]
253 | 	pminub xmm0,xmm2
254 | 	pmaxub xmm1,xmm2
255 | 	movdqa xmm2,[eax+ecx-1]	; middle row, offset -1 (aligned load)
256 | 	pminub xmm0,xmm2
257 | 	pmaxub xmm1,xmm2
258 | 	movdqu xmm2,[eax+ecx]
259 | 	pminub xmm0,xmm2
260 | 	pmaxub xmm1,xmm2
261 | 	movdqu xmm2,[eax+ecx+1]
262 | 	pminub xmm0,xmm2
263 | 	pmaxub xmm1,xmm2
264 | 	movdqa xmm2,[edi+ecx-1]	; row below, offset -1 (aligned load)
265 | 	pminub xmm0,xmm2
266 | 	pmaxub xmm1,xmm2
267 | 	movdqu xmm2,[edi+ecx]
268 | 	pminub xmm0,xmm2
269 | 	pmaxub xmm1,xmm2
270 | 	movdqu xmm2,[edi+ecx+1]
271 | 	pminub xmm0,xmm2
272 | 	pmaxub xmm1,xmm2
273 | 	psubusb xmm0,xmm3	; min - thresh, saturating at 0
274 | 	paddusb xmm1,xmm3	; max + thresh, saturating at 255
275 | 	movdqa [ebx+ecx],xmm0
276 | 	movdqa [edx+ecx],xmm1
277 | 	
278 | 	add ecx,16
279 | 	cmp ecx,width_	; width is compared from its stack slot; every general register used here already holds a pointer or the offset
280 | 	jl xloop
281 | 	
282 | 	add esi,src_stride
283 | 	add eax,src_stride
284 | 	add edi,src_stride
285 | 	add ebx,min_stride	; min and max outputs both advance by min_stride
286 | 	add edx,min_stride
287 | 	dec height
288 | 	jnz yloop
289 | 
290 | 	ret
291 | 	
292 | MinMax_SSE2 endp
293 | 
294 | 
295 | 
296 | checkOscillation5_SSE2 proc public uses ebx esi edi p2p:dword,p1p:dword,s1p:dword,n1p:dword,n2p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword
297 | ; Per byte mask: group A = {p2,s1,n2}, group B = {p1,n1}. dst = FFh where the groups do not overlap (see compares below) AND both group ranges are <= thresh-1.
298 | 	mov eax,p2p	; eax is shared between p2p and dstp inside the loop
299 | 	mov ebx,p1p
300 | 	mov edx,s1p
301 | 	mov edi,n1p
302 | 	mov esi,n2p
303 | 	
304 | 	
305 | 	pxor xmm6,xmm6	; xmm6 = 0, comparison constant
306 | 	; Broadcast the low byte of (thresh-1) into all 16 bytes of xmm7. Assumes thresh-1 fits in a byte - TODO confirm callers.
307 | 	dec thresh
308 | 	movd xmm7,thresh
309 | 	punpcklbw xmm7,xmm7
310 | 	punpcklwd xmm7,xmm7
311 | 	punpckldq xmm7,xmm7
312 | 	punpcklqdq xmm7,xmm7
313 | 	
314 | yloop:
315 | 	xor ecx,ecx
316 | 	align 16
317 | xloop:
318 | 	movdqa xmm0,[eax+ecx]	; p2
319 | 	movdqa xmm2,[ebx+ecx]	; p1
320 | 	movdqa xmm1,xmm0
321 | 	movdqa xmm3,xmm2
322 | 	pminub xmm0,[edx+ecx]	; min(p2,s1)
323 | 	pmaxub xmm1,[edx+ecx]	; max(p2,s1)
324 | 	pminub xmm2,[edi+ecx]	; minB = min(p1,n1)
325 | 	pmaxub xmm3,[edi+ecx]	; maxB = max(p1,n1)
326 | 	pminub xmm0,[esi+ecx]	; minA = min(p2,s1,n2)
327 | 	pmaxub xmm1,[esi+ecx]	; maxA = max(p2,s1,n2)
328 | 
329 | 	movdqa xmm4,xmm3
330 | 	movdqa xmm5,xmm1
331 | 	psubusb xmm4,xmm2	; rangeB = maxB - minB
332 | 	psubusb xmm5,xmm0	; rangeA = maxA - minA
333 | 	; minus (thresh-1)
334 | 	psubusb xmm4,xmm7
335 | 	psubusb xmm5,xmm7
336 | 	; minus 1
337 | 	psubusb xmm2,oword ptr onesByte	; sat(minB - 1)
338 | 	psubusb xmm0,oword ptr onesByte	; sat(minA - 1)
339 | 
340 | 	psubusb xmm1,xmm2	; 0 where maxA <= sat(minB - 1)
341 | 	psubusb xmm3,xmm0	; 0 where maxB <= sat(minA - 1)
342 | 
343 | 	pcmpeqb xmm1,xmm6	; FFh where group A lies below group B
344 | 	pcmpeqb xmm3,xmm6	; FFh where group B lies below group A
345 | 	pcmpeqb xmm4,xmm6	; FFh where rangeB <= thresh-1
346 | 	pcmpeqb xmm5,xmm6	; FFh where rangeA <= thresh-1
347 | 	mov eax,dstp	; eax temporarily switches to the destination row
348 | 	por xmm1,xmm3
349 | 	pand xmm4,xmm5
350 | 	pand xmm1,xmm4
351 | 	movdqa [eax+ecx],xmm1
352 | 	
353 | 	add ecx,16
354 | 	mov eax,p2p	; restore eax = p2 row for the next iteration
355 | 	cmp ecx,width_
356 | 	jl xloop
357 | 	
358 | 	mov eax,stride	; eax = stride so the two memory-resident pointers can be advanced
359 | 	add ebx,stride
360 | 	add p2p,eax	; p2p and dstp are advanced in their stack slots
361 | 	add edx,stride
362 | 	add edi,stride
363 | 	add dstp,eax
364 | 	add esi,stride
365 | 	mov eax,p2p	; reload eax = p2 row for the next row
366 | 	dec height
367 | 	jnz yloop
368 | 
369 | 	ret
370 | 	
371 | checkOscillation5_SSE2 endp
372 | 
373 | 
374 | 
375 | calcAverages_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword
376 | ; Per byte: dstp = pavgb(s1p,s2p), i.e. (s1p + s2p + 1) >> 1. All three planes advance by the same stride; accesses are aligned 16-byte accesses.
377 | 	mov eax,s1p
378 | 	mov ebx,s2p
379 | 	mov edx,dstp
380 | 	mov edi,height ; edi = remaining rows
381 | 	mov esi,width_ ; esi = row width in bytes
382 | 	
383 | yloop:
384 | 	xor ecx,ecx ; ecx = byte offset within the row
385 | 	align 16
386 | xloop:
387 | 	movdqa xmm0,[eax+ecx]
388 | 	pavgb xmm0,[ebx+ecx] ; rounded-up unsigned byte average
389 | 	movdqa [edx+ecx],xmm0
390 | 	
391 | 	add ecx,16
392 | 	cmp ecx,esi
393 | 	jl xloop ; do-while: at least one 16-byte chunk is processed per row
394 | 	
395 | 	add eax,stride
396 | 	add ebx,stride
397 | 	add edx,stride
398 | 	dec edi
399 | 	jnz yloop ; do-while: height must be at least 1
400 | 
401 | 	ret
402 | 	
403 | calcAverages_SSE2 endp
404 | 
405 | 
406 | 
407 | checkAvgOscCorrelation_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,s3p:dword,s4p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword
408 | ; Per byte: dstp is kept where max(s1..s4) - min(s1..s4) < thresh and cleared to 0 elsewhere (dstp is read, ANDed with the test mask and written back).
409 | 	mov eax,s1p
410 | 	mov ebx,s2p
411 | 	mov edx,s3p
412 | 	mov edi,s4p
413 | 	mov esi,dstp
414 | 	
415 | 	dec thresh ; x < thresh is tested as sat(x - (thresh-1)) == 0
416 | 	movd xmm2, thresh
417 | 	punpcklbw xmm2, xmm2 ; broadcast the low byte of (thresh-1) ...
418 | 	punpcklwd xmm2, xmm2
419 | 	punpckldq xmm2, xmm2
420 | 	punpcklqdq xmm2, xmm2 ; ... to all 16 bytes of xmm2
421 | 	
422 | 	pxor xmm3,xmm3 ; xmm3 = 0, reference for pcmpeqb
423 | 	
424 | yloop:
425 | 	xor ecx,ecx ; ecx = byte offset within the row
426 | 	align 16
427 | xloop:
428 | 	movdqa xmm5,[eax+ecx] ; s1
429 | 	movdqa xmm0,xmm5 ; xmm0 = running minimum
430 | 	movdqa xmm1,xmm5 ; xmm1 = running maximum
431 | 	movdqa xmm5,[ebx+ecx] ; s2
432 | 	pminub xmm0,xmm5
433 | 	pmaxub xmm1,xmm5
434 | 	movdqa xmm5,[edx+ecx] ; s3
435 | 	pminub xmm0,xmm5
436 | 	pmaxub xmm1,xmm5
437 | 	movdqa xmm5,[edi+ecx] ; s4
438 | 	pminub xmm0,xmm5
439 | 	pmaxub xmm1,xmm5
440 | 	psubusb xmm1,xmm0 ; max - min
441 | 	movdqa xmm4,[esi+ecx] ; current destination mask
442 | 	psubusb xmm1,xmm2 ; zero where (max - min) < thresh
443 | 	pcmpeqb xmm1,xmm3 ; 0xFF where the spread test passes
444 | 	pand xmm1,xmm4
445 | 	movdqa [esi+ecx],xmm1
446 | 	
447 | 	add ecx,16
448 | 	cmp ecx,width_
449 | 	jl xloop ; do-while: at least one 16-byte chunk per row
450 | 	
451 | 	add eax,stride
452 | 	add ebx,stride
453 | 	add edx,stride
454 | 	add edi,stride
455 | 	add esi,stride
456 | 	dec height
457 | 	jnz yloop ; do-while: height must be at least 1
458 | 
459 | 	ret
460 | 	
461 | checkAvgOscCorrelation_SSE2 endp
462 | 
463 | 
464 | 
465 | or3Masks_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,s3p:dword,dstp:dword,stride:dword,width_:dword,height:dword
466 | ; Per byte: dstp = s1p | s2p | s3p. All four planes advance by the same stride; accesses are aligned 16-byte accesses.
467 | 	mov eax,s1p
468 | 	mov ebx,s2p
469 | 	mov edx,s3p
470 | 	mov edi,dstp
471 | 	mov esi,width_ ; esi = row width in bytes
472 | 	
473 | yloop:
474 | 	xor ecx,ecx ; ecx = byte offset within the row
475 | 	align 16
476 | xloop:
477 | 	movdqa xmm0,[eax+ecx]
478 | 	por xmm0,[ebx+ecx]
479 | 	por xmm0,[edx+ecx]
480 | 	movdqa [edi+ecx],xmm0
481 | 	
482 | 	add ecx,16
483 | 	cmp ecx,esi
484 | 	jl xloop ; do-while: at least one 16-byte chunk per row
485 | 	
486 | 	add eax,stride
487 | 	add ebx,stride
488 | 	add edx,stride
489 | 	add edi,stride
490 | 	dec height
491 | 	jnz yloop ; do-while: height must be at least 1
492 | 
493 | 	ret
494 | 	
495 | or3Masks_SSE2 endp
496 | 
497 | 
498 | 
499 | orAndMasks_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword
500 | ; Per byte: dstp = dstp | (s1p & s2p). dstp is both read and written; all planes advance by the same stride.
501 | 	mov eax,s1p
502 | 	mov ebx,s2p
503 | 	mov edx,dstp
504 | 	mov edi,width_ ; edi = row width in bytes
505 | 	mov esi,height ; esi = remaining rows
506 | 	
507 | yloop:
508 | 	xor ecx,ecx ; ecx = byte offset within the row
509 | 	align 16
510 | xloop:
511 | 	movdqa xmm0,[eax+ecx]
512 | 	movdqa xmm1,[edx+ecx] ; existing destination mask
513 | 	pand xmm0,[ebx+ecx] ; s1 & s2
514 | 	por xmm1,xmm0
515 | 	movdqa [edx+ecx],xmm1
516 | 	
517 | 	add ecx,16
518 | 	cmp ecx,edi
519 | 	jl xloop ; do-while: at least one 16-byte chunk per row
520 | 	
521 | 	add eax,stride
522 | 	add ebx,stride
523 | 	add edx,stride
524 | 	dec esi
525 | 	jnz yloop ; do-while: height must be at least 1
526 | 
527 | 	ret
528 | 	
529 | orAndMasks_SSE2 endp
530 | 
531 | 
532 | 
533 | andMasks_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword
534 | ; Per byte: dstp = s1p & s2p. All three planes advance by the same stride; accesses are aligned 16-byte accesses.
535 | 	mov eax,s1p
536 | 	mov ebx,s2p
537 | 	mov edx,dstp
538 | 	mov edi,width_ ; edi = row width in bytes
539 | 	mov esi,height ; esi = remaining rows
540 | 	
541 | yloop:
542 | 	xor ecx,ecx ; ecx = byte offset within the row
543 | 	align 16
544 | xloop:
545 | 	movdqa xmm0,[eax+ecx]
546 | 	pand xmm0,[ebx+ecx]
547 | 	movdqa [edx+ecx],xmm0
548 | 	
549 | 	add ecx,16
550 | 	cmp ecx,edi
551 | 	jl xloop ; do-while: at least one 16-byte chunk per row
552 | 	
553 | 	add eax,stride
554 | 	add ebx,stride
555 | 	add edx,stride
556 | 	dec esi
557 | 	jnz yloop ; do-while: height must be at least 1
558 | 
559 | 	ret
560 | 	
561 | andMasks_SSE2 endp
562 | 
563 | 
564 | 
565 | checkSceneChange_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,stride:dword,width_:dword,height:dword,diffp:dword
566 | ; Sums |s1p - s2p| over the whole plane (psadbw per 16-byte chunk) and stores the 32-bit total at [diffp].
567 | 	mov eax,s1p
568 | 	mov edi,s2p
569 | 	mov esi,stride
570 | 	mov edx,width_
571 | 	pxor xmm1,xmm1 ; xmm1 = running sums, one per 8-byte half
572 | 	
573 | yloop:
574 | 	xor ecx,ecx ; ecx = byte offset within the row
575 | 	align 16
576 | xloop:
577 | 	movdqa xmm0,[eax+ecx]
578 | 	psadbw xmm0,[edi+ecx] ; two sums of absolute differences (low and high 8 bytes)
579 | 	paddd xmm1,xmm0 ; accumulated as 32-bit lanes - assumes the plane total fits in 32 bits
580 | 
581 | 	add ecx,16
582 | 	cmp ecx,edx
583 | 	jl xloop ; do-while: at least one 16-byte chunk per row
584 | 
585 | 	add eax,esi
586 | 	add edi,esi
587 | 	dec height
588 | 	jnz yloop ; do-while: height must be at least 1
589 | 
590 | 	movdqa xmm2,xmm1
591 | 	psrldq xmm1,8 ; bring the high-half sum down to the low lane
592 | 	paddd xmm2,xmm1 ; low dword = total of both halves
593 | 
594 | 	mov eax, diffp
595 | 	movd DWORD PTR [eax],xmm2 ; only 4 bytes are written at diffp
596 | 
597 | 	ret
598 | 	
599 | checkSceneChange_SSE2 endp
600 | 
601 | 
602 | 
603 | VerticalBlur3_SSE2 proc public uses ebx esi edi srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword
604 | ; Vertical 1-2-1 blur. First row = pavgb(row0,row1); interior rows = (above + 2*cur + below + 2) >> 2; last row = pavgb(row[h-2],row[h-1]). The interior loop is a do-while over height-2 rows, so height must be at least 3.
605 | 	mov eax,srcp ; eax = current source row
606 | 	mov ebx,dstp ; ebx = destination row
607 | 	mov edx,stride
608 | 	mov esi,eax
609 | 	mov edi,eax
610 | 	sub esi,edx ; esi = row above (not dereferenced for the first row)
611 | 	add edi,edx ; edi = row below
612 | 	mov edx,width_ ; edx is reused as the row width from here on
613 | 	
614 | 	; 0x0002,for rounding
615 | 	pcmpeqb xmm6,xmm6
616 | 	psrlw xmm6,15
617 | 	psllw xmm6,1 ; xmm6 = 0x0002 in every word
618 | 	pxor xmm7,xmm7 ; xmm7 = 0, used to widen bytes to words
619 | 
620 | 	xor ecx,ecx
621 | 
622 | toploop:
623 | 	movdqa xmm0,[eax+ecx]
624 | 	pavgb xmm0,[edi+ecx] ; first row: average of row 0 and row 1
625 | 	movdqa [ebx+ecx],xmm0
626 | 	
627 | 	add ecx,16
628 | 	cmp ecx,edx
629 | 	jl toploop
630 | 	
631 | 	add esi,stride
632 | 	add eax,stride
633 | 	add edi,stride
634 | 	add ebx,stride
635 | 	sub height,2 ; the main loop processes 2 lines fewer than the height
636 | 	
637 | yloop:
638 | 	xor ecx,ecx
639 | xloop:
640 | 	movdqa xmm0,[esi+ecx] ; row above
641 | 	movdqa xmm1,[eax+ecx] ; current row
642 | 	movdqa xmm2,[edi+ecx] ; row below
643 | 	movdqa xmm3,xmm0
644 | 	movdqa xmm4,xmm1
645 | 	movdqa xmm5,xmm2
646 | 	punpcklbw xmm0,xmm7 ; low 8 bytes widened to words
647 | 	punpcklbw xmm1,xmm7
648 | 	punpcklbw xmm2,xmm7
649 | 	punpckhbw xmm3,xmm7 ; high 8 bytes widened to words
650 | 	punpckhbw xmm4,xmm7
651 | 	punpckhbw xmm5,xmm7
652 | 	
653 | 	; add bottom to top
654 | 	paddw xmm0,xmm2
655 | 	paddw xmm3,xmm5
656 | 	
657 | 	; multiply center by 2
658 | 	psllw xmm1,1
659 | 	psllw xmm4,1
660 | 	
661 | 	; add center to sum
662 | 	paddw xmm0,xmm1
663 | 	paddw xmm3,xmm4
664 | 	
665 | 	; add 2 to sum
666 | 	paddw xmm0,xmm6
667 | 	paddw xmm3,xmm6
668 | 	
669 | 	; divide by 4
670 | 	psrlw xmm0,2
671 | 	psrlw xmm3,2
672 | 	packuswb xmm0,xmm3 ; narrow both halves back to 16 bytes
673 | 	movdqa [ebx+ecx],xmm0
674 | 	
675 | 	add ecx,16
676 | 	cmp ecx,edx
677 | 	jl xloop
678 | 	
679 | 	add esi,stride
680 | 	add eax,stride
681 | 	add edi,stride
682 | 	add ebx,stride
683 | 	dec height
684 | 	jnz yloop
685 | 	
686 | 	xor ecx,ecx
687 | 	
688 | bottomloop:
689 | 	movdqa xmm0,[esi+ecx] ; second-to-last row
690 | 	pavgb xmm0,[eax+ecx] ; last row: average with the row above it
691 | 	movdqa [ebx+ecx],xmm0
692 | 	
693 | 	add ecx,16
694 | 	cmp ecx,edx
695 | 	jl bottomloop
696 | 
697 | 	ret
698 | 	
699 | VerticalBlur3_SSE2 endp
700 | 
701 | 
702 | 
703 | HorizontalBlur3_SSE2 proc public srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword
704 | ; Horizontal 1-2-1 blur: dstp[x] = (srcp[x-1] + 2*srcp[x] + srcp[x+1] + 2) >> 2. Each chunk also reads the byte before and the byte after it, so srcp[-1] and one byte past the last chunk must be readable.
705 | 	mov eax,srcp ; eax = source row (aligned: loaded with movdqa at offset 0)
706 | 	mov edx,dstp ; edx = destination row
707 | 	pxor xmm7,xmm7 ; xmm7 = 0, used to widen bytes to words
708 | 	; 0x0002,for rounding
709 | 	pcmpeqb xmm6,xmm6
710 | 	psrlw xmm6,15
711 | 	psllw xmm6,1 ; xmm6 = 0x0002 in every word
712 | 
713 | yloop:
714 | 	xor ecx,ecx ; ecx = byte offset within the row
715 | 	align 16
716 | xloop:
717 | 	movdqu xmm0,[eax+ecx-1] ; left neighbours (unaligned)
718 | 	movdqa xmm1,[eax+ecx] ; centre pixels
719 | 	movdqu xmm2,[eax+ecx+1] ; right neighbours (unaligned)
720 | 	movdqa xmm3,xmm0
721 | 	movdqa xmm4,xmm1
722 | 	movdqa xmm5,xmm2
723 | 	punpcklbw xmm0,xmm7 ; low 8 bytes widened to words
724 | 	punpcklbw xmm1,xmm7
725 | 	punpcklbw xmm2,xmm7
726 | 	punpckhbw xmm3,xmm7 ; high 8 bytes widened to words
727 | 	punpckhbw xmm4,xmm7
728 | 	punpckhbw xmm5,xmm7
729 | 	; center * 2
730 | 	psllw xmm1,1
731 | 	psllw xmm4,1
732 | 	paddw xmm1,xmm0 ; + left
733 | 	paddw xmm4,xmm3
734 | 	paddw xmm1,xmm2 ; + right
735 | 	paddw xmm4,xmm5
736 | 
737 | 	; add 2 to sum
738 | 	paddw xmm1,xmm6
739 | 	paddw xmm4,xmm6
740 | 
741 | 	; divide by 4
742 | 	psrlw xmm1,2
743 | 	psrlw xmm4,2
744 | 	packuswb xmm1,xmm4 ; narrow both halves back to 16 bytes
745 | 	movdqa [edx+ecx],xmm1
746 | 	
747 | 	add ecx,16
748 | 	cmp ecx,width_
749 | 	jl xloop ; do-while: at least one 16-byte chunk per row
750 | 	
751 | 	add eax,stride
752 | 	add edx,stride
753 | 	dec height
754 | 	jnz yloop ; do-while: height must be at least 1
755 | 
756 | 	ret
757 | 	
758 | HorizontalBlur3_SSE2 endp
759 | 
760 | 
761 | 
762 | HorizontalBlur6_SSE2 proc public srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword
763 | ; Horizontal 1-4-6-4-1 blur: dstp[x] = (srcp[x-2] + 4*srcp[x-1] + 6*srcp[x] + 4*srcp[x+1] + srcp[x+2] + 8) >> 4. Each chunk also reads two bytes before and two bytes after it, so those bytes must be readable.
764 | 	mov eax,srcp ; eax = source row (aligned: loaded with movdqa at offset 0)
765 | 	mov edx,dstp ; edx = destination row
766 | 	movdqu xmm6,oword ptr sixsMask_W ; multiplier for the centre tap; sixsMask_W is defined outside this procedure
767 | 	pxor xmm7,xmm7 ; xmm7 = 0, used to widen bytes to words
768 | 	
769 | yloop:
770 | 	xor ecx,ecx ; ecx = byte offset within the row
771 | 	align 16
772 | xloop:
773 | 	movdqu xmm0,[eax+ecx-2]
774 | 	movdqu xmm1,[eax+ecx+2]
775 | 	movdqa xmm2,xmm0
776 | 	movdqa xmm3,xmm1
777 | 	punpcklbw xmm0,xmm7 ; low 8 bytes widened to words
778 | 	punpcklbw xmm1,xmm7
779 | 	punpckhbw xmm2,xmm7 ; high 8 bytes widened to words
780 | 	punpckhbw xmm3,xmm7
781 | 	
782 | 	; srcp[x-2] + srcp[x+2]
783 | 	paddw xmm0,xmm1
784 | 	paddw xmm2,xmm3
785 | 	
786 | 	; srcp[x-1] + srcp[x+1]
787 | 	movdqu xmm1,[eax+ecx-1]
788 | 	movdqu xmm3,[eax+ecx+1]
789 | 	movdqa xmm4,xmm1
790 | 	movdqa xmm5,xmm3
791 | 	punpcklbw xmm1,xmm7
792 | 	punpcklbw xmm3,xmm7
793 | 	punpckhbw xmm4,xmm7
794 | 	punpckhbw xmm5,xmm7
795 | 	paddw xmm1,xmm3
796 | 	paddw xmm4,xmm5
797 | 	
798 | 	; (srcp[x-1] + srcp[x+1])*4
799 | 	psllw xmm1,2
800 | 	psllw xmm4,2
801 | 	
802 | 	; (srcp[x-1] + srcp[x+1])*4 + srcp[x-2] + srcp[x+2]
803 | 	paddw xmm0,xmm1
804 | 	paddw xmm2,xmm4
805 | 	
806 | 	; srcp[x] * 6
807 | 	movdqa xmm1,[eax+ecx]
808 | 	movdqu xmm5,oword ptr eightsMask_W ; rounding term, reloaded each chunk because xmm5 was used as scratch above
809 | 	movdqa xmm3,xmm1
810 | 	punpcklbw xmm1,xmm7
811 | 	punpckhbw xmm3,xmm7
812 | 	pmullw xmm1,xmm6
813 | 	pmullw xmm3,xmm6
814 | 	paddw xmm0,xmm1
815 | 	paddw xmm2,xmm3
816 | 	
817 | 	; add 8
818 | 	paddw xmm0,xmm5
819 | 	paddw xmm2,xmm5
820 | 	
821 | 	; divide by 16
822 | 	psrlw xmm0,4
823 | 	psrlw xmm2,4
824 | 	packuswb xmm0,xmm2 ; narrow both halves back to 16 bytes
825 | 	movdqa [edx+ecx],xmm0
826 | 	
827 | 	add ecx,16
828 | 	cmp ecx,width_
829 | 	jl xloop ; do-while: at least one 16-byte chunk per row
830 | 	
831 | 	add eax,stride
832 | 	add edx,stride
833 | 	dec height
834 | 	jnz yloop ; do-while: height must be at least 1
835 | 
836 | 	ret
837 | 	
838 | HorizontalBlur6_SSE2 endp
839 | 
840 | 
841 | 
842 | end


--------------------------------------------------------------------------------
/TComb/TComb_asm_x64.asm:
--------------------------------------------------------------------------------
   1 | .code
   2 | 
   3 | ;buildFinalMask_SSE2 proc s1p:dword,s2p:dword,m1p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword   (32-bit prototype kept for reference; here the four pointers arrive in 64-bit registers)
   4 | ; s1p = rcx
   5 | ; s2p = rdx
   6 | ; m1p = r8
   7 | ; dstp = r9
   8 | ; stride, width_, height and thresh are read from the caller's stack. Per byte: dstp = m1p where |s1p - s2p| < thresh, else 0.
   9 | buildFinalMask_SSE2 proc public frame
  10 | 	
  11 | 	stride equ dword ptr [rbp+48]
  12 | 	width_ equ dword ptr [rbp+56]
  13 | 	height equ dword ptr [rbp+64]
  14 | 	thresh equ dword ptr [rbp+72]
  15 | 	
  16 | 	push rbp
  17 | .pushreg rbp
  18 | 	mov rbp,rsp ; rbp+48 is the first stack argument (saved rbp, return address, 32-byte register home area)
  19 | 	push rbx
  20 | .pushreg rbx
  21 | 	push rsi
  22 | .pushreg rsi
  23 | 	push rdi
  24 | .pushreg rdi
  25 | .endprolog
  26 | 	
  27 | 	mov rax,rcx ; rax = s1p row
  28 | 	mov rbx,rdx ; rbx = s2p row
  29 | 	mov rdx,r8 ; rdx = m1p row
  30 | 	mov rsi,r9 ; rsi = dstp row
  31 | 	movsxd r8,stride ; r8 = sign-extended stride
  32 | 	xor rdi,rdi
  33 | 	mov edi,width_ ; rdi = zero-extended width
  34 | 	xor r9,r9
  35 | 	mov r9d,height ; r9 = zero-extended row count
  36 | 	mov r10,16 ; r10 = chunk size
  37 | 
  38 | 	dec thresh ; x < thresh is tested as sat(x - (thresh-1)) == 0
  39 | 	movd xmm4,thresh
  40 | 	punpcklbw xmm4, xmm4 ; broadcast the low byte of (thresh-1) ...
  41 | 	punpcklwd xmm4, xmm4
  42 | 	punpckldq xmm4, xmm4
  43 | 	punpcklqdq xmm4, xmm4 ; ... to all 16 bytes of xmm4
  44 | 	pxor xmm5,xmm5 ; xmm5 = 0, reference for pcmpeqb
  45 | 
  46 | yloop:
  47 | 	xor rcx,rcx ; rcx = byte offset within the row
  48 | xloop:
  49 | 	movdqa xmm0,[rax+rcx]
  50 | 	movdqa xmm1,[rbx+rcx]
  51 | 	movdqa xmm2,xmm0
  52 | 	psubusb xmm0,xmm1 ; sat(s1 - s2)
  53 | 	psubusb xmm1,xmm2 ; sat(s2 - s1)
  54 | 	por xmm0,xmm1 ; |s1 - s2| (one of the two is always zero)
  55 | 	psubusb xmm0,xmm4 ; zero where |s1 - s2| < thresh
  56 | 	pcmpeqb xmm0,xmm5 ; 0xFF where the test passes
  57 | 	pand xmm0,[rdx+rcx] ; keep m1p only where the test passes
  58 | 	movdqa [rsi+rcx],xmm0
  59 | 
  60 | 	add rcx,r10
  61 | 	cmp rcx,rdi
  62 | 	jl xloop ; do-while: at least one 16-byte chunk per row
  63 | 
  64 | 	add rax,r8
  65 | 	add rbx,r8
  66 | 	add rdx,r8
  67 | 	add rsi,r8
  68 | 	dec r9
  69 | 	jnz yloop ; do-while: height must be at least 1
  70 | 	
  71 | 	pop rdi
  72 | 	pop rsi
  73 | 	pop rbx
  74 | 	pop rbp
  75 | 
  76 | 	ret
  77 | 
  78 | buildFinalMask_SSE2 endp
  79 | 
  80 | 
  81 | 
  82 | ;andNeighborsInPlace_SSE2 proc srcp:dword,stride:dword,width_:dword,height:dword   (32-bit prototype kept for reference; here srcp arrives in a 64-bit register)
  83 | ; srcp = rcx
  84 | ; stride = edx (32-bit int, sign-extended below)
  85 | ; width_ = r8d (32-bit int, zero-extended below)
  86 | ; height = r9d
  87 | ; Per byte, in place: srcp[x] &= OR of the six neighbours at x-1, x, x+1 in the row above and the row below. Rows srcp-stride and srcp+height*stride are read, as are one byte before and after each chunk.
  88 | andNeighborsInPlace_SSE2 proc public frame
  89 | 	
  90 | 	push rbp
  91 | .pushreg rbp
  92 | 	mov rbp,rsp
  93 | 	push rsi
  94 | .pushreg rsi
  95 | 	push rdi
  96 | .pushreg rdi
  97 | .endprolog
  98 | 
  99 | 	mov rax,rcx ; rax = current row
 100 | 	xchg r8,rdx ; r8 = stride, rdx = width_ (upper halves still undefined per the x64 ABI)
 101 | 	movsxd r8,r8d ; r8 = sign-extended stride
 102 | 	mov rsi,rax
 103 | 	mov rdi,rax
 104 | 	sub rsi,r8 ; rsi = row above
 105 | 	add rdi,r8 ; rdi = row below
 106 | 	mov edx,edx ; zero-extend width_: the loop compares the full 64-bit rdx, and the caller need not clear bits 63:32
 107 | 
 108 | yloop:
 109 | 	xor rcx,rcx ; rcx = byte offset within the row
 110 | xloop:
 111 | 	movdqa xmm0,[rsi+rcx] ; above, x
 112 | 	movdqu xmm1,[rsi+rcx-1] ; above, x-1
 113 | 	por xmm0,xmm1
 114 | 	movdqu xmm1,[rsi+rcx+1] ; above, x+1
 115 | 	por xmm0,xmm1
 116 | 	movdqa xmm1,[rax+rcx] ; current pixels
 117 | 	movdqu xmm2,[rdi+rcx-1] ; below, x-1
 118 | 	por xmm0,xmm2
 119 | 	por xmm0,[rdi+rcx] ; below, x
 120 | 	movdqu xmm2,[rdi+rcx+1] ; below, x+1
 121 | 	por xmm0,xmm2
 122 | 	pand xmm0,xmm1 ; keep a pixel only if some neighbour is set
 123 | 	movdqa [rax+rcx],xmm0
 124 | 
 125 | 	add rcx,16
 126 | 	cmp rcx,rdx
 127 | 	jl xloop ; do-while: at least one 16-byte chunk per row
 128 | 
 129 | 	add rax,r8 ; after this step rsi points at the row just written, so the "above" row of the next iteration holds updated values
 130 | 	add rsi,r8
 131 | 	add rdi,r8
 132 | 	dec r9d
 133 | 	jnz yloop ; do-while: height must be at least 1
 134 | 
 135 | 	pop rdi
 136 | 	pop rsi
 137 | 	pop rbp
 138 | 	
 139 | 	ret
 140 | 
 141 | andNeighborsInPlace_SSE2 endp
 142 | 
 143 | 
 144 | 
 145 | ;absDiff_SSE2 proc srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword   (32-bit prototype kept for reference; here the three pointers arrive in 64-bit registers)
 146 | ; srcp1 = rcx
 147 | ; srcp2 = rdx
 148 | ; dstp = r8
 149 | ; stride = r9d
 150 | ; width_ and height are read from the caller's stack. Per byte: dstp = |srcp1 - srcp2|.
 151 | absDiff_SSE2 proc public frame
 152 | 
 153 | 	width_ equ dword ptr [rbp+48]
 154 | 	height equ dword ptr [rbp+56]
 155 | 	
 156 | 	push rbp
 157 | .pushreg rbp
 158 | 	mov rbp,rsp
 159 | 	push rbx
 160 | .pushreg rbx
 161 | 	push rsi
 162 | .pushreg rsi
 163 | .endprolog
 164 | 	
 165 | 	mov rax,rcx ; rax = srcp1 row
 166 | 	mov rsi,rdx ; rsi = srcp2 row
 167 | 	mov rbx,r8 ; rbx = dstp row
 168 | 	movsxd r8,r9d ; r8 = sign-extended stride
 169 | 	xor rdx,rdx
 170 | 	mov edx,width_ ; rdx = zero-extended width
 171 | 	xor r9,r9
 172 | 	mov r9d,height ; r9 = zero-extended row count
 173 | 	mov r10,16 ; r10 = chunk size
 174 | 	
 175 | yloop:
 176 | 	xor rcx,rcx ; rcx = byte offset within the row
 177 | xloop:
 178 | 	movdqa xmm0,[rax+rcx]
 179 | 	movdqa xmm1,[rsi+rcx]
 180 | 	movdqa xmm2,xmm0
 181 | 	psubusb xmm0,xmm1 ; sat(a - b)
 182 | 	psubusb xmm1,xmm2 ; sat(b - a)
 183 | 	por xmm0,xmm1 ; |a - b| (one of the two is always zero)
 184 | 	movdqa [rbx+rcx],xmm0
 185 | 	
 186 | 	add rcx,r10
 187 | 	cmp rcx,rdx
 188 | 	jl xloop ; do-while: at least one 16-byte chunk per row
 189 | 	
 190 | 	add rax,r8
 191 | 	add rsi,r8
 192 | 	add rbx,r8
 193 | 	dec r9d
 194 | 	jnz yloop ; do-while: height must be at least 1
 195 | 	
 196 | 	pop rsi
 197 | 	pop rbx
 198 | 	pop rbp
 199 | 
 200 | 	ret
 201 | 	
 202 | absDiff_SSE2 endp
 203 | 
 204 | 
 205 | 
 206 | ;absDiffAndMinMask_SSE2 proc srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword   (32-bit prototype kept for reference; here the three pointers arrive in 64-bit registers)
 207 | ; srcp1 = rcx
 208 | ; srcp2 = rdx
 209 | ; dstp = r8
 210 | ; stride = r9d
 211 | ; width_ and height are read from the caller's stack. Per byte: dstp = min(dstp, |srcp1 - srcp2|); dstp is both read and written.
 212 | absDiffAndMinMask_SSE2 proc public frame
 213 | 
 214 | 	width_ equ dword ptr [rbp+48]
 215 | 	height equ dword ptr [rbp+56]
 216 | 	
 217 | 	push rbp
 218 | .pushreg rbp
 219 | 	mov rbp,rsp
 220 | 	push rbx
 221 | .pushreg rbx
 222 | 	push rsi
 223 | .pushreg rsi
 224 | 	push rdi
 225 | .pushreg rdi
 226 | .endprolog
 227 | 	
 228 | 	mov rax,rcx ; rax = srcp1 row
 229 | 	mov rsi,rdx ; rsi = srcp2 row
 230 | 	mov rbx,r8 ; rbx = dstp row
 231 | 	movsxd r8,r9d ; r8 = sign-extended stride
 232 | 	xor rdx,rdx
 233 | 	mov edx,width_ ; rdx = zero-extended width
 234 | 	xor rdi,rdi
 235 | 	mov edi,height ; rdi = zero-extended row count
 236 | 	mov r10,16 ; r10 = chunk size
 237 | 	
 238 | yloop:
 239 | 	xor rcx,rcx ; rcx = byte offset within the row
 240 | xloop:
 241 | 	movdqa xmm0,[rax+rcx]
 242 | 	movdqa xmm1,[rsi+rcx]
 243 | 	movdqa xmm2,xmm0
 244 | 	psubusb xmm0,xmm1 ; sat(a - b)
 245 | 	psubusb xmm1,xmm2 ; sat(b - a)
 246 | 	por xmm0,xmm1 ; |a - b|
 247 | 	pminub xmm0,[rbx+rcx] ; keep the smaller of the new difference and the stored value
 248 | 	movdqa [rbx+rcx],xmm0
 249 | 	
 250 | 	add rcx,r10
 251 | 	cmp rcx,rdx
 252 | 	jl xloop ; do-while: at least one 16-byte chunk per row
 253 | 	
 254 | 	add rax,r8
 255 | 	add rsi,r8
 256 | 	add rbx,r8
 257 | 	dec edi
 258 | 	jnz yloop ; do-while: height must be at least 1
 259 | 
 260 | 	pop rdi
 261 | 	pop rsi
 262 | 	pop rbx
 263 | 	pop rbp
 264 | 	
 265 | 	ret
 266 | 
 267 | absDiffAndMinMask_SSE2 endp
 268 | 
 269 | 
 270 | 
 271 | ;absDiffAndMinMaskThresh_SSE2 proc srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword   (32-bit prototype kept for reference; here the three pointers arrive in 64-bit registers)
 272 | ; srcp1 = rcx
 273 | ; srcp2 = rdx
 274 | ; dstp = r8
 275 | ; stride = r9d
 276 | ; width_, height and thresh are read from the caller's stack. Per byte: dstp = 0xFF where min(dstp, |srcp1 - srcp2|) < thresh, else 0x00.
 277 | absDiffAndMinMaskThresh_SSE2 proc public frame
 278 | 
 279 | 	width_ equ dword ptr [rbp+48]
 280 | 	height equ dword ptr [rbp+56]
 281 | 	thresh equ dword ptr [rbp+64]
 282 | 	
 283 | 	push rbp
 284 | .pushreg rbp
 285 | 	mov rbp,rsp
 286 | 	push rbx
 287 | .pushreg rbx
 288 | 	push rsi
 289 | .pushreg rsi
 290 | 	push rdi
 291 | .pushreg rdi
 292 | .endprolog
 293 | 	
 294 | 	mov rax,rcx ; rax = srcp1 row
 295 | 	mov rsi,rdx ; rsi = srcp2 row
 296 | 	mov rbx,r8 ; rbx = dstp row
 297 | 	movsxd r8,r9d ; r8 = sign-extended stride
 298 | 	xor rdx,rdx
 299 | 	mov edx,width_ ; rdx = zero-extended width
 300 | 	xor rdi,rdi
 301 | 	mov edi,height ; rdi = zero-extended row count
 302 | 	dec thresh ; x < thresh is tested as sat(x - (thresh-1)) == 0
 303 | 	movd xmm3,thresh
 304 | 	punpcklbw xmm3,xmm3 ; broadcast the low byte of (thresh-1) ...
 305 | 	punpcklwd xmm3,xmm3
 306 | 	punpckldq xmm3,xmm3
 307 | 	punpcklqdq xmm3,xmm3 ; ... to all 16 bytes of xmm3
 308 | 	pxor xmm4,xmm4 ; xmm4 = 0, reference for pcmpeqb
 309 | 	mov r10,16 ; r10 = chunk size
 310 | 	
 311 | yloop:
 312 | 	xor rcx,rcx ; rcx = byte offset within the row
 313 | xloop:
 314 | 	movdqa xmm0,[rax+rcx]
 315 | 	movdqa xmm1,[rsi+rcx]
 316 | 	movdqa xmm2,xmm0
 317 | 	psubusb xmm0,xmm1 ; sat(a - b)
 318 | 	psubusb xmm1,xmm2 ; sat(b - a)
 319 | 	por xmm0,xmm1 ; |a - b|
 320 | 	pminub xmm0,[rbx+rcx] ; min with the stored value
 321 | 	psubusb xmm0,xmm3 ; zero where the minimum is below thresh
 322 | 	pcmpeqb xmm0,xmm4 ; 0xFF where the test passes
 323 | 	movdqa [rbx+rcx],xmm0
 324 | 	
 325 | 	add rcx,r10
 326 | 	cmp rcx,rdx
 327 | 	jl xloop ; do-while: at least one 16-byte chunk per row
 328 | 	
 329 | 	add rax,r8
 330 | 	add rsi,r8
 331 | 	add rbx,r8
 332 | 	dec edi
 333 | 	jnz yloop ; do-while: height must be at least 1
 334 | 	
 335 | 	pop rdi
 336 | 	pop rsi
 337 | 	pop rbx
 338 | 	pop rbp
 339 | 
 340 | 	ret
 341 | 	
 342 | absDiffAndMinMaskThresh_SSE2 endp
 343 | 
 344 | 
 345 | 
 346 | ;MinMax_SSE2 proc srcp:dword,minp:dword,maxp:dword,src_stride:dword,min_stride:dword,width_:dword,height:dword,thresh:dword   (32-bit prototype kept for reference; here the three pointers arrive in 64-bit registers)
 347 | ; srcp = rcx
 348 | ; minp = rdx (full 64-bit pointer; the code copies rdx, not edx)
 349 | ; maxp = r8 (full 64-bit pointer; the code copies r8, not r8d)
 350 | ; src_stride = r9d
 351 | ; min_stride, width_, height, thresh come from the stack. Per byte: minp = sat(min3x3 - thresh), maxp = sat(max3x3 + thresh), where the 3x3 window spans offsets -1..+1 from srcp in the rows above, at and below. Both outputs advance by min_stride.
 352 | MinMax_SSE2 proc public frame
 353 | 
 354 | 	min_stride equ dword ptr [rbp+48]
 355 | 	width_ equ dword ptr [rbp+56]
 356 | 	height equ dword ptr [rbp+64]
 357 | 	thresh equ dword ptr [rbp+72]
 358 | 	
 359 | 	push rbp
 360 | .pushreg rbp
 361 | 	mov rbp,rsp
 362 | 	push rbx
 363 | .pushreg rbx
 364 | 	push rsi
 365 | .pushreg rsi
 366 | 	push rdi
 367 | .pushreg rdi
 368 | 	push r12
 369 | .pushreg r12
 370 | .endprolog
 371 | 	
 372 | 	mov rax,rcx ; rax = current source row
 373 | 	mov rsi,rax
 374 | 	mov rdi,rax
 375 | 	mov rbx,rdx ; rbx = minp row
 376 | 	mov rdx,r8 ; rdx = maxp row
 377 | 	movsxd r8,r9d ; r8 = sign-extended source stride
 378 | 	movsxd r9,min_stride ; r9 = sign-extended output stride
 379 | 	mov r10d,width_ ; 32-bit move zero-extends into r10
 380 | 	mov r11d,height	
 381 | 	mov r12,16 ; r12 = chunk size
 382 | 	sub rsi,r8 ; rsi = row above
 383 | 	add rdi,r8 ; rdi = row below
 384 | 	
 385 | 	movd xmm3,thresh ; thresh is used as-is here (no decrement)
 386 | 	punpcklbw xmm3,xmm3 ; broadcast the low byte of thresh ...
 387 | 	punpcklwd xmm3,xmm3
 388 | 	punpckldq xmm3,xmm3
 389 | 	punpcklqdq xmm3,xmm3 ; ... to all 16 bytes of xmm3
 390 | 	
 391 | yloop:
 392 | 	xor rcx,rcx ; rcx = byte offset within the row
 393 | xloop:
 394 | 	; srcp-1 is aligned because the pointer passed to this function is srcp+stride+1.
 395 | 	movdqa xmm0,[rsi+rcx-1] ; xmm0 = running minimum, seeded from above/left
 396 | 	movdqa xmm1,xmm0 ; xmm1 = running maximum
 397 | 	movdqu xmm2,[rsi+rcx]
 398 | 	pminub xmm0,xmm2
 399 | 	pmaxub xmm1,xmm2
 400 | 	movdqu xmm2,[rsi+rcx+1]
 401 | 	pminub xmm0,xmm2
 402 | 	pmaxub xmm1,xmm2
 403 | 	movdqa xmm2,[rax+rcx-1] ; middle row, three columns
 404 | 	pminub xmm0,xmm2
 405 | 	pmaxub xmm1,xmm2
 406 | 	movdqu xmm2,[rax+rcx]
 407 | 	pminub xmm0,xmm2
 408 | 	pmaxub xmm1,xmm2
 409 | 	movdqu xmm2,[rax+rcx+1]
 410 | 	pminub xmm0,xmm2
 411 | 	pmaxub xmm1,xmm2
 412 | 	movdqa xmm2,[rdi+rcx-1] ; row below, three columns
 413 | 	pminub xmm0,xmm2
 414 | 	pmaxub xmm1,xmm2
 415 | 	movdqu xmm2,[rdi+rcx]
 416 | 	pminub xmm0,xmm2
 417 | 	pmaxub xmm1,xmm2
 418 | 	movdqu xmm2,[rdi+rcx+1]
 419 | 	pminub xmm0,xmm2
 420 | 	pmaxub xmm1,xmm2
 421 | 	psubusb xmm0,xmm3 ; min - thresh, saturating at 0
 422 | 	paddusb xmm1,xmm3 ; max + thresh, saturating at 255
 423 | 	movdqa [rbx+rcx],xmm0
 424 | 	movdqa [rdx+rcx],xmm1
 425 | 	
 426 | 	add rcx,r12
 427 | 	cmp rcx,r10
 428 | 	jl xloop ; do-while: at least one 16-byte chunk per row
 429 | 	
 430 | 	add rsi,r8
 431 | 	add rax,r8
 432 | 	add rdi,r8
 433 | 	add rbx,r9
 434 | 	add rdx,r9	
 435 | 	dec r11d
 436 | 	jnz yloop ; do-while: height must be at least 1
 437 | 	
 438 | 	pop r12
 439 | 	pop rdi
 440 | 	pop rsi
 441 | 	pop rbx
 442 | 	pop rbp
 443 | 
 444 | 	ret
 445 | 	
 446 | MinMax_SSE2 endp
 447 | 
 448 | 
 449 | 
 450 | ;checkOscillation5_SSE2 proc p2p:dword,p1p:dword,s1p:dword,n1p:dword,n2p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword   (32-bit prototype kept for reference; here all six pointers are 64-bit)
 451 | ; p2p = rcx
 452 | ; p1p = rdx
 453 | ; s1p = r8
 454 | ; n1p = r9
 455 | ; n2p, dstp, stride, width_, height, thresh are read from the caller's stack. Writes a 0xFF/0x00 byte mask to dstp; the exact condition is spelled out in the loop comments below.
 456 | checkOscillation5_SSE2 proc public frame
 457 | 
 458 | 	n2p equ qword ptr [rbp+48]
 459 | 	dstp equ qword ptr [rbp+56]
 460 | 	stride equ dword ptr [rbp+64]
 461 | 	width_ equ dword ptr [rbp+72]
 462 | 	height equ dword ptr [rbp+80]
 463 | 	thresh equ dword ptr [rbp+88]
 464 | 	
 465 | 	push rbp
 466 | .pushreg rbp
 467 | 	mov rbp,rsp
 468 | 	push rbx
 469 | .pushreg rbx
 470 | 	push rsi
 471 | .pushreg rsi
 472 | 	push rdi
 473 | .pushreg rdi
 474 | 	push r12
 475 | .pushreg r12
 476 | 	sub rsp,64 ; room to preserve xmm6-xmm9, which this routine overwrites
 477 | .allocstack 64
 478 | 	movdqu oword ptr[rsp],xmm6
 479 | .savexmm128 xmm6,0
 480 | 	movdqu oword ptr[rsp+16],xmm7
 481 | .savexmm128 xmm7,16
 482 | 	movdqu oword ptr[rsp+32],xmm8
 483 | .savexmm128 xmm8,32
 484 | 	movdqu oword ptr[rsp+48],xmm9
 485 | .savexmm128 xmm9,48
 486 | .endprolog
 487 | 	
 488 | 	mov rax,rcx ; p2p
 489 | 	mov rbx,rdx ; p1p
 490 | 	mov rdx,r8 ; s1p
 491 | 	mov rdi,r9 ; n1p
 492 | 	mov rsi,n2p ; n2p
 493 | 	mov r8,dstp
 494 | 	movsxd r9,stride ; r9 = sign-extended stride shared by all six planes
 495 | 	mov r10d,width_ ; 32-bit move zero-extends into r10
 496 | 	mov r11d,height
 497 | 	mov r12,16 ; r12 = chunk size
 498 | 	
 499 | 	pxor xmm6,xmm6 ; xmm6 = 0, reference for the pcmpeqb tests
 500 | 	
 501 | 	; trick:
 502 | 	; x<thresh ==> x<=(thresh-1) ==> x-(thresh-1)<=0 ==> sub_sat(x,thresh-1)==0
 503 | 	; pcmpeqb(psubusb(x,thresh-1),zero): 0xFF where x<thresh
 504 | 	dec thresh
 505 | 	movd xmm7,thresh
 506 | 	punpcklbw xmm7,xmm7 ; broadcast the low byte of (thresh-1) ...
 507 | 	punpcklwd xmm7,xmm7
 508 | 	punpckldq xmm7,xmm7
 509 | 	punpcklqdq xmm7,xmm7 ; ... to all 16 bytes of xmm7
 510 | 	
 511 |   ; all 01 (onesByte)
 512 | 	pcmpeqb xmm9,xmm9
 513 | 	psrlw xmm9,15 ; 0x0001 in every word
 514 | 	movdqa xmm8,xmm9
 515 | 	psllw xmm8,8 ; 0x0100 in every word
 516 | 	por xmm9,xmm8 ; xmm9 = 0x01 in every byte
 517 | 
 518 | yloop:
 519 | 	xor rcx,rcx ; rcx = byte offset within the row
 520 | xloop:
 521 | 
 522 | ; const int min31 = min3(p2p[x], s1p[x], n2p[x]);
 523 | ; const int max31 = max3(p2p[x], s1p[x], n2p[x]);
 524 | ; const int min22 = min(p1p[x], n1p[x]);
 525 | ; const int max22 = max(p1p[x], n1p[x]);
 526 | 
 527 | 	movdqa xmm0,[rax+rcx] ; p2p
 528 | 	movdqa xmm2,[rbx+rcx] ; p1p
 529 | 	movdqa xmm1,xmm0
 530 | 	movdqa xmm3,xmm2
 531 | 	movdqa xmm8,[rdx+rcx] ; s1p
 532 | 	pminub xmm0,xmm8 ; min(p2p, s1p)
 533 | 	pmaxub xmm1,xmm8 ; max(p2p, s1p)
 534 | 	movdqa xmm8,[rdi+rcx] ; n1p
 535 | 	pminub xmm2,xmm8 ; min22 = min(p1p, n1p)
 536 | 	pmaxub xmm3,xmm8 ; max22 = max(p1p, n1p)
 537 | 	movdqa xmm8,[rsi+rcx] ; n2p
 538 | 	pminub xmm0,xmm8 ; min31 = min(p2p, s1p, n2p)
 539 | 	pmaxub xmm1,xmm8 ; max31 = max(p2p, s1p, n2p)
 540 | 
 541 | ; if (((min31 > max22) || max22 == 0 || (max31 < min22) || max31 == 0) &&
 542 | ;   max31 - min31 < thresh && max22 - min22 < thresh)
 543 | ; No check for (max22 == 0) or (max31 == 0), like in C, sub_sat handles automatically
 544 | 
 545 | 	movdqa xmm4,xmm3 ; max22
 546 | 	movdqa xmm5,xmm1 ; max31
 547 | 	psubusb xmm4,xmm2 ; max22-min22
 548 | 	psubusb xmm5,xmm0 ; max31-min31
 549 |   ; minus (thresh-1)
 550 | 	psubusb xmm4,xmm7 ; max22-min22 - (thresh-1)
 551 | 	psubusb xmm5,xmm7 ; max31-min31 - (thresh-1)
 552 | 
 553 | 	; minus 1
 554 | 	psubusb xmm2,xmm9 ; min22-1
 555 | 	psubusb xmm0,xmm9 ; min31-1
 556 | 
 557 | 	psubusb xmm1,xmm2 ; max31 - (min22-1)
 558 | 	psubusb xmm3,xmm0 ; max22 - (min31-1)
 559 | 
 560 | 	pcmpeqb xmm1,xmm6 ; turn each "is zero" result into a 0xFF/0x00 byte mask
 561 | 	pcmpeqb xmm3,xmm6
 562 | 	pcmpeqb xmm4,xmm6
 563 | 	pcmpeqb xmm5,xmm6
 564 | 	por xmm1,xmm3 ; either separation test passes
 565 | 	pand xmm4,xmm5 ; both spread tests pass
 566 | 	pand xmm1,xmm4
 567 | 	movdqa [r8+rcx],xmm1
 568 | 
 569 | 	add rcx,r12
 570 | 	cmp rcx,r10
 571 | 	jl xloop ; do-while: at least one 16-byte chunk per row
 572 | 
 573 | 	add rax,r9
 574 | 	add rbx,r9
 575 | 	add rdx,r9
 576 | 	add rdi,r9
 577 | 	add rsi,r9
 578 | 	add r8,r9
 579 | 	dec r11d
 580 | 	jnz yloop ; do-while: height must be at least 1
 581 | 	
 582 | 	movdqu xmm9,oword ptr[rsp+48] ; restore the saved xmm registers before releasing the stack space
 583 | 	movdqu xmm8,oword ptr[rsp+32]
 584 | 	movdqu xmm7,oword ptr[rsp+16]
 585 | 	movdqu xmm6,oword ptr[rsp]
 586 | 	add rsp,64
 587 | 	pop r12
 588 | 	pop rdi
 589 | 	pop rsi
 590 | 	pop rbx
 591 | 	pop rbp
 592 | 
 593 | 	ret
 594 | 	
 595 | checkOscillation5_SSE2 endp
 596 | 
 597 | 
 598 | 
 599 | ;calcAverages_SSE2 proc s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword   (32-bit prototype kept for reference; here the three pointers arrive in 64-bit registers)
 600 | ; s1p = rcx
 601 | ; s2p = rdx
 602 | ; dstp = r8
 603 | ; stride = r9d
 604 | ; width_ and height are read from the caller's stack. Per byte: dstp = pavgb(s1p,s2p), i.e. (s1p + s2p + 1) >> 1.
 605 | calcAverages_SSE2 proc public frame
 606 | 	
 607 | 	width_ equ dword ptr [rbp+48]
 608 | 	height equ dword ptr [rbp+56]
 609 | 
 610 | 	push rbp
 611 | .pushreg rbp
 612 | 	mov rbp,rsp
 613 | 	push rbx
 614 | .pushreg rbx
 615 | 	push rsi
 616 | .pushreg rsi
 617 | 	push rdi
 618 | .pushreg rdi
 619 | .endprolog
 620 | 
 621 | 	mov rax,rcx ; rax = s1p row
 622 | 	mov rbx,rdx ; rbx = s2p row
 623 | 	mov rdx,r8 ; rdx = dstp row
 624 | 	movsxd r8,r9d ; r8 = sign-extended stride
 625 | 	xor rdi,rdi
 626 | 	mov edi,height ; rdi = zero-extended row count
 627 | 	xor rsi,rsi
 628 | 	mov esi,width_ ; rsi = zero-extended width
 629 | 	mov r10,16 ; r10 = chunk size
 630 | 
 631 | yloop:
 632 | 	xor rcx,rcx ; rcx = byte offset within the row
 633 | xloop:
 634 | 	movdqa xmm0,[rax+rcx]
 635 | 	pavgb xmm0,[rbx+rcx] ; rounded-up unsigned byte average
 636 | 	movdqa [rdx+rcx],xmm0
 637 | 
 638 | 	add rcx,r10
 639 | 	cmp rcx,rsi
 640 | 	jl xloop ; do-while: at least one 16-byte chunk per row
 641 | 
 642 | 	add rax,r8
 643 | 	add rbx,r8
 644 | 	add rdx,r8
 645 | 	dec edi
 646 | 	jnz yloop ; do-while: height must be at least 1
 647 | 
 648 | 	pop rdi
 649 | 	pop rsi
 650 | 	pop rbx
 651 | 	pop rbp
 652 | 
 653 | 	ret
 654 | 	
 655 | calcAverages_SSE2 endp
 656 | 
 657 | 
 658 | 
 659 | ;checkAvgOscCorrelation_SSE2 proc s1p:dword,s2p:dword,s3p:dword,s4p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword   (32-bit prototype kept for reference; here all five pointers are 64-bit)
 660 | ; s1p = rcx
 661 | ; s2p = rdx
 662 | ; s3p = r8
 663 | ; s4p = r9
 664 | ; dstp, stride, width_, height, thresh are read from the caller's stack. Per byte: dstp is kept where max(s1..s4) - min(s1..s4) < thresh and cleared to 0 elsewhere.
 665 | checkAvgOscCorrelation_SSE2 proc public frame
 666 | 
 667 | 	dstp equ qword ptr [rbp+48]
 668 | 	stride equ dword ptr [rbp+56]
 669 | 	width_ equ dword ptr [rbp+64]
 670 | 	height equ dword ptr [rbp+72]
 671 | 	thresh equ dword ptr [rbp+80]
 672 | 
 673 | 	push rbp
 674 | .pushreg rbp
 675 | 	mov rbp,rsp
 676 | 	push rbx
 677 | .pushreg rbx
 678 | 	push rsi
 679 | .pushreg rsi
 680 | 	push rdi
 681 | .pushreg rdi
 682 | .endprolog
 683 | 
 684 | 	mov rax,rcx ; rax = s1p row
 685 | 	mov rbx,rdx ; rbx = s2p row
 686 | 	mov rdx,r8 ; rdx = s3p row
 687 | 	mov rdi,r9 ; rdi = s4p row
 688 | 	mov rsi,dstp ; rsi = dstp row
 689 | 	movsxd r8,stride ; r8 = sign-extended stride
 690 | 	xor r9,r9
 691 | 	mov r9d,width_ ; r9 = zero-extended width
 692 | 	mov r10d,height
 693 | 	mov r11,16 ; r11 = chunk size
 694 | 
 695 | 	dec thresh ; x < thresh is tested as sat(x - (thresh-1)) == 0
 696 | 	movd xmm2, thresh
 697 | 	punpcklbw xmm2, xmm2 ; broadcast the low byte of (thresh-1) ...
 698 | 	punpcklwd xmm2, xmm2
 699 | 	punpckldq xmm2, xmm2
 700 | 	punpcklqdq xmm2, xmm2 ; ... to all 16 bytes of xmm2
 701 | 	
 702 | 	pxor xmm3,xmm3 ; xmm3 = 0, reference for pcmpeqb
 703 | 
 704 | yloop:
 705 | 	xor rcx,rcx ; rcx = byte offset within the row
 706 | xloop:
 707 | 	movdqa xmm5,[rax+rcx] ; s1
 708 | 	movdqa xmm0,xmm5 ; xmm0 = running minimum
 709 | 	movdqa xmm1,xmm5 ; xmm1 = running maximum
 710 | 	movdqa xmm5,[rbx+rcx] ; s2
 711 | 	pminub xmm0,xmm5
 712 | 	pmaxub xmm1,xmm5
 713 | 	movdqa xmm5,[rdx+rcx] ; s3
 714 | 	pminub xmm0,xmm5
 715 | 	pmaxub xmm1,xmm5
 716 | 	movdqa xmm5,[rdi+rcx] ; s4
 717 | 	pminub xmm0,xmm5
 718 | 	pmaxub xmm1,xmm5
 719 | 	psubusb xmm1,xmm0 ; max - min
 720 | 	movdqa xmm4,[rsi+rcx] ; current destination mask
 721 | 	psubusb xmm1,xmm2 ; zero where (max - min) < thresh
 722 | 	pcmpeqb xmm1,xmm3 ; 0xFF where the spread test passes
 723 | 	pand xmm1,xmm4
 724 | 	movdqa [rsi+rcx],xmm1
 725 | 
 726 | 	add rcx,r11
 727 | 	cmp rcx,r9
 728 | 	jl xloop ; do-while: at least one 16-byte chunk per row
 729 | 
 730 | 	add rax,r8
 731 | 	add rbx,r8
 732 | 	add rdx,r8
 733 | 	add rdi,r8
 734 | 	add rsi,r8
 735 | 	dec r10d
 736 | 	jnz yloop ; do-while: height must be at least 1
 737 | 
 738 | 	pop rdi
 739 | 	pop rsi
 740 | 	pop rbx
 741 | 	pop rbp
 742 | 
 743 | 	ret
 744 | 	
 745 | checkAvgOscCorrelation_SSE2 endp
 746 | 
 747 | 
 748 | 
 749 | ;or3Masks_SSE2 proc s1p:dword,s2p:dword,s3p:dword,dstp:dword,stride:dword,width_:dword,height:dword   (32-bit prototype kept for reference; here the four pointers arrive in 64-bit registers)
 750 | ; s1p = rcx
 751 | ; s2p = rdx
 752 | ; s3p = r8
 753 | ; dstp = r9
 754 | ; stride, width_ and height are read from the caller's stack. Per byte: dstp = s1p | s2p | s3p.
 755 | or3Masks_SSE2 proc public frame
 756 | 	
 757 | 	stride equ dword ptr [rbp+48]
 758 | 	width_ equ dword ptr [rbp+56]
 759 | 	height equ dword ptr [rbp+64]
 760 | 
 761 | 	push rbp
 762 | .pushreg rbp
 763 | 	mov rbp,rsp
 764 | 	push rbx
 765 | .pushreg rbx
 766 | 	push rsi
 767 | .pushreg rsi
 768 | 	push rdi
 769 | .pushreg rdi
 770 | .endprolog
 771 | 
 772 | 	mov rax,rcx ; rax = s1p row
 773 | 	mov rbx,rdx ; rbx = s2p row
 774 | 	mov rdx,r8 ; rdx = s3p row
 775 | 	mov rdi,r9 ; rdi = dstp row
 776 | 	movsxd r8,stride ; r8 = sign-extended stride
 777 | 	xor rsi,rsi
 778 | 	mov esi,width_ ; rsi = zero-extended width
 779 | 	xor r9,r9
 780 | 	mov r9d,height ; r9 = zero-extended row count
 781 | 	mov r10,16 ; r10 = chunk size
 782 | 
 783 | yloop:
 784 | 	xor rcx,rcx ; rcx = byte offset within the row
 785 | xloop:
 786 | 	movdqa xmm0,[rax+rcx]
 787 | 	por xmm0,[rbx+rcx]
 788 | 	por xmm0,[rdx+rcx]
 789 | 	movdqa [rdi+rcx],xmm0
 790 | 
 791 | 	add rcx,r10
 792 | 	cmp rcx,rsi
 793 | 	jl xloop ; do-while: at least one 16-byte chunk per row
 794 | 
 795 | 	add rax,r8
 796 | 	add rbx,r8
 797 | 	add rdx,r8
 798 | 	add rdi,r8
 799 | 	dec r9d
 800 | 	jnz yloop ; do-while: height must be at least 1
 801 | 
 802 | 	pop rdi
 803 | 	pop rsi
 804 | 	pop rbx
 805 | 	pop rbp
 806 | 
 807 | 	ret
 808 | 	
 809 | or3Masks_SSE2 endp
 810 | 
 811 | 
 812 | 
 813 | ;orAndMasks_SSE2 proc s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword   (32-bit prototype kept for reference; here the three pointers arrive in 64-bit registers)
 814 | ; s1p = rcx
 815 | ; s2p = rdx
 816 | ; dstp = r8
 817 | ; stride = r9d
 818 | ; width_ and height are read from the caller's stack. Per byte: dstp = dstp | (s1p & s2p); dstp is both read and written.
 819 | orAndMasks_SSE2 proc public frame
 820 | 
 821 | 	width_ equ dword ptr [rbp+48]
 822 | 	height equ dword ptr [rbp+56]
 823 | 
 824 | 	push rbp
 825 | .pushreg rbp
 826 | 	mov rbp,rsp
 827 | 	push rbx
 828 | .pushreg rbx
 829 | 	push rsi
 830 | .pushreg rsi
 831 | 	push rdi
 832 | .pushreg rdi
 833 | .endprolog
 834 | 
 835 | 	mov rax,rcx ; rax = s1p row
 836 | 	mov rbx,rdx ; rbx = s2p row
 837 | 	mov rdx,r8 ; rdx = dstp row
 838 | 	movsxd r8,r9d ; r8 = sign-extended stride
 839 | 	xor rdi,rdi
 840 | 	mov edi,width_ ; rdi = zero-extended width
 841 | 	xor rsi,rsi
 842 | 	mov esi,height ; rsi = zero-extended row count
 843 | 	mov r10,16 ; loaded but not used below: the step is the immediate 16
 844 | 
 845 | yloop:
 846 | 	xor rcx,rcx ; rcx = byte offset within the row
 847 | xloop:
 848 | 	movdqa xmm0,[rax+rcx]
 849 | 	movdqa xmm1,[rdx+rcx] ; existing destination mask
 850 | 	pand xmm0,[rbx+rcx] ; s1 & s2
 851 | 	por xmm1,xmm0
 852 | 	movdqa [rdx+rcx],xmm1
 853 | 	add rcx,16
 854 | 	cmp rcx,rdi
 855 | 	jl xloop ; do-while: at least one 16-byte chunk per row
 856 | 
 857 | 	add rax,r8
 858 | 	add rbx,r8
 859 | 	add rdx,r8
 860 | 	dec esi
 861 | 	jnz yloop ; do-while: height must be at least 1
 862 | 
 863 | 	pop rdi
 864 | 	pop rsi
 865 | 	pop rbx
 866 | 	pop rbp
 867 | 
 868 | 	ret
 869 | 	
 870 | orAndMasks_SSE2 endp
 871 | 
 872 | 
 873 | 
 874 | ;andMasks_SSE2 proc s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword   (32-bit prototype kept for reference; here the three pointers arrive in 64-bit registers)
 875 | ; s1p = rcx
 876 | ; s2p = rdx
 877 | ; dstp = r8
 878 | ; stride = r9d
 879 | ; width_ and height are read from the caller's stack. Per byte: dstp = s1p & s2p.
 880 | andMasks_SSE2 proc public frame
 881 | 
 882 | 	width_ equ dword ptr [rbp+48]
 883 | 	height equ dword ptr [rbp+56]
 884 | 
 885 | 	push rbp
 886 | .pushreg rbp
 887 | 	mov rbp,rsp
 888 | 	push rbx
 889 | .pushreg rbx
 890 | 	push rsi
 891 | .pushreg rsi
 892 | 	push rdi
 893 | .pushreg rdi
 894 | .endprolog
 895 | 	
 896 | 	mov rax,rcx ; rax = s1p row
 897 | 	mov rbx,rdx ; rbx = s2p row
 898 | 	mov rdx,r8 ; rdx = dstp row
 899 | 	movsxd r8,r9d ; r8 = sign-extended stride
 900 | 	xor rdi,rdi
 901 | 	mov edi,width_ ; rdi = zero-extended width
 902 | 	xor rsi,rsi
 903 | 	mov esi,height ; rsi = zero-extended row count
 904 | 	mov r10,16 ; r10 = chunk size
 905 | 
 906 | yloop:
 907 | 	xor rcx,rcx ; rcx = byte offset within the row
 908 | xloop:
 909 | 	movdqa xmm0,[rax+rcx]
 910 | 	pand xmm0,[rbx+rcx]
 911 | 	movdqa [rdx+rcx],xmm0
 912 | 	add rcx,r10
 913 | 	cmp rcx,rdi
 914 | 	jl xloop ; do-while: at least one 16-byte chunk per row
 915 | 
 916 | 	add rax,r8
 917 | 	add rbx,r8
 918 | 	add rdx,r8
 919 | 	dec esi
 920 | 	jnz yloop ; do-while: height must be at least 1
 921 | 
 922 | 	pop rdi
 923 | 	pop rsi
 924 | 	pop rbx
 925 | 	pop rbp
 926 | 
 927 | 	ret
 928 | 	
 929 | andMasks_SSE2 endp
 930 | 
 931 | 
 932 | 
 933 | ;checkSceneChange_SSE2 proc s1p:dword,s2p:dword,stride:dword,width_:dword,height:dword,diffp:dword   (32-bit prototype kept for reference; here s1p, s2p and diffp are 64-bit pointers)
 934 | ; s1p = rcx
 935 | ; s2p = rdx
 936 | ; stride = r8d
 937 | ; width_ = r9d
 938 | ; height and diffp are read from the caller's stack. Sums |s1p - s2p| over the whole plane and stores the total as a 64-bit value at [diffp].
 939 | checkSceneChange_SSE2 proc public frame
 940 | 
 941 | 	height equ dword ptr [rbp+48]
 942 | 	diffp equ qword ptr [rbp+56]
 943 | 
 944 | 	push rbp
 945 | .pushreg rbp
 946 | 	mov rbp,rsp
 947 | 	push rsi
 948 | .pushreg rsi
 949 | .endprolog
 950 | 	
 951 | 	mov rax,rcx ; rax = s1p row
 952 | 	mov rsi,rdx ; rsi = s2p row
 953 | 	movsxd r8,r8d ; r8 = sign-extended stride
 954 | 	xor rdx,rdx
 955 | 	mov edx,r9d ; rdx = zero-extended width
 956 | 	xor r9,r9
 957 | 	mov r9d,height ; r9 = zero-extended row count
 958 | 	mov r10,16 ; r10 = chunk size
 959 | 	
 960 | 	pxor xmm1,xmm1 ; xmm1 = running sums, one per 8-byte half
 961 | 	
 962 | yloop:
 963 | 	xor rcx,rcx ; rcx = byte offset within the row
 964 | xloop:
 965 | 	movdqa xmm0,[rax+rcx]
 966 | 	psadbw xmm0,[rsi+rcx] ; two sums of absolute differences (low and high 8 bytes)
 967 | 	paddq xmm1,xmm0 ; accumulated as 64-bit lanes
 968 | 
 969 | 	add rcx,r10
 970 | 	cmp rcx,rdx
 971 | 	jl xloop ; do-while: at least one 16-byte chunk per row
 972 | 
 973 | 	add rax,r8
 974 | 	add rsi,r8
 975 | 	dec r9d
 976 | 	jnz yloop ; do-while: height must be at least 1
 977 | 
 978 | 
 979 | 	movdqa xmm2,xmm1
 980 | 	psrldq xmm1,8 ; bring the high-half sum down to the low lane
 981 | 	paddq xmm2,xmm1 ; low qword = total of both halves
 982 | 
 983 |   mov rax,diffp
 984 | 	movd QWORD PTR [rax],xmm2 ; 8 bytes are written at diffp
 985 | 
 986 | 	pop rsi
 987 | 	pop rbp
 988 | 
 989 | 	ret
 990 | 	
 991 | checkSceneChange_SSE2 endp
 992 | 
 993 | 
 994 | 
 995 | ;VerticalBlur3_SSE2 proc srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword
 996 | ; srcp = rcx
 997 | ; dstp = rdx
 998 | ; stride = r8d
 999 | ; width_ = r9d
1000 | 
1001 | VerticalBlur3_SSE2 proc public frame
1002 | 	
1003 | 	height equ dword ptr [rbp+48]
1004 | 
1005 | 	push rbp
1006 | .pushreg rbp
1007 | 	mov rbp,rsp
1008 | 	push rbx
1009 | .pushreg rbx
1010 | 	push rsi
1011 | .pushreg rsi
1012 | 	push rdi
1013 | .pushreg rdi
1014 | 
1015 | 	sub rsp,32
1016 | .allocstack 32
1017 | 	movdqu oword ptr[rsp],xmm6
1018 | .savexmm128 xmm6,0
1019 | 	movdqu oword ptr[rsp+16],xmm7
1020 | .savexmm128 xmm7,16
1021 | .endprolog
1022 | 
1023 | 	mov rax,rcx
1024 | 	mov rbx,rdx
1025 | 	movsxd r8,r8d
1026 | 	mov rsi,rax
1027 | 	mov rdi,rax
1028 | 	sub rsi,r8
1029 | 	add rdi,r8
1030 | 	xor rdx,rdx
1031 | 	mov edx,r9d
1032 | 	xor r9,r9
1033 | 	mov r9d,height
1034 | 	mov r10,2
1035 | 	mov r11,16
1036 | 
1037 | 	; 0x0002,for rounding
1038 | 	pcmpeqb xmm6,xmm6
1039 | 	psrlw xmm6,15
1040 | 	psllw xmm6,1
1041 | 
1042 | 	pxor xmm7,xmm7
1043 | 
1044 | 	xor rcx,rcx
1045 | 
1046 | toploop:
1047 | 	movdqa xmm0,[rax+rcx]
1048 | 	pavgb xmm0,[rdi+rcx]
1049 | 	movdqa [rbx+rcx],xmm0
1050 | 	add rcx,r11
1051 | 	cmp rcx,rdx
1052 | 	jl toploop
1053 | 
1054 | 	add rsi,r8
1055 | 	add rax,r8
1056 | 	add rdi,r8
1057 | 	add rbx,r8
1058 | 
1059 | 	sub r9d,r10d ; the main loop processes 2 lines fewer than the height
1060 | 
1061 | yloop:
1062 | 	xor rcx,rcx
1063 | xloop:
1064 | 	movdqa xmm0,[rsi+rcx]
1065 | 	movdqa xmm1,[rax+rcx]
1066 | 	movdqa xmm2,[rdi+rcx]
1067 | 	movdqa xmm3,xmm0
1068 | 	movdqa xmm4,xmm1
1069 | 	movdqa xmm5,xmm2
1070 | 	punpcklbw xmm0,xmm7
1071 | 	punpcklbw xmm1,xmm7
1072 | 	punpcklbw xmm2,xmm7
1073 | 	punpckhbw xmm3,xmm7
1074 | 	punpckhbw xmm4,xmm7
1075 | 	punpckhbw xmm5,xmm7
1076 | 
1077 | 	; add bottom to top
1078 | 	paddw xmm0,xmm2
1079 | 	paddw xmm3,xmm5
1080 | 
1081 | 	; multiply center by 2
1082 | 	psllw xmm1,1
1083 | 	psllw xmm4,1
1084 | 
1085 | 	; add center to sum
1086 | 	paddw xmm0,xmm1
1087 | 	paddw xmm3,xmm4
1088 | 
1089 | 	; add 2 to sum
1090 | 	paddw xmm0,xmm6
1091 | 	paddw xmm3,xmm6
1092 | 
1093 | 	; divide by 4
1094 | 	psrlw xmm0,2
1095 | 	psrlw xmm3,2
1096 | 	packuswb xmm0,xmm3
1097 | 	movdqa [rbx+rcx],xmm0
1098 | 
1099 | 	add rcx,r11
1100 | 	cmp rcx,rdx
1101 | 	jl xloop
1102 | 
1103 | 	add rsi,r8
1104 | 	add rax,r8
1105 | 	add rdi,r8
1106 | 	add rbx,r8
1107 | 	dec r9d
1108 | 	jnz yloop
1109 | 
1110 | 	xor rcx,rcx
1111 | 
1112 | bottomloop:
1113 | 	movdqa xmm0,[rsi+rcx]
1114 | 	pavgb xmm0,[rax+rcx]
1115 | 	movdqa [rbx+rcx],xmm0
1116 | 	add rcx,r11
1117 | 	cmp rcx,rdx
1118 | 	jl bottomloop
1119 | 
1120 | 	movdqu xmm7,oword ptr[rsp+16]
1121 | 	movdqu xmm6,oword ptr[rsp]
1122 | 	add rsp,32
1123 | 
1124 | 	pop rdi
1125 | 	pop rsi
1126 | 	pop rbx
1127 | 	pop rbp
1128 | 
1129 | 	ret
1130 | 	
1131 | VerticalBlur3_SSE2 endp
1132 | 
1133 | 
1134 | 
; HorizontalBlur3_SSE2 (x64): horizontal 3-tap blur, 16 pixels per step:
;   dstp[x] = (srcp[x-1] + 2*srcp[x] + srcp[x+1] + 2) >> 2
; Commented-out prototype with the full argument list, kept for reference:
;HorizontalBlur3_SSE2 proc srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword
; Register arguments on entry:
; srcp = rcx
; dstp = rdx
; stride = r8d
; width_ = r9d
; height is the fifth argument and is read from the caller's stack.
; Each block reads srcp[x-1] .. srcp[x+16], i.e. one byte before and one byte
; after the 16 pixels being produced; the caller must keep those bytes readable.
; The center load and the store use movdqa, so rows must be 16-byte aligned.
; Both loops test at the bottom, so width_ and height must be > 0.

HorizontalBlur3_SSE2 proc public frame

	height equ dword ptr [rbp+48]

	; prolog: xmm6/xmm7 are non-volatile in the Win64 ABI and are spilled here
	push rbp
.pushreg rbp
	mov rbp,rsp
	sub rsp,32
.allocstack 32
	movdqu oword ptr[rsp],xmm6
.savexmm128 xmm6,0
	movdqu oword ptr[rsp+16],xmm7
.savexmm128 xmm7,16
.endprolog
	
	mov rax,rcx ; rax = source row (rcx is reused as the column offset)
	movsxd r8,r8d ; r8 = stride, sign-extended to 64 bits
	; width_ is a 32-bit argument; the upper half of r9 is undefined on entry.
	; Zero-extend it because the column loop compares the full 64-bit register.
	mov r9d,r9d
	mov r10d,height ; r10d = remaining row count
	mov r11,16 ; column step: one xmm register

	pxor xmm7,xmm7 ; zero, used to widen bytes to words

	; 0x0002,for rounding
	pcmpeqb xmm6,xmm6
	psrlw xmm6,15
	psllw xmm6,1

yloop:
	xor rcx,rcx
xloop:
	movdqu xmm0,[rax+rcx-1] ; left neighbours
	movdqa xmm1,[rax+rcx] ; center pixels
	movdqu xmm2,[rax+rcx+1] ; right neighbours
	movdqa xmm3,xmm0
	movdqa xmm4,xmm1
	movdqa xmm5,xmm2

	; widen low halves (xmm0-2) and high halves (xmm3-5) to 16-bit words
	punpcklbw xmm0,xmm7
	punpcklbw xmm1,xmm7
	punpcklbw xmm2,xmm7
	punpckhbw xmm3,xmm7
	punpckhbw xmm4,xmm7
	punpckhbw xmm5,xmm7

	; center * 2
	psllw xmm1,1
	psllw xmm4,1
	paddw xmm1,xmm0
	paddw xmm4,xmm3
	paddw xmm1,xmm2
	paddw xmm4,xmm5

	; add 2 to sum
	paddw xmm1,xmm6
	paddw xmm4,xmm6

	; divide by 4
	psrlw xmm1,2
	psrlw xmm4,2
	packuswb xmm1,xmm4
	movdqa [rdx+rcx],xmm1

	add rcx,r11
	cmp rcx,r9
	jl xloop

	add rax,r8
	add rdx,r8
	dec r10d
	jnz yloop

	; epilog: restore xmm registers and release the spill area
	movdqu xmm7,oword ptr[rsp+16]
	movdqu xmm6,oword ptr[rsp]
	add rsp,32
	pop rbp

	ret
	
HorizontalBlur3_SSE2 endp
1220 | 
1221 | 
1222 | 
; HorizontalBlur6_SSE2 (x64): horizontal 5-tap blur, 16 pixels per step:
;   dstp[x] = (srcp[x-2] + 4*(srcp[x-1] + srcp[x+1]) + 6*srcp[x] + srcp[x+2] + 8) >> 4
; Commented-out prototype with the full argument list, kept for reference:
;HorizontalBlur6_SSE2 proc srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword
; Register arguments on entry:
; srcp = rcx
; dstp = rdx
; stride = r8d
; width_ = r9d
; height is the fifth argument and is read from the caller's stack.
; Each block reads srcp[x-2] .. srcp[x+17], i.e. two bytes before and two bytes
; after the 16 pixels being produced; the caller must keep those bytes readable.
; The center load and the store use movdqa, so rows must be 16-byte aligned.
; Both loops test at the bottom, so width_ and height must be > 0.

HorizontalBlur6_SSE2 proc public frame
	
	height equ dword ptr [rbp+48]

	; prolog: xmm6-xmm12 are non-volatile in the Win64 ABI and are spilled here
	push rbp
.pushreg rbp
	mov rbp,rsp
	sub rsp,112
.allocstack 112
	movdqu oword ptr[rsp],xmm6
.savexmm128 xmm6,0
	movdqu oword ptr[rsp+16],xmm7
.savexmm128 xmm7,16
	movdqu oword ptr[rsp+32],xmm8
.savexmm128 xmm8,32
	movdqu oword ptr[rsp+48],xmm9
.savexmm128 xmm9,48
	movdqu oword ptr[rsp+64],xmm10
.savexmm128 xmm10,64
	movdqu oword ptr[rsp+80],xmm11
.savexmm128 xmm11,80
	movdqu oword ptr[rsp+96],xmm12
.savexmm128 xmm12,96
.endprolog

	mov rax,rcx ; rax = source row (rcx is reused as the column offset)
	movsxd r8,r8d ; r8 = stride, sign-extended to 64 bits
	; width_ is a 32-bit argument; the upper half of r9 is undefined on entry.
	; Zero-extend it because the column loop compares the full 64-bit register.
	mov r9d,r9d
	mov r10d,height ; r10d = remaining row count
	mov r11,16 ; column step: one xmm register

	pxor xmm12,xmm12 ; zero, used to widen bytes to words

    ; 0x0006
    pcmpeqb xmm11,xmm11
    psrlw xmm11,14
    psllw xmm11,1

    ; 0x0008
    pcmpeqb xmm10,xmm10
    psrlw xmm10,15
    psllw xmm10,3

yloop:
	xor rcx,rcx
xloop:
	movdqu xmm0,[rax+rcx-2]
	movdqu xmm1,[rax+rcx-1]
	movdqa xmm2,[rax+rcx]
	movdqu xmm3,[rax+rcx+1]
	movdqu xmm4,[rax+rcx+2]
	movdqa xmm5,xmm0
	movdqa xmm6,xmm1
	movdqa xmm7,xmm2
	movdqa xmm8,xmm3
	movdqa xmm9,xmm4
	; widen low halves (xmm0-4) and high halves (xmm5-9) to 16-bit words
	punpcklbw xmm0,xmm12
	punpcklbw xmm1,xmm12
	punpcklbw xmm2,xmm12
	punpcklbw xmm3,xmm12
	punpcklbw xmm4,xmm12
	punpckhbw xmm5,xmm12
	punpckhbw xmm6,xmm12
	punpckhbw xmm7,xmm12
	punpckhbw xmm8,xmm12
	punpckhbw xmm9,xmm12

	; srcp[x-2] + srcp[x+2]
	paddw xmm0,xmm4
	paddw xmm5,xmm9

	; srcp[x-1] + srcp[x+1]
	paddw xmm1,xmm3
	paddw xmm6,xmm8

	; (srcp[x-1] + srcp[x+1])*4
	psllw xmm1,2
	psllw xmm6,2

	; (srcp[x-1] + srcp[x+1])*4 + srcp[x-2] + srcp[x+2]
	paddw xmm0,xmm1
	paddw xmm5,xmm6

	; srcp[x] * 6
	pmullw xmm2,xmm11
	pmullw xmm7,xmm11
	paddw xmm0,xmm2
	paddw xmm5,xmm7

	; add 8
	paddw xmm0,xmm10
	paddw xmm5,xmm10

	; divide by 16
	psrlw xmm0,4
	psrlw xmm5,4
	packuswb xmm0,xmm5
	movdqa [rdx+rcx],xmm0

	add rcx,r11
	cmp rcx,r9
	jl xloop

	add rax,r8
	add rdx,r8
	dec r10d
	jnz yloop

	; epilog: restore xmm registers and release the spill area
	movdqu xmm12,oword ptr[rsp+96]
	movdqu xmm11,oword ptr[rsp+80]
	movdqu xmm10,oword ptr[rsp+64]
	movdqu xmm9,oword ptr[rsp+48]
	movdqu xmm8,oword ptr[rsp+32]
	movdqu xmm7,oword ptr[rsp+16]
	movdqu xmm6,oword ptr[rsp]
	add rsp,112
	pop rbp

	ret
	
HorizontalBlur6_SSE2 endp
1349 | 
1350 | 
1351 | 
1352 | end


--------------------------------------------------------------------------------
/TComb/TComb_core.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | **                    TComb v2.x for Avisynth 2.6 and Avisynth+
  3 | **
  4 | **   TComb is a temporal comb filter (it reduces cross-luminance (rainbowing)
  5 | **   and cross-chrominance (dot crawl) artifacts in static areas of the picture).
  6 | **   It will ONLY work with NTSC material, and WILL NOT work with telecined material
  7 | **   where the rainbowing/dotcrawl was introduced prior to the telecine process!
  8 | **   It must be used before ivtc or deinterlace.
  9 | **
 10 | **   Copyright (C) 2021 Ferenc Pintér
 11 | **
 12 | **   Copyright (C) 2015 Shane Panke
 13 | **
 14 | **   Copyright (C) 2005-2006 Kevin Stone
 15 | **
 16 | **   This program is free software; you can redistribute it and/or modify
 17 | **   it under the terms of the GNU General Public License as published by
 18 | **   the Free Software Foundation; either version 2 of the License, or
 19 | **   (at your option) any later version.
 20 | **
 21 | **   This program is distributed in the hope that it will be useful,
 22 | **   but WITHOUT ANY WARRANTY; without even the implied warranty of
 23 | **   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 24 | **   GNU General Public License for more details.
 25 | **
 26 | **   You should have received a copy of the GNU General Public License
 27 | **   along with this program; if not, write to the Free Software
 28 | **   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 29 | */
 30 | 
 31 | #include "TComb.h"
 32 | #include <stdint.h>
 33 | 
 34 | #ifdef INTEL_INTRINSICS
 35 | #include <xmmintrin.h>
 36 | #include <emmintrin.h>
 37 | #endif
 38 | #include <algorithm>
 39 | 
 40 | template<typename pixel_t>
 41 | void checkSceneChangePlanar_1_c(const pixel_t* srcp, const pixel_t* nxtp,
 42 |   int height, int width, int src_pitch, int nxt_pitch, uint64_t& diff)
 43 | {
 44 |   for (int y = 0; y < height; ++y)
 45 |   {
 46 |     uint32_t rowdiff = 0;
 47 |     for (int x = 0; x < width; x += 4)
 48 |     {
 49 |       rowdiff += abs(srcp[x + 0] - nxtp[x + 0]);
 50 |       rowdiff += abs(srcp[x + 1] - nxtp[x + 1]);
 51 |       rowdiff += abs(srcp[x + 2] - nxtp[x + 2]);
 52 |       rowdiff += abs(srcp[x + 3] - nxtp[x + 3]);
 53 |     }
 54 |     diff += rowdiff;
 55 |     srcp += src_pitch;
 56 |     nxtp += nxt_pitch;
 57 |   }
 58 | }
 59 | 
 60 | // instantiate
 61 | template void checkSceneChangePlanar_1_c<uint8_t>(const uint8_t* srcp, const uint8_t* nxtp,
 62 |   int height, int width, int src_pitch, int nxt_pitch, uint64_t& diff);
 63 | 
 64 | #ifdef INTEL_INTRINSICS
 65 | void checkSceneChangePlanar_1_SSE2_simd(const uint8_t* prvp, const uint8_t* srcp,
 66 |   int height, int width, int prv_pitch, int src_pitch, uint64_t& diffp)
 67 | {
 68 |   __m128i sum = _mm_setzero_si128();
 69 |   while (height--) {
 70 |     for (int x = 0; x < width; x += 16)
 71 |     {
 72 |       __m128i src1 = _mm_load_si128(reinterpret_cast<const __m128i*>(prvp + x));
 73 |       __m128i src2 = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp + x));
 74 |       __m128i sad = _mm_sad_epu8(src1, src2);
 75 |       sum = _mm_add_epi32(sum, sad);
 76 |     }
 77 |     prvp += prv_pitch;
 78 |     srcp += src_pitch;
 79 |   }
 80 |   __m128i res = _mm_add_epi32(sum, _mm_srli_si128(sum, 8));
 81 |   diffp = _mm_cvtsi128_si32(res);
 82 | }
 83 | #endif
 84 | 
 85 | #ifdef INTEL_INTRINSICS
 86 | void andMasks_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height)
 87 | {
 88 |   for (int y = 0; y < height; ++y)
 89 |   {
 90 |     for (int x = 0; x < width; x += 16)
 91 |     {
 92 |       __m128i src1 = _mm_load_si128(reinterpret_cast<const __m128i*>(s1p + x));
 93 |       __m128i src2 = _mm_load_si128(reinterpret_cast<const __m128i*>(s2p + x));
 94 |       __m128i result = _mm_and_si128(src1, src2);
 95 |       _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), result);
 96 |     }
 97 | 
 98 |     s1p += stride;
 99 |     s2p += stride;
100 |     dstp += stride;
101 |   }
102 | }
103 | #endif
104 | 
// Scalar reference: dstp[x] = s1p[x] & s2p[x] for a width x height area.
// All three planes advance by the same `stride` (in bytes) per row.
void andMasks_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height)
{
  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft)
  {
    int col = 0;
    while (col < width)
    {
      dstp[col] = (s1p[col] & s2p[col]);
      ++col;
    }

    s1p += stride;
    s2p += stride;
    dstp += stride;
  }
}
117 | 
118 | #ifdef INTEL_INTRINSICS
// dstp |= (s1p & s2p), evaluated 16 bytes per step with aligned SSE2 accesses.
// The three planes share one `stride` (bytes between rows). Every row start
// must be 16-byte aligned, and each row is handled in whole 16-byte blocks.
void orAndMasks_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height)
{
  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft)
  {
    for (int offset = 0; offset < width; offset += 16)
    {
      __m128i* const out = reinterpret_cast<__m128i*>(dstp + offset);
      const __m128i lhs = _mm_load_si128(reinterpret_cast<const __m128i*>(s1p + offset));
      const __m128i rhs = _mm_load_si128(reinterpret_cast<const __m128i*>(s2p + offset));
      const __m128i previous = _mm_load_si128(out);
      _mm_store_si128(out, _mm_or_si128(previous, _mm_and_si128(lhs, rhs)));
    }

    s1p += stride;
    s2p += stride;
    dstp += stride;
  }
}
137 | #endif
138 | 
// Scalar reference: dstp[x] |= (s1p[x] & s2p[x]) for a width x height area.
// All three planes advance by the same `stride` (in bytes) per row.
void orAndMasks_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height)
{
  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft)
  {
    int col = 0;
    while (col < width)
    {
      dstp[col] |= (s1p[col] & s2p[col]);
      ++col;
    }

    s1p += stride;
    s2p += stride;
    dstp += stride;
  }
}
151 | 
152 | #ifdef INTEL_INTRINSICS
// dstp = s1p | s2p | s3p, evaluated 16 bytes per step with aligned SSE2 accesses.
// The four planes share one `stride` (bytes between rows). Every row start
// must be 16-byte aligned, and each row is handled in whole 16-byte blocks.
void or3Masks_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* s3p, uint8_t* dstp, int stride, int width, int height)
{
  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft)
  {
    int offset = 0;
    while (offset < width)
    {
      const __m128i first = _mm_load_si128(reinterpret_cast<const __m128i*>(s1p + offset));
      const __m128i second = _mm_load_si128(reinterpret_cast<const __m128i*>(s2p + offset));
      const __m128i third = _mm_load_si128(reinterpret_cast<const __m128i*>(s3p + offset));
      const __m128i lastTwo = _mm_or_si128(second, third);
      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + offset), _mm_or_si128(first, lastTwo));
      offset += 16;
    }

    s1p += stride;
    s2p += stride;
    s3p += stride;
    dstp += stride;
  }
}
172 | #endif
173 | 
// Scalar reference: dstp[x] = s1p[x] | s2p[x] | s3p[x] for a width x height area.
// All four planes advance by the same `stride` (in bytes) per row.
void or3Masks_c(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* s3p, uint8_t* dstp, int stride, int width, int height)
{
  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft)
  {
    int col = 0;
    while (col < width)
    {
      dstp[col] = (s1p[col] | s2p[col] | s3p[col]);
      ++col;
    }

    s1p += stride;
    s2p += stride;
    s3p += stride;
    dstp += stride;
  }
}
187 | 
188 | #ifdef INTEL_INTRINSICS
// dstp = rounded-up average of s1p and s2p, i.e. (a + b + 1) >> 1 per byte,
// evaluated 16 bytes per step with aligned SSE2 accesses.
// The three planes share one `stride` (bytes between rows). Every row start
// must be 16-byte aligned, and each row is handled in whole 16-byte blocks.
void calcAverages_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height)
{
  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft)
  {
    int offset = 0;
    while (offset < width)
    {
      const __m128i lhs = _mm_load_si128(reinterpret_cast<const __m128i*>(s1p + offset));
      const __m128i rhs = _mm_load_si128(reinterpret_cast<const __m128i*>(s2p + offset));
      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + offset), _mm_avg_epu8(lhs, rhs));
      offset += 16;
    }

    s1p += stride;
    s2p += stride;
    dstp += stride;
  }
}
206 | #endif
207 | 
// Scalar reference: dstp[x] = (s1p[x] + s2p[x] + 1) >> 1, the average rounded up.
// All three planes advance by the same `stride` (in bytes) per row.
void calcAverages_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height)
{
  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft)
  {
    int col = 0;
    while (col < width)
    {
      // both operands are promoted to int, so the sum cannot overflow
      const int roundedSum = s1p[col] + s2p[col] + 1;
      dstp[col] = roundedSum >> 1;
      ++col;
    }

    s1p += stride;
    s2p += stride;
    dstp += stride;
  }
}
220 | 
221 | #ifdef INTEL_INTRINSICS
// For every pixel, writes the minimum and maximum of its 3x3 neighbourhood,
// widened by `thresh` with unsigned saturation:
//   dstpMin[x] = max(min3x3 - thresh, 0)
//   dstpMax[x] = min(max3x3 + thresh, 255)
//
//   srcp                 first row to process; rows srcp - src_stride and
//                        srcp + src_stride as well as one byte to the left and
//                        right of every 16-byte block are read
//   dstpMin, dstpMax     outputs; both advance by dmin_stride per row and are
//                        written with aligned stores (16-byte aligned rows)
//   width, height        processed area; rows are handled in 16-byte blocks
//   thresh               clamped to [0, 255] before being broadcast, like the
//                        other SSE2 kernels in this file
//
// All source loads are unaligned loads: the x - 1 and x + 1 addresses can
// never be 16-byte aligned when the row itself is, so the aligned load that was
// used for the x - 1 column could fault.
void MinMax_SSE2_simd(const uint8_t* srcp, uint8_t* dstpMin, uint8_t* dstpMax, int src_stride, int dmin_stride, int width, int height, int thresh)
{
  const uint8_t* srcpp = srcp - src_stride;
  const uint8_t* srcpn = srcp + src_stride;

  const int thresh8 = std::min(std::max(thresh, 0), 255);
  const __m128i threshp = _mm_set1_epi8(static_cast<char>(thresh8));

  const auto load16 = [](const uint8_t* p) {
    return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
  };

  for (int y = 0; y < height; ++y)
  {
    for (int x = 0; x < width; x += 16)
    {
      // the nine neighbours: rows prev/cur/next, columns x-1 / x / x+1
      const __m128i n[9] = {
        load16(srcpp + x - 1), load16(srcpp + x), load16(srcpp + x + 1),
        load16(srcp + x - 1),  load16(srcp + x),  load16(srcp + x + 1),
        load16(srcpn + x - 1), load16(srcpn + x), load16(srcpn + x + 1)
      };

      __m128i lo = n[0];
      __m128i hi = n[0];
      for (int i = 1; i < 9; ++i)
      {
        lo = _mm_min_epu8(lo, n[i]);
        hi = _mm_max_epu8(hi, n[i]);
      }

      // saturating subtract/add implement the clamp to 0 and 255
      _mm_store_si128(reinterpret_cast<__m128i*>(dstpMin + x), _mm_subs_epu8(lo, threshp));
      _mm_store_si128(reinterpret_cast<__m128i*>(dstpMax + x), _mm_adds_epu8(hi, threshp)); // future warning: 10-14 bits
    }

    srcpp += src_stride;
    srcp += src_stride;
    srcpn += src_stride;
    dstpMin += dmin_stride;
    dstpMax += dmin_stride;
  }
}
271 | #endif
272 | 
// Scalar reference for the 3x3 min/max pass:
//   dstpMin[x] = max(min3x3 - thresh, 0)
//   dstpMax[x] = min(max3x3 + thresh, 255)
// The row above and below srcp and one pixel to the left and right of the
// processed area are read. Both outputs advance by dmin_stride per row.
void MinMax_c(const uint8_t* srcp, uint8_t* dstpMin, uint8_t* dstpMax, int src_stride, int dmin_stride, int width, int height, int thresh)
{
  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft)
  {
    const uint8_t* const above = srcp - src_stride;
    const uint8_t* const below = srcp + src_stride;
    const uint8_t* const window[3] = { above, srcp, below };

    for (int x = 0; x < width; ++x)
    {
      // minimum first, written before the maximum is gathered
      int lowest = above[x - 1];
      for (const uint8_t* line : window)
        for (int dx = -1; dx <= 1; ++dx)
          lowest = std::min(lowest, static_cast<int>(line[x + dx]));
      dstpMin[x] = std::max(lowest - thresh, 0);

      int highest = above[x - 1];
      for (const uint8_t* line : window)
        for (int dx = -1; dx <= 1; ++dx)
          highest = std::max(highest, static_cast<int>(line[x + dx]));
      dstpMax[x] = std::min(highest + thresh, 255);
    }

    srcp += src_stride;
    dstpMin += dmin_stride;
    dstpMax += dmin_stride;
  }
}
299 | 
300 | #ifdef INTEL_INTRINSICS
// dstp = |srcp1 - srcp2| per byte, evaluated 16 bytes per step with aligned
// SSE2 accesses. The three planes share one `stride` (bytes between rows).
// Every row start must be 16-byte aligned; rows are handled in 16-byte blocks.
void absDiff_SSE2_simd(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height)
{
  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft)
  {
    int offset = 0;
    while (offset < width)
    {
      const __m128i a = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp1 + offset));
      const __m128i b = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp2 + offset));
      // one of the two saturating differences is zero, the other is |a - b|
      const __m128i aMinusB = _mm_subs_epu8(a, b);
      const __m128i bMinusA = _mm_subs_epu8(b, a);
      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + offset), _mm_or_si128(aMinusB, bMinusA));
      offset += 16;
    }

    srcp1 += stride;
    srcp2 += stride;
    dstp += stride;
  }
}
319 | #endif
320 | 
// Scalar reference: dstp[x] = |srcp1[x] - srcp2[x]| for a width x height area.
// All three planes advance by the same `stride` (in bytes) per row.
void absDiff_c(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height)
{
  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft)
  {
    int col = 0;
    while (col < width)
    {
      const int a = srcp1[col];
      const int b = srcp2[col];
      dstp[col] = (a >= b) ? (a - b) : (b - a);
      ++col;
    }

    srcp1 += stride;
    srcp2 += stride;
    dstp += stride;
  }
}
333 | 
334 | #ifdef INTEL_INTRINSICS
// SSE2 version of buildFinalMask_c:
//   dstp[x] = (m1p[x] != 0 && |s1p[x] - s2p[x]| < thresh) ? 0xFF : 0
//
// All four planes share one `stride` (bytes between rows). Every row start
// must be 16-byte aligned; rows are handled in whole 16-byte blocks.
//
// Differences from the previous implementation, all of which bring it in line
// with the scalar reference:
//   * thresh - 1 is clamped to [0, 255] before being broadcast to bytes
//     (thresh > 256 used to wrap around to a small threshold);
//   * thresh <= 0 selects nothing, since no absolute difference is below it;
//   * any non-zero m1p byte enables the pixel and the output is a full 0xFF,
//     instead of the comparison result ANDed with the raw m1p value.
void buildFinalMask_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* m1p, uint8_t* dstp, int stride, int width, int height, int thresh)
{
  const int threshm1 = std::min(std::max(thresh, 1) - 1, 255);
  const __m128i thresh_minus1 = _mm_set1_epi8(static_cast<char>(threshm1));
  const __m128i enabled = _mm_set1_epi8(thresh > 0 ? -1 : 0);
  const __m128i zero = _mm_setzero_si128();

  for (int y = 0; y < height; ++y)
  {
    for (int x = 0; x < width; x += 16)
    {
      const __m128i src1 = _mm_load_si128(reinterpret_cast<const __m128i*>(s1p + x));
      const __m128i src2 = _mm_load_si128(reinterpret_cast<const __m128i*>(s2p + x));
      const __m128i m1 = _mm_load_si128(reinterpret_cast<const __m128i*>(m1p + x));

      // |src1 - src2| through two saturating subtractions
      const __m128i diff = _mm_or_si128(_mm_subs_epu8(src1, src2), _mm_subs_epu8(src2, src1));
      // diff < thresh  <=>  sub_sat(diff, thresh - 1) == 0
      const __m128i below = _mm_cmpeq_epi8(_mm_subs_epu8(diff, thresh_minus1), zero);
      // 0xFF where m1 == 0; andnot keeps `below` only where m1 is non-zero
      const __m128i m1_is_zero = _mm_cmpeq_epi8(m1, zero);
      const __m128i selected = _mm_andnot_si128(m1_is_zero, below);

      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), _mm_and_si128(selected, enabled));
    }

    m1p += stride;
    s1p += stride;
    s2p += stride;
    dstp += stride;
  }
}
369 | #endif
370 | 
// Scalar reference: a pixel is set to 0xFF when its m1p entry is non-zero and
// the absolute difference between s1p and s2p is below `thresh`; otherwise it
// is cleared. All four planes advance by the same `stride` (in bytes) per row.
void buildFinalMask_c(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* m1p, uint8_t* dstp, int stride, int width, int height, int thresh)
{
  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft)
  {
    int col = 0;
    while (col < width)
    {
      const bool selected = m1p[col] != 0 && abs(s1p[col] - s2p[col]) < thresh;
      dstp[col] = selected ? 0xFF : 0;
      ++col;
    }

    m1p += stride;
    s1p += stride;
    s2p += stride;
    dstp += stride;
  }
}
389 | 
390 | #ifdef INTEL_INTRINSICS
// SSE2 version of checkOscillation5_c. With
//   min31/max31 = min/max of (p2p, s1p, n2p)   and
//   min22/max22 = min/max of (p1p, n1p)
// a pixel is set to 0xFF when
//   (max22 < min31 || max22 == 0 || max31 < min22 || max31 == 0)
//   && max31 - min31 < thresh && max22 - min22 < thresh
// and cleared otherwise.
//
// All six planes share one `stride` (bytes between rows). Every row start must
// be 16-byte aligned; rows are handled in whole 16-byte blocks.
//
// thresh <= 0 selects nothing, matching the scalar reference (no range is
// below a non-positive threshold). Previously the clamp of thresh - 1 to 0
// let pixels with a range of exactly 0 through.
void checkOscillation5_SSE2_simd(const uint8_t* p2p, const uint8_t* p1p, const uint8_t* s1p, const uint8_t* n1p, const uint8_t* n2p, uint8_t* dstp, int stride, int width, int height, int thresh)
{
  const int threshm1 = std::min(std::max(thresh, 1) - 1, 255);
  const __m128i thresh_minus1 = _mm_set1_epi8(static_cast<char>(threshm1));
  const __m128i enabled = _mm_set1_epi8(thresh > 0 ? -1 : 0);
  const __m128i one = _mm_set1_epi8(1);
  const __m128i zero = _mm_setzero_si128();

  for (int y = 0; y < height; ++y)
  {
    for (int x = 0; x < width; x += 16)
    {
      // trick: x < thresh ==> x <= (thresh - 1) ==> x - (thresh - 1) <= 0 ==> sub_sat(x, thresh - 1) == 0
      // pcmpeqb(psubusb(x, thresh - 1), zero) : 0xFF where x < thresh

      const __m128i src_p2p = _mm_load_si128(reinterpret_cast<const __m128i*>(p2p + x));
      const __m128i src_s1p = _mm_load_si128(reinterpret_cast<const __m128i*>(s1p + x));
      const __m128i src_n2p = _mm_load_si128(reinterpret_cast<const __m128i*>(n2p + x));
      const __m128i src_p1p = _mm_load_si128(reinterpret_cast<const __m128i*>(p1p + x));
      const __m128i src_n1p = _mm_load_si128(reinterpret_cast<const __m128i*>(n1p + x));

      const __m128i min31 = _mm_min_epu8(_mm_min_epu8(src_p2p, src_s1p), src_n2p);
      const __m128i max31 = _mm_max_epu8(_mm_max_epu8(src_p2p, src_s1p), src_n2p);
      const __m128i min22 = _mm_min_epu8(src_p1p, src_n1p);
      const __m128i max22 = _mm_max_epu8(src_p1p, src_n1p);

      // a < b  <=>  sub_sat(a, sub_sat(b, 1)) == 0; the saturation at b == 0
      // also covers the explicit "a == 0" alternatives of the scalar code
      const __m128i cmp1 = _mm_cmpeq_epi8(_mm_subs_epu8(max22, _mm_subs_epu8(min31, one)), zero);
      const __m128i cmp2 = _mm_cmpeq_epi8(_mm_subs_epu8(max31, _mm_subs_epu8(min22, one)), zero);
      const __m128i range31 = _mm_subs_epu8(max31, min31);
      const __m128i cmp3 = _mm_cmpeq_epi8(_mm_subs_epu8(range31, thresh_minus1), zero);
      const __m128i range22 = _mm_subs_epu8(max22, min22);
      const __m128i cmp4 = _mm_cmpeq_epi8(_mm_subs_epu8(range22, thresh_minus1), zero);

      const __m128i separated = _mm_or_si128(cmp1, cmp2);
      const __m128i steady = _mm_and_si128(_mm_and_si128(cmp3, cmp4), enabled);
      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), _mm_and_si128(separated, steady));
    }

    p2p += stride;
    p1p += stride;
    s1p += stride;
    n1p += stride;
    n2p += stride;
    dstp += stride;
  }
}
442 | #endif
443 | 
444 | void checkOscillation5_c(const uint8_t* p2p, const uint8_t* p1p, const uint8_t* s1p, const uint8_t* n1p, const uint8_t* n2p, uint8_t* dstp, int stride, int width, int height, int thresh)
445 | {
446 |   for (int y = 0; y < height; ++y)
447 |   {
448 |     for (int x = 0; x < width; ++x)
449 |     {
450 |       const int min31 = min3(p2p[x], s1p[x], n2p[x]);
451 |       const int max31 = max3(p2p[x], s1p[x], n2p[x]);
452 |       const int min22 = std::min(p1p[x], n1p[x]);
453 |       const int max22 = std::max(p1p[x], n1p[x]);
454 |       if (((max22 < min31) || max22 == 0 || (max31 < min22) || max31 == 0) &&
455 |         max31 - min31 < thresh && max22 - min22 < thresh)
456 |         dstp[x] = 0xFF;
457 |       else dstp[x] = 0;
458 |     }
459 | 
460 |     p2p += stride;
461 |     p1p += stride;
462 |     s1p += stride;
463 |     n1p += stride;
464 |     n2p += stride;
465 |     dstp += stride;
466 |   }
467 | }
468 | 
469 | #ifdef INTEL_INTRINSICS
// SSE2 version of absDiffAndMinMaskThresh_c:
//   m       = min(|srcp1[x] - srcp2[x]|, dstp[x])
//   dstp[x] = (m < thresh) ? 0xFF : 0
//
// The three planes share one `stride` (bytes between rows). Every row start
// must be 16-byte aligned; rows are handled in whole 16-byte blocks.
//
// thresh <= 0 selects nothing, matching the scalar reference. Previously the
// clamp of thresh - 1 to 0 let pixels with m == 0 through.
void absDiffAndMinMaskThresh_SSE2_simd(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height, int thresh)
{
  const int threshm1 = std::min(std::max(thresh, 1) - 1, 255);
  const __m128i thresh_minus1 = _mm_set1_epi8(static_cast<char>(threshm1));
  const __m128i enabled = _mm_set1_epi8(thresh > 0 ? -1 : 0);
  const __m128i zero = _mm_setzero_si128();

  for (int y = 0; y < height; ++y)
  {
    for (int x = 0; x < width; x += 16)
    {
      const __m128i src1 = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp1 + x));
      const __m128i src2 = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp2 + x));
      const __m128i dst = _mm_load_si128(reinterpret_cast<const __m128i*>(dstp + x));

      // |src1 - src2| through two saturating subtractions
      const __m128i diff = _mm_or_si128(_mm_subs_epu8(src1, src2), _mm_subs_epu8(src2, src1));
      const __m128i smallest = _mm_min_epu8(diff, dst);
      // smallest < thresh  <=>  sub_sat(smallest, thresh - 1) == 0
      const __m128i below = _mm_cmpeq_epi8(_mm_subs_epu8(smallest, thresh_minus1), zero);

      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), _mm_and_si128(below, enabled));
    }

    srcp1 += stride;
    srcp2 += stride;
    dstp += stride;
  }
}
506 | #endif
507 | 
// Scalar reference: takes the smaller of |srcp1[x] - srcp2[x]| and the value
// already stored in dstp[x], then replaces dstp[x] with 0xFF when that value
// is below `thresh` and with 0 otherwise.
// All three planes advance by the same `stride` (in bytes) per row.
void absDiffAndMinMaskThresh_c(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height, int thresh)
{
  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft)
  {
    int col = 0;
    while (col < width)
    {
      const int delta = abs(srcp1[col] - srcp2[col]);
      const int stored = dstp[col];
      const int smallest = (delta < stored) ? delta : stored;
      dstp[col] = (smallest < thresh) ? 0xFF : 0;
      ++col;
    }

    srcp1 += stride;
    srcp2 += stride;
    dstp += stride;
  }
}
528 | 
529 | #ifdef INTEL_INTRINSICS
// dstp[x] = min(dstp[x], |srcp1[x] - srcp2[x]|), evaluated 16 bytes per step
// with aligned SSE2 accesses. The three planes share one `stride` (bytes
// between rows). Every row start must be 16-byte aligned; rows are handled in
// whole 16-byte blocks.
void absDiffAndMinMask_SSE2_simd(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height)
{
  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft)
  {
    int offset = 0;
    while (offset < width)
    {
      __m128i* const out = reinterpret_cast<__m128i*>(dstp + offset);
      const __m128i a = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp1 + offset));
      const __m128i b = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp2 + offset));
      const __m128i stored = _mm_load_si128(out);

      // one of the two saturating differences is zero, the other is |a - b|
      const __m128i delta = _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a));
      _mm_store_si128(out, _mm_min_epu8(delta, stored));
      offset += 16;
    }

    srcp1 += stride;
    srcp2 += stride;
    dstp += stride;
  }
}
558 | #endif
559 | 
// Scalar reference: lowers dstp[x] to |srcp1[x] - srcp2[x]| whenever that
// difference is smaller than the value currently stored; larger or equal
// differences leave dstp[x] untouched.
// All three planes advance by the same `stride` (in bytes) per row.
void absDiffAndMinMask_c(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height)
{
  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft)
  {
    int col = 0;
    while (col < width)
    {
      const int delta = abs(srcp1[col] - srcp2[col]);
      if (dstp[col] > delta)
        dstp[col] = delta;
      ++col;
    }

    srcp1 += stride;
    srcp2 += stride;
    dstp += stride;
  }
}
576 | 
577 | #ifdef INTEL_INTRINSICS
// SSE2 version of checkAvgOscCorrelation_c: clears dstp[x] when the range
// (max - min) of the four samples s1p..s4p at x is >= thresh, and leaves it
// unchanged otherwise.
//
// All five planes share one `stride` (bytes between rows). Every row start
// must be 16-byte aligned; rows are handled in whole 16-byte blocks.
//
// thresh <= 0 clears everything, matching the scalar reference (every range is
// >= a non-positive threshold). Previously the clamp of thresh - 1 to 0 kept
// pixels whose range was exactly 0.
void checkAvgOscCorrelation_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* s3p, const uint8_t* s4p, uint8_t* dstp, int stride, int width, int height, int thresh)
{
  const int threshm1 = std::min(std::max(thresh, 1) - 1, 255);
  const __m128i thresh_minus1 = _mm_set1_epi8(static_cast<char>(threshm1));
  const __m128i enabled = _mm_set1_epi8(thresh > 0 ? -1 : 0);
  const __m128i zero = _mm_setzero_si128();

  for (int y = 0; y < height; ++y)
  {
    for (int x = 0; x < width; x += 16)
    {
      const __m128i s1 = _mm_load_si128(reinterpret_cast<const __m128i*>(s1p + x));
      const __m128i s2 = _mm_load_si128(reinterpret_cast<const __m128i*>(s2p + x));
      const __m128i s3 = _mm_load_si128(reinterpret_cast<const __m128i*>(s3p + x));
      const __m128i s4 = _mm_load_si128(reinterpret_cast<const __m128i*>(s4p + x));

      const __m128i lowest = _mm_min_epu8(_mm_min_epu8(_mm_min_epu8(s1, s2), s3), s4);
      const __m128i highest = _mm_max_epu8(_mm_max_epu8(_mm_max_epu8(s1, s2), s3), s4);

      // range < thresh  <=>  sub_sat(range, thresh - 1) == 0
      const __m128i range = _mm_subs_epu8(highest, lowest);
      const __m128i keep = _mm_and_si128(_mm_cmpeq_epi8(_mm_subs_epu8(range, thresh_minus1), zero), enabled);

      // dst & 0xFF keeps the pixel, dst & 0x00 clears it
      const __m128i dst = _mm_load_si128(reinterpret_cast<const __m128i*>(dstp + x));
      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), _mm_and_si128(keep, dst));
    }

    s1p += stride;
    s2p += stride;
    s3p += stride;
    s4p += stride;
    dstp += stride;
  }
}
618 | #endif
619 | 
620 | void checkAvgOscCorrelation_c(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* s3p, const uint8_t* s4p, uint8_t* dstp, int stride, int width, int height, int thresh)
621 | {
622 |   for (int y = 0; y < height; ++y)
623 |   {
624 |     for (int x = 0; x < width; ++x)
625 |     {
626 |       if (max4(s1p[x], s2p[x], s3p[x], s4p[x]) -
627 |         min4(s1p[x], s2p[x], s3p[x], s4p[x]) >= thresh)
628 |         dstp[x] = 0;
629 |     }
630 | 
631 |     s1p += stride;
632 |     s2p += stride;
633 |     s3p += stride;
634 |     s4p += stride;
635 |     dstp += stride;
636 |   }
637 | }
638 | 
639 | #ifdef INTEL_INTRINSICS
// SSE2 vertical 3-tap blur, 16 pixels per iteration.
//   first row : dst = (cur + below + 1) >> 1
//   inner rows: dst = (above + 2 * cur + below + 2) >> 2
//   last row  : dst = (above + cur + 1) >> 1
// Aligned loads/stores are used throughout, so srcp, dstp and stride must
// keep every row 16-byte aligned; rows are processed in whole 16-byte chunks.
void VerticalBlur3_SSE2_simd(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height)
{
  const __m128i zero = _mm_setzero_si128();
  const __m128i rounding = _mm_set1_epi16(2);

  // (a + 2*b + c + 2) >> 2 on eight 16-bit lanes
  const auto blur121 = [&](__m128i a, __m128i b, __m128i c) {
    const __m128i sum = _mm_add_epi16(_mm_add_epi16(a, _mm_slli_epi16(b, 1)), c);
    return _mm_srli_epi16(_mm_add_epi16(sum, rounding), 2);
  };

  const uint8_t* cur = srcp;
  const uint8_t* below = srcp + stride;

  // First row: rounded average with the row below.
  for (int col = 0; col < width; col += 16)
  {
    const __m128i c = _mm_load_si128(reinterpret_cast<const __m128i*>(cur + col));
    const __m128i b = _mm_load_si128(reinterpret_cast<const __m128i*>(below + col));
    _mm_store_si128(reinterpret_cast<__m128i*>(dstp + col), _mm_avg_epu8(c, b));
  }

  const uint8_t* above = cur;
  cur = below;
  below += stride;
  dstp += stride;

  // Inner rows: widen to 16 bits, apply the 1-2-1 kernel, pack back.
  for (int row = 1; row < height - 1; ++row)
  {
    for (int col = 0; col < width; col += 16)
    {
      const __m128i a = _mm_load_si128(reinterpret_cast<const __m128i*>(above + col));
      const __m128i c = _mm_load_si128(reinterpret_cast<const __m128i*>(cur + col));
      const __m128i b = _mm_load_si128(reinterpret_cast<const __m128i*>(below + col));

      const __m128i lo = blur121(_mm_unpacklo_epi8(a, zero), _mm_unpacklo_epi8(c, zero), _mm_unpacklo_epi8(b, zero));
      const __m128i hi = blur121(_mm_unpackhi_epi8(a, zero), _mm_unpackhi_epi8(c, zero), _mm_unpackhi_epi8(b, zero));

      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + col), _mm_packus_epi16(lo, hi));
    }

    above = cur;
    cur = below;
    below += stride;
    dstp += stride;
  }

  // Last row: rounded average with the row above.
  for (int col = 0; col < width; col += 16)
  {
    const __m128i a = _mm_load_si128(reinterpret_cast<const __m128i*>(above + col));
    const __m128i c = _mm_load_si128(reinterpret_cast<const __m128i*>(cur + col));
    _mm_store_si128(reinterpret_cast<__m128i*>(dstp + col), _mm_avg_epu8(a, c));
  }
}
700 | #endif
701 | 
// Scalar vertical 3-tap blur.
//   first row : dst = (cur + below + 1) >> 1
//   inner rows: dst = (above + 2 * cur + below + 2) >> 2
//   last row  : dst = (above + cur + 1) >> 1
// Exactly `width` bytes are written per row; both planes share `stride`.
void VerticalBlur3_c(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height)
{
  const uint8_t* cur = srcp;
  const uint8_t* below = srcp + stride;

  // First row has no row above it.
  for (int col = 0; col < width; ++col)
    dstp[col] = (cur[col] + below[col] + 1) >> 1;

  const uint8_t* above = cur;
  cur = below;
  below += stride;
  dstp += stride;

  for (int row = 1; row < height - 1; ++row)
  {
    for (int col = 0; col < width; ++col)
      dstp[col] = (above[col] + 2 * cur[col] + below[col] + 2) >> 2;

    above = cur;
    cur = below;
    below += stride;
    dstp += stride;
  }

  // Last row has no row below it.
  for (int col = 0; col < width; ++col)
    dstp[col] = (above[col] + cur[col] + 1) >> 1;
}
729 | 
730 | #ifdef INTEL_INTRINSICS
// SSE2 horizontal 3-tap blur, 16 pixels per iteration:
//   dstp[x] = (srcp[x - 1] + 2 * srcp[x] + srcp[x + 1] + 2) >> 2
// width mod 16 and srcp alignment guaranteed; dstp is written with aligned
// stores as well.
// There is no special handling of the first/last column here: every row reads
// srcp[-1] and srcp[width].
// NOTE(review): presumably the caller provides readable bytes on both sides
// or handles the borders separately — confirm at the call site.
void HorizontalBlur3_SSE2_simd(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height)
{
  const __m128i zero = _mm_setzero_si128();
  const __m128i rounding = _mm_set1_epi16(2);

  // (l + 2*c + r + 2) >> 2 on eight 16-bit lanes
  const auto blur121 = [&](__m128i l, __m128i c, __m128i r) {
    const __m128i sum = _mm_add_epi16(_mm_add_epi16(l, _mm_slli_epi16(c, 1)), r);
    return _mm_srli_epi16(_mm_add_epi16(sum, rounding), 2);
  };

  for (int row = 0; row < height; ++row, srcp += stride, dstp += stride)
  {
    for (int col = 0; col < width; col += 16)
    {
      const uint8_t* center = srcp + col;

      // Neighbours are one byte off the aligned position, hence unaligned loads.
      const __m128i left = _mm_loadu_si128(reinterpret_cast<const __m128i*>(center - 1));
      const __m128i mid = _mm_load_si128(reinterpret_cast<const __m128i*>(center));
      const __m128i right = _mm_loadu_si128(reinterpret_cast<const __m128i*>(center + 1));

      const __m128i lo = blur121(_mm_unpacklo_epi8(left, zero), _mm_unpacklo_epi8(mid, zero), _mm_unpacklo_epi8(right, zero));
      const __m128i hi = blur121(_mm_unpackhi_epi8(left, zero), _mm_unpackhi_epi8(mid, zero), _mm_unpackhi_epi8(right, zero));

      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + col), _mm_packus_epi16(lo, hi));
    }
  }
}
765 | #endif
766 | 
// Scalar horizontal 3-tap blur.
//   first column : dst = (s[0] + s[1] + 1) >> 1
//   inner columns: dst = (s[x - 1] + 2 * s[x] + s[x + 1] + 2) >> 2
//   last column  : dst = (s[width - 2] + s[width - 1] + 1) >> 1
// Source and destination rows advance by the same stride.
void HorizontalBlur3_c(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height)
{
  const int last = width - 1;

  for (int row = 0; row < height; ++row, srcp += stride, dstp += stride)
  {
    // Left border: only a right-hand neighbour exists.
    dstp[0] = (srcp[0] + srcp[1] + 1) >> 1;

    for (int col = 1; col < last; ++col)
    {
      const int neighbours = srcp[col - 1] + srcp[col + 1];
      dstp[col] = (neighbours + 2 * srcp[col] + 2) >> 2;
    }

    // Right border: only a left-hand neighbour exists.
    dstp[last] = (srcp[last - 1] + srcp[last] + 1) >> 1;
  }
}
782 | 
783 | #ifdef INTEL_INTRINSICS
// SSE2 horizontal 5-tap [1 4 6 4 1]/16 blur, 16 pixels per iteration:
//   dstp[x] = (srcp[x-2] + 4*(srcp[x-1] + srcp[x+1]) + 6*srcp[x] + srcp[x+2] + 8) >> 4
// The centre load and the store are aligned, so srcp/dstp rows must be
// 16-byte aligned. No border handling is done here: every row reads
// srcp[-2] .. srcp[width + 1].
// NOTE(review): presumably the caller guarantees those bytes are readable or
// treats the borders separately — confirm at the call site.
void HorizontalBlur6_SSE2_simd(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height)
{
  const __m128i zero = _mm_setzero_si128();
  const __m128i rounding = _mm_set1_epi16(8);
  const __m128i weight6 = _mm_set1_epi16(6);

  // Applies the kernel to one half (eight lanes) already widened to 16 bits.
  const auto kernel = [&](__m128i far_l, __m128i near_l, __m128i mid, __m128i near_r, __m128i far_r) {
    const __m128i outer = _mm_add_epi16(far_l, far_r);
    const __m128i inner = _mm_slli_epi16(_mm_add_epi16(near_l, near_r), 2);
    const __m128i core = _mm_mullo_epi16(mid, weight6);
    const __m128i sum = _mm_add_epi16(_mm_add_epi16(outer, inner), core);
    return _mm_srli_epi16(_mm_add_epi16(sum, rounding), 4);
  };

  for (int row = 0; row < height; ++row, srcp += stride, dstp += stride)
  {
    for (int col = 0; col < width; col += 16)
    {
      const uint8_t* center = srcp + col;

      const __m128i m2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(center - 2));
      const __m128i m1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(center - 1));
      const __m128i c0 = _mm_load_si128(reinterpret_cast<const __m128i*>(center));
      const __m128i p1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(center + 1));
      const __m128i p2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(center + 2));

      const __m128i lo = kernel(_mm_unpacklo_epi8(m2, zero), _mm_unpacklo_epi8(m1, zero),
                                _mm_unpacklo_epi8(c0, zero),
                                _mm_unpacklo_epi8(p1, zero), _mm_unpacklo_epi8(p2, zero));
      const __m128i hi = kernel(_mm_unpackhi_epi8(m2, zero), _mm_unpackhi_epi8(m1, zero),
                                _mm_unpackhi_epi8(c0, zero),
                                _mm_unpackhi_epi8(p1, zero), _mm_unpackhi_epi8(p2, zero));

      _mm_store_si128(reinterpret_cast<__m128i*>(dstp + col), _mm_packus_epi16(lo, hi));
    }
  }
}
826 | #endif
827 | 
// Scalar horizontal 5-tap [1 4 6 4 1]/16 blur.
// Inner columns use the full kernel; the two columns at each border use
// reduced kernels whose weights still sum to 16:
//   x == 0        : 6*s[0] + 8*s[1] + 2*s[2]
//   x == 1        : 4*(s[0] + s[2]) + 6*s[1] + 2*s[3]
//   x == width - 2: 2*s[w-4] + 4*(s[w-3] + s[w-1]) + 6*s[w-2]
//   x == width - 1: 2*s[w-3] + 8*s[w-2] + 6*s[w-1]
// Every sum is rounded with +8 before the >> 4.
void HorizontalBlur6_c(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height)
{
  const int last = width - 1;

  for (int row = 0; row < height; ++row, srcp += stride, dstp += stride)
  {
    // Left border
    dstp[0] = (6 * srcp[0] + 8 * srcp[1] + 2 * srcp[2] + 8) >> 4;
    dstp[1] = (4 * (srcp[0] + srcp[2]) + 6 * srcp[1] + 2 * srcp[3] + 8) >> 4;

    // Interior
    for (int col = 2; col < last - 1; ++col)
    {
      const int outer = srcp[col - 2] + srcp[col + 2];
      const int inner = srcp[col - 1] + srcp[col + 1];
      dstp[col] = (outer + 4 * inner + 6 * srcp[col] + 8) >> 4;
    }

    // Right border
    dstp[last - 1] = (2 * srcp[last - 3] + 4 * (srcp[last - 2] + srcp[last]) + 6 * srcp[last - 1] + 8) >> 4;
    dstp[last] = (2 * srcp[last - 2] + 8 * srcp[last - 1] + 6 * srcp[last] + 8) >> 4;
  }
}
845 | 
846 | #ifdef INTEL_INTRINSICS
// In-place neighbourhood mask, 16 pixels per iteration:
//   srcp[x] &= (above[x-1] | above[x] | above[x+1] |
//               below[x-1] | below[x] | below[x+1]);
// where above/below are the rows at -stride/+stride.
// The current row is read and written with aligned accesses, so it must be
// 16-byte aligned. Because the update is in place, from the second processed
// row onwards the "above" row is the one already rewritten by the previous
// iteration. Rows -1 and `height` as well as columns -1 and `width` are read.
// NOTE(review): presumably the caller passes an interior region of a padded
// plane — confirm at the call site.
void andNeighborsInPlace_SSE2_simd(uint8_t* srcp, int stride, int width, int height)
{
  // OR of the three 16-byte windows starting at row[-1], row[0] and row[+1].
  const auto or3 = [](const uint8_t* row) {
    const __m128i l = _mm_loadu_si128(reinterpret_cast<const __m128i*>(row - 1));
    const __m128i c = _mm_loadu_si128(reinterpret_cast<const __m128i*>(row));
    const __m128i r = _mm_loadu_si128(reinterpret_cast<const __m128i*>(row + 1));
    return _mm_or_si128(_mm_or_si128(l, c), r);
  };

  uint8_t* cur = srcp;
  for (int row = 0; row < height; ++row, cur += stride)
  {
    const uint8_t* above = cur - stride;
    const uint8_t* below = cur + stride;

    for (int col = 0; col < width; col += 16)
    {
      const __m128i center = _mm_load_si128(reinterpret_cast<const __m128i*>(cur + col));
      const __m128i from_above = or3(above + col);
      const __m128i from_below = or3(below + col);
      const __m128i around = _mm_or_si128(from_above, from_below);
      _mm_store_si128(reinterpret_cast<__m128i*>(cur + col), _mm_and_si128(center, around));
    }
  }
}
874 | #endif
875 | 


--------------------------------------------------------------------------------
/TComb/avs/alignment.h:
--------------------------------------------------------------------------------
  1 | // Avisynth C Interface Version 0.20
  2 | // Copyright 2003 Kevin Atkinson
  3 | 
  4 | // This program is free software; you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License as published by
  6 | // the Free Software Foundation; either version 2 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // This program is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | // GNU General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU General Public License
 15 | // along with this program; if not, write to the Free Software
 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 17 | // http://www.gnu.org/copyleft/gpl.html .
 18 | //
 19 | // As a special exception, I give you permission to link to the
 20 | // Avisynth C interface with independent modules that communicate with
 21 | // the Avisynth C interface solely through the interfaces defined in
 22 | // avisynth_c.h, regardless of the license terms of these independent
 23 | // modules, and to copy and distribute the resulting combined work
 24 | // under terms of your choice, provided that every copy of the
 25 | // combined work is accompanied by a complete copy of the source code
 26 | // of the Avisynth C interface and Avisynth itself (with the version
 27 | // used to produce the combined work), being distributed under the
 28 | // terms of the GNU General Public License plus this exception.  An
 29 | // independent module is a module which is not derived from or based
 30 | // on Avisynth C Interface, such as 3rd-party filters, import and
 31 | // export plugins, or graphical user interfaces.
 32 | 
 33 | #ifndef AVS_ALIGNMENT_H
 34 | #define AVS_ALIGNMENT_H
 35 | 
 36 | // Functions and macros to help work with alignment requirements.
 37 | 
 38 | // Tells if a number is a power of two.
 39 | #define IS_POWER2(n) ((n) && !((n) & ((n) - 1)))
 40 | 
 41 | // Tells if the pointer "ptr" is aligned to "align" bytes.
 42 | #define IS_PTR_ALIGNED(ptr, align) (((uintptr_t)ptr & ((uintptr_t)(align-1))) == 0)
 43 | 
 44 | // Rounds up the number "n" to the next greater multiple of "align"
 45 | #define ALIGN_NUMBER(n, align) (((n) + (align)-1) & (~((align)-1)))
 46 | 
 47 | // Rounds up the pointer address "ptr" to the next greater multiple of "align"
 48 | #define ALIGN_POINTER(ptr, align) (((uintptr_t)(ptr) + (align)-1) & (~(uintptr_t)((align)-1)))
 49 | 
 50 | #ifdef __cplusplus
 51 | 
 52 | #include <cassert>
 53 | #include <cstdlib>
 54 | #include <cstdint>
 55 | #include "config.h"
 56 | 
 57 | #if defined(MSVC) && _MSC_VER<1400
 58 |     // needed for VS2013, otherwise C++11 'alignas' works
 59 |     #define avs_alignas(x) __declspec(align(x))
 60 | #else
 61 |     // assumes C++11 support
 62 |     #define avs_alignas(x) alignas(x)
 63 | #endif
 64 | 
 65 | template<typename T>
 66 | static bool IsPtrAligned(T* ptr, size_t align)
 67 | {
 68 |   assert(IS_POWER2(align));
 69 |   return (bool)IS_PTR_ALIGNED(ptr, align);
 70 | }
 71 | 
 72 | template<typename T>
 73 | static T AlignNumber(T n, T align)
 74 | {
 75 |   assert(IS_POWER2(align));
 76 |   return ALIGN_NUMBER(n, align);
 77 | }
 78 | 
 79 | template<typename T>
 80 | static T* AlignPointer(T* ptr, size_t align)
 81 | {
 82 |   assert(IS_POWER2(align));
 83 |   return (T*)ALIGN_POINTER(ptr, align);
 84 | }
 85 | 
 86 | extern "C"
 87 | {
 88 | #else
 89 | #include <stdlib.h>
 90 | #endif  // __cplusplus
 91 | 
 92 | // Returns a new buffer that is at least the size "nbytes".
 93 | // The buffer will be aligned to "align" bytes.
 94 | // Returns NULL on error. On successful allocation,
 95 | // the returned buffer must be freed using "avs_free".
 96 | inline void* avs_malloc(size_t nbytes, size_t align)
 97 | {
 98 |   if (!IS_POWER2(align))
 99 |     return NULL;
100 | 
101 |   size_t offset = sizeof(void*) + align - 1;
102 | 
103 |   void *orig = malloc(nbytes + offset);
104 |   if (orig == NULL)
105 |    return NULL;
106 | 
107 |   void **aligned = (void**)(((uintptr_t)orig + (uintptr_t)offset) & (~(uintptr_t)(align-1)));
108 |   aligned[-1] = orig;
109 |   return aligned;
110 | }
111 | 
// Releases a buffer obtained from "avs_malloc". Such buffers must never be
// passed to plain "free": the pointer that malloc() really returned is kept
// in the pointer-sized slot just in front of the buffer, and that is what
// gets freed here. Like free(), a NULL argument is accepted and ignored.
inline void avs_free(void *ptr)
{
  if (ptr != NULL)
  {
    void **slot = (void**)ptr - 1;
    free(*slot);
  }
}
122 | 
123 | #ifdef __cplusplus
124 | } // extern "C"
125 | 
126 | // The point of these undef's is to force using the template functions
127 | // if we are in C++ mode. For C, the user can rely only on the macros.
128 | #undef IS_PTR_ALIGNED
129 | #undef ALIGN_NUMBER
130 | #undef ALIGN_POINTER
131 | 
132 | #endif  // __cplusplus
133 | 
134 | #endif  //AVS_ALIGNMENT_H
135 | 


--------------------------------------------------------------------------------
/TComb/avs/capi.h:
--------------------------------------------------------------------------------
  1 | // Avisynth C Interface Version 0.20
  2 | // Copyright 2003 Kevin Atkinson
  3 | 
  4 | // This program is free software; you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License as published by
  6 | // the Free Software Foundation; either version 2 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // This program is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | // GNU General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU General Public License
 15 | // along with this program; if not, write to the Free Software
 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 17 | // http://www.gnu.org/copyleft/gpl.html .
 18 | //
 19 | // As a special exception, I give you permission to link to the
 20 | // Avisynth C interface with independent modules that communicate with
 21 | // the Avisynth C interface solely through the interfaces defined in
 22 | // avisynth_c.h, regardless of the license terms of these independent
 23 | // modules, and to copy and distribute the resulting combined work
 24 | // under terms of your choice, provided that every copy of the
 25 | // combined work is accompanied by a complete copy of the source code
 26 | // of the Avisynth C interface and Avisynth itself (with the version
 27 | // used to produce the combined work), being distributed under the
 28 | // terms of the GNU General Public License plus this exception.  An
 29 | // independent module is a module which is not derived from or based
 30 | // on Avisynth C Interface, such as 3rd-party filters, import and
 31 | // export plugins, or graphical user interfaces.
 32 | 
 33 | #ifndef AVS_CAPI_H
 34 | #define AVS_CAPI_H
 35 | 
 36 | #include "config.h"
 37 | 
 38 | #ifdef AVS_POSIX
 39 | // this is also defined in avs/posix.h
 40 | #ifndef AVS_HAIKU
 41 | #define __declspec(x)
 42 | #endif
 43 | #endif
 44 | 
 45 | #ifdef __cplusplus
 46 | #  define EXTERN_C extern "C"
 47 | #else
 48 | #  define EXTERN_C
 49 | #endif
 50 | 
 51 | #ifdef AVS_WINDOWS
 52 | #ifdef BUILDING_AVSCORE
 53 | #  if defined(GCC) && defined(X86_32)
 54 | #    define AVSC_CC
 55 | #  else // MSVC builds and 64-bit GCC
 56 | #    ifndef AVSC_USE_STDCALL
 57 | #      define AVSC_CC __cdecl
 58 | #    else
 59 | #      define AVSC_CC __stdcall
 60 | #    endif
 61 | #  endif
 62 | #else // needed for programs that talk to AviSynth+
 63 | #  ifndef AVSC_WIN32_GCC32 // see comment below
 64 | #    ifndef AVSC_USE_STDCALL
 65 | #      define AVSC_CC __cdecl
 66 | #    else
 67 | #      define AVSC_CC __stdcall
 68 | #    endif
 69 | #  else
 70 | #    define AVSC_CC
 71 | #  endif
 72 | #endif
 73 | #  else
 74 | #    define AVSC_CC
 75 | #endif
 76 | 
 77 | // On 64-bit Windows, there's only one calling convention,
 78 | // so there is no difference between MSVC and GCC. On 32-bit,
 79 | // this isn't true. The convention that GCC needs to use to
 80 | // even build AviSynth+ as 32-bit makes anything that uses
 81 | // it incompatible with 32-bit MSVC builds of AviSynth+.
 82 | // The AVSC_WIN32_GCC32 define is meant to provide a user
 83 | // switchable way to make builds of FFmpeg to test 32-bit
 84 | // GCC builds of AviSynth+ without having to screw around
 85 | // with alternate headers, while still default to the usual
 86 | // situation of using 32-bit MSVC builds of AviSynth+.
 87 | 
 88 | // Hopefully, this situation will eventually be resolved
 89 | // and a broadly compatible solution will arise so the
 90 | // same 32-bit FFmpeg build can handle either MSVC or GCC
 91 | // builds of AviSynth+.
 92 | 
 93 | #define AVSC_INLINE static __inline
 94 | 
 95 | #ifdef BUILDING_AVSCORE
 96 | #ifdef AVS_WINDOWS
 97 | #  define AVSC_EXPORT __declspec(dllexport)
 98 | #  define AVSC_API(ret, name) EXTERN_C AVSC_EXPORT ret AVSC_CC name
 99 | #else
100 | #  define AVSC_EXPORT EXTERN_C
101 | #  define AVSC_API(ret, name) EXTERN_C ret AVSC_CC name
102 | #endif
103 | #else
104 | #  define AVSC_EXPORT EXTERN_C __declspec(dllexport)
105 | #  ifndef AVSC_NO_DECLSPEC
106 | #    define AVSC_API(ret, name) EXTERN_C __declspec(dllimport) ret AVSC_CC name
107 | #  else
108 | #    define AVSC_API(ret, name) typedef ret (AVSC_CC *name##_func)
109 | #  endif
110 | #endif
111 | 
112 | #endif //AVS_CAPI_H
113 | 


--------------------------------------------------------------------------------
/TComb/avs/config.h:
--------------------------------------------------------------------------------
  1 | // Avisynth C Interface Version 0.20
  2 | // Copyright 2003 Kevin Atkinson
  3 | 
  4 | // This program is free software; you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License as published by
  6 | // the Free Software Foundation; either version 2 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // This program is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | // GNU General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU General Public License
 15 | // along with this program; if not, write to the Free Software
 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 17 | // http://www.gnu.org/copyleft/gpl.html .
 18 | //
 19 | // As a special exception, I give you permission to link to the
 20 | // Avisynth C interface with independent modules that communicate with
 21 | // the Avisynth C interface solely through the interfaces defined in
 22 | // avisynth_c.h, regardless of the license terms of these independent
 23 | // modules, and to copy and distribute the resulting combined work
 24 | // under terms of your choice, provided that every copy of the
 25 | // combined work is accompanied by a complete copy of the source code
 26 | // of the Avisynth C interface and Avisynth itself (with the version
 27 | // used to produce the combined work), being distributed under the
 28 | // terms of the GNU General Public License plus this exception.  An
 29 | // independent module is a module which is not derived from or based
 30 | // on Avisynth C Interface, such as 3rd-party filters, import and
 31 | // export plugins, or graphical user interfaces.
 32 | 
 33 | #ifndef AVS_CONFIG_H
 34 | #define AVS_CONFIG_H
 35 | 
 36 | // Undefine this to get cdecl calling convention
 37 | #define AVSC_USE_STDCALL 1
 38 | 
 39 | // NOTE TO PLUGIN AUTHORS:
 40 | // Because FRAME_ALIGN can be substantially higher than the alignment
 41 | // a plugin actually needs, plugins should not use FRAME_ALIGN to check for
 42 | // alignment. They should always request the exact alignment value they need.
 43 | // This is to make sure that plugins work over the widest range of AviSynth
 44 | // builds possible.
 45 | #define FRAME_ALIGN 64
 46 | 
 47 | #if   defined(_M_AMD64) || defined(__x86_64)
 48 | #   define X86_64
 49 | #elif defined(_M_IX86) || defined(__i386__)
 50 | #   define X86_32
 51 | // VS2017 introduced _M_ARM64
 52 | #elif defined(_M_ARM64) || defined(__aarch64__)
 53 | #   define ARM64
 54 | #elif defined(_M_ARM) || defined(__arm__)
 55 | #   define ARM32
 56 | #elif defined(__PPC64__)
 57 | #   define PPC64
 58 | #elif defined(_M_PPC) || defined(__PPC__) || defined(__POWERPC__)
 59 | #   define PPC32
 60 | #else
 61 | #   error Unsupported CPU architecture.
 62 | #endif
 63 | 
 64 | //            VC++  LLVM-Clang-cl   MinGW-Gnu
 65 | // MSVC        x          x
 66 | // MSVC_PURE   x
 67 | // CLANG                  x
 68 | // GCC                                  x
 69 | 
 70 | #if defined(__clang__)
 71 | // Check clang first. clang-cl also defines _MSC_VER
 72 | // We set MSVC because they are mostly compatible
 73 | #   define CLANG
 74 | #if defined(_MSC_VER)
 75 | #   define MSVC
 76 | #   define AVS_FORCEINLINE __attribute__((always_inline))
 77 | #else
 78 | #   define AVS_FORCEINLINE __attribute__((always_inline)) inline
 79 | #endif
 80 | #elif   defined(_MSC_VER)
 81 | #   define MSVC
 82 | #   define MSVC_PURE
 83 | #   define AVS_FORCEINLINE __forceinline
 84 | #elif defined(__GNUC__)
 85 | #   define GCC
 86 | #   define AVS_FORCEINLINE __attribute__((always_inline)) inline
 87 | #else
 88 | #   error Unsupported compiler.
 89 | #   define AVS_FORCEINLINE inline
 90 | #   undef __forceinline
 91 | #   define __forceinline inline
 92 | #endif
 93 | 
 94 | #if defined(_WIN32)
 95 | #   define AVS_WINDOWS
 96 | #elif defined(__linux__)
 97 | #   define AVS_LINUX
 98 | #   define AVS_POSIX
 99 | #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
100 | #   define AVS_BSD
101 | #   define AVS_POSIX
102 | #elif defined(__APPLE__)
103 | #   define AVS_MACOS
104 | #   define AVS_POSIX
105 | #elif defined(__HAIKU__)
106 | #   define AVS_HAIKU
107 | #   define AVS_POSIX
108 | #else
109 | #   error Operating system unsupported.
110 | #endif
111 | 
112 | // useful warnings disabler macros for supported compilers
113 | 
114 | #if defined(_MSC_VER)
115 | #define DISABLE_WARNING_PUSH           __pragma(warning( push ))
116 | #define DISABLE_WARNING_POP            __pragma(warning( pop ))
117 | #define DISABLE_WARNING(warningNumber) __pragma(warning( disable : warningNumber ))
118 | 
119 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE      DISABLE_WARNING(4101)
120 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION            DISABLE_WARNING(4505)
121 | // other warnings you want to deactivate...
122 | 
123 | #elif defined(__GNUC__) || defined(__clang__)
124 | #define DO_PRAGMA(X) _Pragma(#X)
125 | #define DISABLE_WARNING_PUSH           DO_PRAGMA(GCC diagnostic push)
126 | #define DISABLE_WARNING_POP            DO_PRAGMA(GCC diagnostic pop)
127 | #define DISABLE_WARNING(warningName)   DO_PRAGMA(GCC diagnostic ignored #warningName)
128 | 
129 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE      DISABLE_WARNING(-Wunused-variable)
130 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION            DISABLE_WARNING(-Wunused-function)
131 | // other warnings you want to deactivate...
132 | 
133 | #else
134 | #define DISABLE_WARNING_PUSH
135 | #define DISABLE_WARNING_POP
136 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE
137 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION
138 | // other warnings you want to deactivate...
139 | 
140 | #endif
141 | 
142 | #if defined(AVS_POSIX)
143 | #define NEW_AVSVALUE
144 | #else
145 | #define NEW_AVSVALUE
146 | #endif
147 | 
148 | #if defined(AVS_WINDOWS)
149 | // Windows XP does not have proper initialization for
150 | // thread local variables.
151 | // Use workaround instead __declspec(thread)
152 | #define XP_TLS
153 | #endif
154 | 
155 | #endif //AVS_CONFIG_H
156 | 


--------------------------------------------------------------------------------
/TComb/avs/cpuid.h:
--------------------------------------------------------------------------------
 1 | // This program is free software; you can redistribute it and/or modify
 2 | // it under the terms of the GNU General Public License as published by
 3 | // the Free Software Foundation; either version 2 of the License, or
 4 | // (at your option) any later version.
 5 | //
 6 | // This program is distributed in the hope that it will be useful,
 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception.  An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 | 
32 | #ifndef AVSCORE_CPUID_H
33 | #define AVSCORE_CPUID_H
34 | 
35 | // For GetCPUFlags.  These are backwards-compatible with those in VirtualDub.
36 | // ending with SSE4_2
37 | // For emulation see https://software.intel.com/en-us/articles/intel-software-development-emulator
38 | enum {
39 |                     /* oldest CPU to support extension */
40 |   CPUF_FORCE        =  0x01,   //  N/A
41 |   CPUF_FPU          =  0x02,   //  386/486DX
42 |   CPUF_MMX          =  0x04,   //  P55C, K6, PII
43 |   CPUF_INTEGER_SSE  =  0x08,   //  PIII, Athlon
44 |   CPUF_SSE          =  0x10,   //  PIII, Athlon XP/MP
45 |   CPUF_SSE2         =  0x20,   //  PIV, K8
46 |   CPUF_3DNOW        =  0x40,   //  K6-2
47 |   CPUF_3DNOW_EXT    =  0x80,   //  Athlon
48 |   CPUF_X86_64       =  0xA0,   //  Hammer (note: equals CPUF_3DNOW_EXT | CPUF_SSE2, which
49 |                                //          only Hammer will have anyway)
50 |   CPUF_SSE3         = 0x100,   //  PIV+, K8 Venice
51 |   CPUF_SSSE3        = 0x200,   //  Core 2
52 |   CPUF_SSE4         = 0x400,   //  same bit as CPUF_SSE4_1
53 |   CPUF_SSE4_1       = 0x400,   //  Penryn, Wolfdale, Yorkfield
54 |   CPUF_AVX          = 0x800,   //  Sandy Bridge, Bulldozer
55 |   CPUF_SSE4_2       = 0x1000,  //  Nehalem
56 |   // AVS+
57 |   CPUF_AVX2         = 0x2000,   //  Haswell
58 |   CPUF_FMA3         = 0x4000,
59 |   CPUF_F16C         = 0x8000,
60 |   CPUF_MOVBE        = 0x10000,  // Big Endian move
61 |   CPUF_POPCNT       = 0x20000,
62 |   CPUF_AES          = 0x40000,
63 |   CPUF_FMA4         = 0x80000,
64 | 
65 |   CPUF_AVX512F      = 0x100000,  // AVX-512 Foundation.
66 |   CPUF_AVX512DQ     = 0x200000,  // AVX-512 DQ (Double/Quad granular) Instructions
67 |   CPUF_AVX512PF     = 0x400000,  // AVX-512 Prefetch
68 |   CPUF_AVX512ER     = 0x800000,  // AVX-512 Exponential and Reciprocal
69 |   CPUF_AVX512CD     = 0x1000000, // AVX-512 Conflict Detection
70 |   CPUF_AVX512BW     = 0x2000000, // AVX-512 BW (Byte/Word granular) Instructions
71 |   CPUF_AVX512VL     = 0x4000000, // AVX-512 VL (128/256 Vector Length) Extensions
72 |   CPUF_AVX512IFMA   = 0x8000000, // AVX-512 IFMA integer 52 bit
73 |   CPUF_AVX512VBMI   = 0x10000000,// AVX-512 VBMI
74 | };
75 | 
76 | #ifdef BUILDING_AVSCORE
77 | int GetCPUFlags();
78 | void SetMaxCPU(int new_flags);
79 | #endif
80 | 
81 | #endif // AVSCORE_CPUID_H
82 | 


--------------------------------------------------------------------------------
/TComb/avs/filesystem.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // Snippet copied from filesystem/README.md
 4 | 
 5 | #if defined(__cplusplus) && __cplusplus >= 201703L && defined(__has_include)
 6 | #if __has_include(<filesystem>)
 7 | #define GHC_USE_STD_FS
 8 | #include <filesystem>
 9 | namespace fs = std::filesystem;
10 | #endif
11 | #endif
12 | #ifndef GHC_USE_STD_FS
13 | #include <ghc/filesystem.hpp>
14 | namespace fs = ghc::filesystem;
15 | #endif
16 | 


--------------------------------------------------------------------------------
/TComb/avs/minmax.h:
--------------------------------------------------------------------------------
 1 | // This program is free software; you can redistribute it and/or modify
 2 | // it under the terms of the GNU General Public License as published by
 3 | // the Free Software Foundation; either version 2 of the License, or
 4 | // (at your option) any later version.
 5 | //
 6 | // This program is distributed in the hope that it will be useful,
 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception.  An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 | 
32 | #ifndef AVSCORE_MINMAX_H
33 | #define AVSCORE_MINMAX_H
34 | 
// Returns the smaller of the two arguments, compared with operator<.
// When neither argument is less than the other, v2 is returned.
template<typename T>
T min(T v1, T v2)
{
  if (v1 < v2)
    return v1;
  return v2;
}
40 | 
// Returns the larger of the two arguments, compared with operator>.
// When neither argument is greater than the other, v2 is returned.
template<typename T>
T max(T v1, T v2)
{
  if (v1 > v2)
    return v1;
  return v2;
}
46 | 
// Limits n to the range [min, max].
// The upper bound is applied first and the lower bound second, so when
// min is greater than max the result is min.
template<typename T>
T clamp(T n, T min, T max)
{
    if (n > max)
        n = max;
    if (n < min)
        return min;
    return n;
}
53 | 
54 | #endif // AVSCORE_MINMAX_H
55 | 


--------------------------------------------------------------------------------
/TComb/avs/posix.h:
--------------------------------------------------------------------------------
  1 | // This program is free software; you can redistribute it and/or modify
  2 | // it under the terms of the GNU General Public License as published by
  3 | // the Free Software Foundation; either version 2 of the License, or
  4 | // (at your option) any later version.
  5 | //
  6 | // This program is distributed in the hope that it will be useful,
  7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
  8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  9 | // GNU General Public License for more details.
 10 | //
 11 | // You should have received a copy of the GNU General Public License
 12 | // along with this program; if not, write to the Free Software
 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 14 | // http://www.gnu.org/copyleft/gpl.html .
 15 | //
 16 | // Linking Avisynth statically or dynamically with other modules is making a
 17 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
 18 | // General Public License cover the whole combination.
 19 | //
 20 | // As a special exception, the copyright holders of Avisynth give you
 21 | // permission to link Avisynth with independent modules that communicate with
 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
 23 | // terms of these independent modules, and to copy and distribute the
 24 | // resulting combined work under terms of your choice, provided that
 25 | // every copy of the combined work is accompanied by a complete copy of
 26 | // the source code of Avisynth (the version of Avisynth used to produce the
 27 | // combined work), being distributed under the terms of the GNU General
 28 | // Public License plus this exception.  An independent module is a module
 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
 30 | // import and export plugins, or graphical user interfaces.
 31 | 
 32 | #ifdef AVS_POSIX
 33 | #ifndef AVSCORE_POSIX_H
 34 | #define AVSCORE_POSIX_H
 35 | 
 36 | #ifdef __cplusplus
 37 | #include <cstring>
 38 | #endif
 39 | #include <strings.h>
 40 | #include <unistd.h>
 41 | 
// MSVC-specific keyword used in the Avisynth sources; defined as an empty
// macro so the preprocessor removes it.
#define __single_inheritance

// Win32 / MSVC CRT names that do not exist on POSIX systems are mapped onto
// libc functions below.
#if defined(AVS_HAIKU)
// NOTE(review): presumably Haiku's headers already define __declspec, hence
// the #undef before it is redefined - confirm.
#undef __declspec
#endif
// __declspec(...) expands to nothing.
#define __declspec(x)
// String helpers; the case-insensitive variants map to strcasecmp/strncasecmp.
#define lstrlen strlen
#define lstrcmp strcmp
#define lstrcmpi strcasecmp
#define _stricmp strcasecmp
#define _strnicmp strncasecmp
#define _strdup strdup
// Current-directory helpers forward to chdir()/getcwd().
// NOTE(review): POSIX getcwd() takes (buf, size), so the one-argument
// GetCurrentDirectoryW expansion looks like it cannot compile if it is ever
// used. The W variants also pass their argument straight to char-based
// calls. Confirm that no caller relies on them.
#define SetCurrentDirectory(x) chdir(x)
#define SetCurrentDirectoryW(x) chdir(x)
#define GetCurrentDirectoryW(x) getcwd(x)
// Underscore-prefixed MSVC CRT names mapped to their unprefixed equivalents.
#define _putenv putenv
#define _alloca alloca
 62 | // Borrowing some compatibility macros from AvxSynth, slightly modified
 63 | #define UInt32x32To64(a, b) ((uint64_t)(((uint64_t)((uint32_t)(a))) * ((uint32_t)(b))))
 64 | #define Int64ShrlMod32(a, b) ((uint64_t)((uint64_t)(a) >> (b)))
 65 | #define Int32x32To64(a, b)  ((int64_t)(((int64_t)((long)(a))) * ((long)(b))))
 66 | 
// Atomic increment/decrement of *x by 1 through the compiler's
// __sync_*_and_fetch builtins; the expression yields the updated value.
#define InterlockedIncrement(x) __sync_add_and_fetch((x), 1)
#define InterlockedDecrement(x) __sync_sub_and_fetch((x), 1)
// Computes nNumber * nNumerator / nDenominator in 64-bit arithmetic, adding
// half of the denominator before the truncating division, then narrows the
// result to int32_t. The macro has no check for nDenominator == 0, and
// nDenominator is expanded twice.
#define MulDiv(nNumber, nNumerator, nDenominator)   (int32_t) (((int64_t) (nNumber) * (int64_t) (nNumerator) + (int64_t) ((nDenominator)/2)) / (int64_t) (nDenominator))
 70 | 
// Win32 boolean spellings, defined only when nothing else has provided them.
#ifndef TRUE
#define TRUE  true
#endif

#ifndef FALSE
#define FALSE false
#endif

// Minimal HRESULT-style constants and predicates.
#define S_FALSE       (0x00000001)
#define E_FAIL        (0x80004005)
// A result counts as failed when bit 31 (0x80000000) is set.
#define FAILED(hr)    ((hr) & 0x80000000)
#define SUCCEEDED(hr) (!FAILED(hr))
 83 | 
// Windows-style STATUS_* exception codes, provided here as plain numeric
// constants. Values were copied from comments in exception.cpp.
#define STATUS_GUARD_PAGE_VIOLATION 0x80000001
#define STATUS_DATATYPE_MISALIGNMENT 0x80000002
#define STATUS_BREAKPOINT 0x80000003
#define STATUS_SINGLE_STEP 0x80000004
#define STATUS_ACCESS_VIOLATION 0xc0000005
#define STATUS_IN_PAGE_ERROR 0xc0000006
#define STATUS_INVALID_HANDLE 0xc0000008
#define STATUS_NO_MEMORY 0xc0000017
#define STATUS_ILLEGAL_INSTRUCTION 0xc000001d
#define STATUS_NONCONTINUABLE_EXCEPTION 0xc0000025
#define STATUS_INVALID_DISPOSITION 0xc0000026
#define STATUS_ARRAY_BOUNDS_EXCEEDED 0xc000008c
#define STATUS_FLOAT_DENORMAL_OPERAND 0xc000008d
#define STATUS_FLOAT_DIVIDE_BY_ZERO 0xc000008e
#define STATUS_FLOAT_INEXACT_RESULT 0xc000008f
#define STATUS_FLOAT_INVALID_OPERATION 0xc0000090
#define STATUS_FLOAT_OVERFLOW 0xc0000091
#define STATUS_FLOAT_STACK_CHECK 0xc0000092
#define STATUS_FLOAT_UNDERFLOW 0xc0000093
#define STATUS_INTEGER_DIVIDE_BY_ZERO 0xc0000094
#define STATUS_INTEGER_OVERFLOW 0xc0000095
#define STATUS_PRIVILEGED_INSTRUCTION 0xc0000096
#define STATUS_STACK_OVERFLOW 0xc00000fd
108 | 
// Calling convention keywords: defined as empty macros so declarations that
// use them still compile. Skipped when AVS_HAIKU is defined.
#ifndef AVS_HAIKU
#define __stdcall
#define __cdecl
#endif
114 | 
// PowerPC OS X is really niche these days, but this painless aliasing of
// the function/macro names used in posix_get_available_memory() is all it
// takes to let it work: on 32-bit PowerPC the 64-bit Mach VM statistics
// names are mapped onto their non-64 counterparts.  The G5 was 64-bit, and
// if 10.5 Leopard can run in native 64-bit, it probably uses the names in
// that block as-is.
#ifdef AVS_MACOS
#ifdef PPC32
#define vm_statistics64_data_t vm_statistics_data_t
#define HOST_VM_INFO64_COUNT HOST_VM_INFO_COUNT
#define HOST_VM_INFO64 HOST_VM_INFO
#define host_statistics64 host_statistics
#endif // PPC32
#endif // AVS_MACOS
127 | 
128 | #endif // AVSCORE_POSIX_H
129 | #endif // AVS_POSIX
130 | 


--------------------------------------------------------------------------------
/TComb/avs/types.h:
--------------------------------------------------------------------------------
 1 | // Avisynth C Interface Version 0.20
 2 | // Copyright 2003 Kevin Atkinson
 3 | 
 4 | // This program is free software; you can redistribute it and/or modify
 5 | // it under the terms of the GNU General Public License as published by
 6 | // the Free Software Foundation; either version 2 of the License, or
 7 | // (at your option) any later version.
 8 | //
 9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program; if not, write to the Free Software
16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
17 | // http://www.gnu.org/copyleft/gpl.html .
18 | //
19 | // As a special exception, I give you permission to link to the
20 | // Avisynth C interface with independent modules that communicate with
21 | // the Avisynth C interface solely through the interfaces defined in
22 | // avisynth_c.h, regardless of the license terms of these independent
23 | // modules, and to copy and distribute the resulting combined work
24 | // under terms of your choice, provided that every copy of the
25 | // combined work is accompanied by a complete copy of the source code
26 | // of the Avisynth C interface and Avisynth itself (with the version
27 | // used to produce the combined work), being distributed under the
28 | // terms of the GNU General Public License plus this exception.  An
29 | // independent module is a module which is not derived from or based
30 | // on Avisynth C Interface, such as 3rd-party filters, import and
31 | // export plugins, or graphical user interfaces.
32 | 
33 | #ifndef AVS_TYPES_H
34 | #define AVS_TYPES_H
35 | 
36 | // Define all types necessary for interfacing with avisynth.dll
37 | #include <stdint.h>
38 | #include <stdbool.h>
39 | #ifdef __cplusplus
40 |   #include <cstddef>
41 |   #include <cstdarg>
42 | #else
43 |   #include <stddef.h>
44 |   #include <stdarg.h>
45 | #endif
46 | 
// Raster types used by VirtualDub & Avisynth
typedef uint32_t Pixel32;  // 32-bit unsigned value
typedef uint8_t  BYTE;     // 8-bit unsigned value

// Audio Sample information
typedef float SFLOAT;      // sample stored as a float
53 | 
54 | #endif //AVS_TYPES_H
55 | 


--------------------------------------------------------------------------------
/TComb/avs/win.h:
--------------------------------------------------------------------------------
 1 | // This program is free software; you can redistribute it and/or modify
 2 | // it under the terms of the GNU General Public License as published by
 3 | // the Free Software Foundation; either version 2 of the License, or
 4 | // (at your option) any later version.
 5 | //
 6 | // This program is distributed in the hope that it will be useful,
 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception.  An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 | 
32 | #ifndef AVSCORE_WIN_H
33 | #define AVSCORE_WIN_H
34 | 
// Whenever you need windows headers, start by including this file, then the rest.

// Default the targeted Windows version when the build has not chosen one.
// NOTE(review): 0x0502 / 0x05020000 appear to be the SDK values for
// Windows Server 2003 / XP x64 rather than plain XP (0x0501) - confirm the
// intended minimum.
#if !defined(NTDDI_VERSION) && !defined(_WIN32_WINNT)
  #define NTDDI_VERSION 0x05020000
  #define _WIN32_WINNT  0x0502
#endif

// Configure <windows.h> before it is included: reduced header set, strict
// handle typing, and (unless already requested) no min/max macros.
#define WIN32_LEAN_AND_MEAN
#define STRICT
#if !defined(NOMINMAX)
    #define NOMINMAX
#endif

#include <windows.h>
50 | 
// Provision for UTF-8: at most 4 bytes per code point.
// The expansion is parenthesised so that it behaves as a single value inside
// larger expressions such as `n / AVS_MAX_PATH` or `n % AVS_MAX_PATH`.
#define AVS_MAX_PATH (MAX_PATH*4)
53 | 
54 | #endif // AVSCORE_WIN_H
55 | 


--------------------------------------------------------------------------------
/TComb/common.h:
--------------------------------------------------------------------------------
 1 | #ifndef __COMMON_H__
 2 | #define __COMMON_H__
 3 | 
 4 | #include "avisynth.h"
 5 | #include <stdint.h>
 6 | 
 7 | #if defined(__clang__)
 8 | // Check clang first. clang-cl also defines __MSC_VER
 9 | // We set MSVC because they are mostly compatible
10 | #   define CLANG
11 | #if defined(_MSC_VER)
12 | #   define MSVC
13 | #   define TC_FORCEINLINE __attribute__((always_inline))
14 | #else
15 | #   define TC_FORCEINLINE __attribute__((always_inline)) inline
16 | #endif
17 | #elif   defined(_MSC_VER)
18 | #   define MSVC
19 | #   define MSVC_PURE
20 | #   define TC_FORCEINLINE __forceinline
21 | #elif defined(__GNUC__)
22 | #   define GCC
23 | #   define TC_FORCEINLINE __attribute__((always_inline)) inline
24 | #else
25 | #   error Unsupported compiler.
26 | #   define TC_FORCEINLINE inline
27 | #   undef __forceinline
28 | #   define __forceinline inline
29 | #endif 
30 | 
31 | 
32 | #ifndef _WIN32
33 | #define OutputDebugString(x)
34 | #endif
35 | 
36 | #if (defined(GCC) || defined(CLANG)) && !defined(_WIN32)
37 | #include <stdlib.h>
38 | #define _aligned_malloc(size, alignment) aligned_alloc(alignment, size)
39 | #define _aligned_free(ptr) free(ptr)
40 | #endif
41 | 
// Extra headers for builds outside Windows.
#ifndef _WIN32
#include <stdio.h>
#ifdef AVS_POSIX
// NOTE(review): feature-test macros normally have to be defined before the
// first system header is included. Here <stdio.h> and the earlier includes
// in this file come first, so this definition may have no effect - confirm.
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 1
#endif
#include <limits.h>
#endif
#endif
51 | 
52 | #endif
53 | 


--------------------------------------------------------------------------------
/TComb/resource.h:
--------------------------------------------------------------------------------
 1 | //{{NO_DEPENDENCIES}}
 2 | // Microsoft Visual C++ generated include file.
 3 | // Used by TComb.rc
 4 | 
// Next default values for new objects
//
// These _APS_NEXT_* counters are defined only when APSTUDIO_INVOKED is
// defined and APSTUDIO_READONLY_SYMBOLS is not.
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE        101
#define _APS_NEXT_COMMAND_VALUE         40001
#define _APS_NEXT_CONTROL_VALUE         1001
#define _APS_NEXT_SYMED_VALUE           101
#endif
#endif
15 | 


--------------------------------------------------------------------------------
/cmake_uninstall.cmake.in:
--------------------------------------------------------------------------------
 1 | if(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
 2 |   message(FATAL_ERROR "Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
 3 | endif(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
 4 | 
 5 | file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files)
 6 | string(REGEX REPLACE "\n" ";" files "${files}")
 7 | foreach(file ${files})
 8 |   message(STATUS "Uninstalling $ENV{DESTDIR}${file}")
 9 |   if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
10 |     exec_program(
11 |       "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\""
12 |       OUTPUT_VARIABLE rm_out
13 |       RETURN_VALUE rm_retval
14 |       )
15 |     if(NOT "${rm_retval}" STREQUAL 0)
16 |       message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}")
17 |     endif(NOT "${rm_retval}" STREQUAL 0)
18 |   else(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
19 |     message(STATUS "File $ENV{DESTDIR}${file} does not exist.")
20 |   endif(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
21 | endforeach(file)
22 | 


--------------------------------------------------------------------------------