├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── TComb - ReadMe.txt ├── TComb ├── CMakeLists.txt ├── Files.cmake ├── PlanarFrame.cpp ├── PlanarFrame.h ├── TComb.cpp ├── TComb.h ├── TComb.rc ├── TComb.sln ├── TComb.vcproj ├── TComb.vcxproj ├── TComb.vcxproj.filters ├── TComb_asm.asm ├── TComb_asm_x64.asm ├── TComb_core.cpp ├── avisynth.h ├── avs │ ├── alignment.h │ ├── capi.h │ ├── config.h │ ├── cpuid.h │ ├── filesystem.h │ ├── minmax.h │ ├── posix.h │ ├── types.h │ └── win.h ├── common.h └── resource.h └── cmake_uninstall.cmake.in /.gitignore: -------------------------------------------------------------------------------- 1 | CMakeCache.txt 2 | CMakeFiles/* 3 | 4 | #cmake generated files 5 | cmake_install.cmake 6 | cmake_uninstall.cmake 7 | generate.stamp 8 | generate.stamp.depend 9 | makefile 10 | 11 | #make 12 | install_manifest.txt 13 | 14 | ## Ignore Visual Studio temporary files, build results, and 15 | ## files generated by popular Visual Studio add-ons. 16 | 17 | # User-specific files 18 | *.suo 19 | *.user 20 | *.userosscache 21 | *.sln.docstates 22 | 23 | # User-specific files (MonoDevelop/Xamarin Studio) 24 | *.userprefs 25 | 26 | # Build results 27 | [Dd]ebug/ 28 | [Dd]ebugPublic/ 29 | [Rr]elease/ 30 | [Rr]eleases/ 31 | x64/ 32 | x86/ 33 | build/ 34 | bld/ 35 | [Bb]in/ 36 | [Oo]bj/ 37 | 38 | # Visual Studo 2015 cache/options directory 39 | .vs/ 40 | 41 | # MSTest test Results 42 | [Tt]est[Rr]esult*/ 43 | [Bb]uild[Ll]og.* 44 | 45 | # NUNIT 46 | *.VisualState.xml 47 | TestResult.xml 48 | 49 | # Build Results of an ATL Project 50 | [Dd]ebugPS/ 51 | [Rr]eleasePS/ 52 | dlldata.c 53 | 54 | *_i.c 55 | *_p.c 56 | *_i.h 57 | *.ilk 58 | *.meta 59 | *.obj 60 | *.pch 61 | *.pdb 62 | *.pgc 63 | *.pgd 64 | *.rsp 65 | *.sbr 66 | *.tlb 67 | *.tli 68 | *.tlh 69 | *.tmp 70 | *.tmp_proj 71 | *.log 72 | *.vspscc 73 | *.vssscc 74 | .builds 75 | *.pidb 76 | *.svclog 77 | *.scc 78 | 79 | # Chutzpah Test files 80 | _Chutzpah* 81 | 82 | # Visual C++ cache files 
83 | ipch/ 84 | *.aps 85 | *.ncb 86 | *.opensdf 87 | *.sdf 88 | *.cachefile 89 | 90 | # Visual Studio profiler 91 | *.psess 92 | *.vsp 93 | *.vspx 94 | 95 | # TFS 2012 Local Workspace 96 | $tf/ 97 | 98 | # Guidance Automation Toolkit 99 | *.gpState 100 | 101 | # ReSharper is a .NET coding add-in 102 | _ReSharper*/ 103 | *.[Rr]e[Ss]harper 104 | *.DotSettings.user 105 | 106 | # JustCode is a .NET coding addin-in 107 | .JustCode 108 | 109 | # TeamCity is a build add-in 110 | _TeamCity* 111 | 112 | # DotCover is a Code Coverage Tool 113 | *.dotCover 114 | 115 | # NCrunch 116 | _NCrunch_* 117 | .*crunch*.local.xml 118 | 119 | # MightyMoose 120 | *.mm.* 121 | AutoTest.Net/ 122 | 123 | # Web workbench (sass) 124 | .sass-cache/ 125 | 126 | # Installshield output folder 127 | [Ee]xpress/ 128 | 129 | # DocProject is a documentation generator add-in 130 | DocProject/buildhelp/ 131 | DocProject/Help/*.HxT 132 | DocProject/Help/*.HxC 133 | DocProject/Help/*.hhc 134 | DocProject/Help/*.hhk 135 | DocProject/Help/*.hhp 136 | DocProject/Help/Html2 137 | DocProject/Help/html 138 | 139 | # Click-Once directory 140 | publish/ 141 | 142 | # Publish Web Output 143 | *.[Pp]ublish.xml 144 | *.azurePubxml 145 | # TODO: Comment the next line if you want to checkin your web deploy settings 146 | # but database connection strings (with potential passwords) will be unencrypted 147 | *.pubxml 148 | *.publishproj 149 | 150 | # NuGet Packages 151 | *.nupkg 152 | # The packages folder can be ignored because of Package Restore 153 | **/packages/* 154 | # except build/, which is used as an MSBuild target. 
155 | !**/packages/build/ 156 | # Uncomment if necessary however generally it will be regenerated when needed 157 | #!**/packages/repositories.config 158 | 159 | # Windows Azure Build Output 160 | csx/ 161 | *.build.csdef 162 | 163 | # Windows Store app package directory 164 | AppPackages/ 165 | 166 | # Others 167 | *.[Cc]ache 168 | ClientBin/ 169 | [Ss]tyle[Cc]op.* 170 | ~$* 171 | *~ 172 | *.dbmdl 173 | *.dbproj.schemaview 174 | *.pfx 175 | *.publishsettings 176 | node_modules/ 177 | bower_components/ 178 | 179 | # RIA/Silverlight projects 180 | Generated_Code/ 181 | 182 | # Backup & report files from converting an old project file 183 | # to a newer Visual Studio version. Backup files are not needed, 184 | # because we have git ;-) 185 | _UpgradeReport_Files/ 186 | Backup*/ 187 | UpgradeLog*.XML 188 | UpgradeLog*.htm 189 | 190 | # SQL Server files 191 | *.mdf 192 | *.ldf 193 | 194 | # Business Intelligence projects 195 | *.rdl.data 196 | *.bim.layout 197 | *.bim_*.settings 198 | 199 | # Microsoft Fakes 200 | FakesAssemblies/ 201 | 202 | # Node.js Tools for Visual Studio 203 | .ntvs_analysis.dat 204 | 205 | # Visual Studio 6 build log 206 | *.plg 207 | 208 | # Visual Studio 6 workspace options file 209 | *.opt 210 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # We need CMake 3.8 at least, because we require 2 | # CMAKE_CXX_STANDARD to be set to C++17. 
3 | # Visual Studio 2019 is supported from CMake 3.14.1
4 | # Possible generators:
5 | #   "MinGW Makefiles": MSYS2/Mingw32 GCC 8.3 build
6 | #   "Visual Studio 15 2017" optional platform generator Win32 and x64
7 | #   "Visual Studio 16 2019" optional platform generator Win32 and x64
8 | #   "Visual Studio 16 2019" + LLVM 8.0 (clang) optional platform generator Win32 and x64
9 | cmake_minimum_required(VERSION 3.8.2)
10 |
11 | project("TComb" LANGUAGES CXX)
12 | include(GNUInstallDirs)
13 |
14 | # Avoid uselessly linking to unused libraries
15 | set(CMAKE_STANDARD_LIBRARIES "" CACHE STRING "" FORCE)
16 | set(CMAKE_C_STANDARD_LIBRARIES "" CACHE STRING "" FORCE)
17 | set(CMAKE_CXX_STANDARD_LIBRARIES "" CACHE STRING "" FORCE)
18 |
19 | # We require C++17 or higher.
20 | set(CMAKE_CXX_STANDARD 17)
21 | set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
22 | set(CMAKE_CXX_EXTENSIONS FALSE)
23 |
24 | # Detect Intel processors and turn Intel SIMD on or off automatically.
25 | # The detected value is only the default of the ENABLE_INTEL_SIMD option
26 | # declared below; it can still be overridden with -DENABLE_INTEL_SIMD=ON/OFF.
27 | message(STATUS "Detected target processor as: ${CMAKE_SYSTEM_PROCESSOR}")
28 | string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" ARCHID)
29 | if(("${ARCHID}" STREQUAL "x86") OR
30 |    ("${ARCHID}" STREQUAL "x64") OR
31 |    ("${ARCHID}" STREQUAL "i686") OR
32 |    ("${ARCHID}" STREQUAL "amd64") OR
33 |    ("${ARCHID}" STREQUAL "x86_64"))
34 |   set(INTEL_SIMD "ON")
35 | else()
36 |   set(INTEL_SIMD "OFF")
37 | endif()
38 |
39 | option(ENABLE_INTEL_SIMD "Enable SIMD intrinsics for Intel processors" "${INTEL_SIMD}")
40 |
41 | # CMAKE_CONFIGURATION_TYPES is only set by multi-config generators.
42 | if(CMAKE_CONFIGURATION_TYPES)
43 |   set(CMAKE_CONFIGURATION_TYPES Debug Release RelWithDebInfo)
44 |   set(CMAKE_CONFIGURATION_TYPES "${CMAKE_CONFIGURATION_TYPES}" CACHE STRING "Reset the configurations to what we need" FORCE)
45 | endif()
46 |
47 | if(MSVC) # Check for Visual Studio
48 |   # We do not allow creating Visual Studio solutions, existing .sln file contains
49 |   # all x86/x64 versions of MSVC and LLVM builds.
50 |   # message(FATAL_ERROR) stops configuration, so the remainder of this branch
51 |   # is currently never processed.
52 |   message(FATAL_ERROR "Please use the existing sln file both for MS VC and also for LLVM toolset in VS")
53 |   # anyway we keep all things below
54 |   # ** not tested **
55 |
56 |   # 1910-1919 = VS 15.0 (v141 toolset) Visual Studio 2017
57 |   # 1920      = VS 16.0 (v142 toolset) Visual Studio 2019
58 |
59 |   if(MSVC_VERSION VERSION_LESS 1910)
60 |     message(FATAL_ERROR "Visual C++ 2017 or newer required.")
61 |   endif()
62 |
63 |   if(MSVC_IDE)
64 |     message("Reported CMAKE_GENERATOR_TOOLSET is: ${CMAKE_GENERATOR_TOOLSET}")
65 |
66 |     # For LLVM Clang installed separately, specify llvm or LLVM
67 |     # Since Visual Studio 2019 v16.4, LLVM 9.0 is integrated, for this use Toolset: ClangCL
68 |     if(CMAKE_GENERATOR_TOOLSET STREQUAL "LLVM" OR CMAKE_GENERATOR_TOOLSET STREQUAL "llvm" OR CMAKE_GENERATOR_TOOLSET STREQUAL "ClangCL")
69 |       # The variable name is given unexpanded: if() dereferences it itself, so
70 |       # an empty compiler ID cannot turn this into a malformed expression.
71 |       if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") # hope: always
72 |         message("LLVM toolset was specified via -T. Compiler ID is: ${CMAKE_CXX_COMPILER_ID}; CMAKE_CXX_COMPILER_VERSION is: ${CMAKE_CXX_COMPILER_VERSION}")
73 |         # Clang; 9.0.0
74 |         # These are probably not supported when clang is downloaded as a ready-made binary: CLANG_VERSION_MAJOR CLANG_VERSION_MINOR CLANG_VERSION_STRING
75 |         # string (REGEX REPLACE ".*clang version ([0-9]+\\.[0-9]+).*" "\\1" CLANG_VERSION_STRING ${clang_full_version_string})
76 |         if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0.1)
77 |           message(FATAL_ERROR "Clang 7.0.1 or newer required") # as of 2019.december actually we are using 9.0
78 |         endif()
79 |       endif()
80 |       set(CLANG_IN_VS "1")
81 |     elseif(CMAKE_GENERATOR_TOOLSET STREQUAL "v141_clang_c2")
82 |       # 1900 is reported
83 |       message("v141_clang_c2 toolset was specified via -T. Reported MSVC_VERSION is: ${MSVC_VERSION}")
84 |       message("May not work, try LLVM")
85 |       set(CLANG_IN_VS "1")
86 |     endif()
87 |
88 |     option(WINXP_SUPPORT "Make binaries compatible with Windows XP and Vista" OFF)
89 |     if(WINXP_SUPPORT)
90 |       # We want our project to also run on Windows XP
91 |       # Not for LLVM: Clang stopped XP support in 2016
92 |       # 1900 (VS2015) is not supported but we leave here
93 |       if(MSVC_VERSION VERSION_LESS 1910)
94 |         if(NOT CLANG_IN_VS STREQUAL "1")
95 |           set(CMAKE_GENERATOR_TOOLSET "v140_xp" CACHE STRING "The compiler toolset to use for Visual Studio." FORCE) # VS2015
96 |           # https://connect.microsoft.com/VisualStudio/feedback/details/1789709/visual-c-2015-runtime-broken-on-windows-server-2003-c-11-magic-statics
97 |           message("CMAKE_GENERATOR_TOOLSET is forced to: ${CMAKE_GENERATOR_TOOLSET}")
98 |           add_definitions("/Zc:threadSafeInit-")
99 |         endif()
100 |       else()
101 |         if(NOT CLANG_IN_VS STREQUAL "1")
102 |           set(CMAKE_GENERATOR_TOOLSET "v141_xp" CACHE STRING "The compiler toolset to use for Visual Studio." FORCE) # VS2017, also choosable for VS2019
103 |           # https://connect.microsoft.com/VisualStudio/feedback/details/1789709/visual-c-2015-runtime-broken-on-windows-server-2003-c-11-magic-statics
104 |           message("CMAKE_GENERATOR_TOOLSET is forced to: ${CMAKE_GENERATOR_TOOLSET}")
105 |           add_definitions("/Zc:threadSafeInit-")
106 |         endif()
107 |       endif()
108 |     endif()
109 |   endif()
110 |
111 |   if(CLANG_IN_VS STREQUAL "1")
112 |     #these are unknown
113 |     #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexceptions")
114 |     #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
115 |     string(REPLACE "/EHsc" "/EHa" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
116 |     string(REPLACE "/EHsc" "/EHa" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
117 |     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-inconsistent-missing-override")
118 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-inconsistent-missing-override")
119 |   else()
120 |     # Enable C++ with SEH exceptions
121 |     # Avoid an obnoxious 'overriding /EHsc with /EHa' warning when
122 |     # using something other than MSBuild
123 |     string(REPLACE "/EHsc" "/EHa" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
124 |     string(REPLACE "/EHsc" "/EHa" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
125 |   endif()
126 |   # Prevent VC++ from complaining about not using MS-specific functions
127 |   add_definitions("/D _CRT_SECURE_NO_WARNINGS /D _SECURE_SCL=0")
128 |
129 |   # Enable CRT heap debugging - only effective in debug builds
130 |   add_definitions("/D _CRTDBG_MAP_ALLOC")
131 |
132 |   # Set additional optimization flags
133 |   set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Oy /Ot /GS- /Oi")
134 |   set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oy /Ot /GS- /Oi")
135 |
136 |   # CPU_ARCH can be overridden with the corresponding values when using MSVC:
137 |   #  IA32 (disabled),
138 |   #  SSE (Pentium III and higher, 1999),
139 |   #  SSE2 (Pentium 4 and higher, 2000/2001),
140 |   #  AVX (Sandy Bridge and higher, 2011),
141 |   #  AVX2 (Haswell and higher, 2013)
142 |   set(MSVC_CPU_ARCH "SSE2" CACHE STRING "Set MSVC architecture optimization level (default: SSE2)")
143 |
144 |   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:${MSVC_CPU_ARCH}")
145 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:${MSVC_CPU_ARCH}")
146 |
147 |   if(CMAKE_SIZEOF_VOID_P EQUAL 8)
148 |     # MSVC doesn't allow 64-bit builds to have their /arch set to SSE2 (no-op) or below
149 |     # The pattern is anchored so that only exactly IA32, SSE or SSE2 match.
150 |     if("${MSVC_CPU_ARCH}" MATCHES "^(IA32|SSE|SSE2)$")
151 |       set(DELETE_THIS "/arch:${MSVC_CPU_ARCH}")
152 |       message("MSVC doesn't allow x86-64 builds to define /arch:${MSVC_CPU_ARCH}. Setting will be ignored.")
153 |       string(REPLACE "${DELETE_THIS}" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
154 |       string(REPLACE "${DELETE_THIS}" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
155 |     endif()
156 |   endif()
157 |
158 |   if(CLANG_IN_VS STREQUAL "1")
159 |     # suppress other frequent but harmless/unavoidable warnings
160 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-function")
161 |     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function")
162 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-reorder")
163 |     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-reorder")
164 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-value")
165 |     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-value")
166 |     # allow per-function attributes like __attribute__((__target__("sse4.1")))
167 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-gcc-compat")
168 |     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-gcc-compat")
169 |   endif()
170 |
171 |   # Set C++17 flag. /std:c++17 selects a C++ language level, so it is added
172 |   # to the C++ flags only (project() above enables the CXX language only).
173 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /std:c++17")
174 |
175 |   # Enable standards-conformance mode for MSVC compilers that support this
176 |   # flag (Visual C++ 2017 and later).
177 |   if(NOT (MSVC_VERSION LESS 1910))
178 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /permissive-")
179 |     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /permissive-")
180 |   endif()
181 |
182 |   if(ENABLE_INTEL_SIMD)
183 |     add_definitions("/D INTEL_INTRINSICS")
184 |   endif()
185 |
186 | else()
187 |
188 |   if(ENABLE_INTEL_SIMD)
189 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2 -DINTEL_INTRINSICS")
190 |   endif()
191 |
192 |   if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
193 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-security")
194 |   endif()
195 |
196 |   if(WIN32)
197 |     # Append rather than overwrite, so linker flags supplied by the user or
198 |     # a toolchain file are kept (the other branches below append as well).
199 |     set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--enable-stdcall-fixup")
200 |   else()
201 |     if(APPLE)
202 |       # macOS uses Clang's linker, doesn't like --no-undefined
203 |       set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-undefined,error")
204 |     else()
205 |       if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
206 |         # make sure there are no undefined symbols
207 |         set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined")
208 |       endif()
209 |     endif()
210 |   endif()
211 | endif()
212 |
213 | if(ENABLE_INTEL_SIMD)
214 |   message("Intel SIMD enabled")
215 | else()
216 |   message("Intel SIMD disabled")
217 | endif()
218 |
219 | add_subdirectory("TComb")
220 |
221 | # uninstall target
222 | # Skipped when a target named "uninstall" already exists (for example when an
223 | # enclosing build defines its own), because target names must be unique.
224 | if(NOT TARGET uninstall)
225 |   configure_file(
226 |     "${CMAKE_CURRENT_SOURCE_DIR}/cmake_uninstall.cmake.in"
227 |     "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
228 |     IMMEDIATE @ONLY)
229 |
230 |   add_custom_target(uninstall
231 |     COMMAND "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
232 |     VERBATIM)
233 | endif()
234 |
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of
this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 
42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. 
The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 
102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | 341 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TComb 2 | 3 | This is an update to tritical's TComb v2.0 Beta 2 moving it from beta to release as it encompasses all the changes in tritical's To-Do-List. 4 | 5 | ### Requirements 6 | 7 | This filter requires AviSynth 2.6.0 or AviSynth+ as well as the Visual C++ Redistributable Package for Visual Studio 2015-19. 8 | 9 | ### Syntax and Parameters 10 | 11 | The syntax and parameters are identical to the original TComb with the exception of the "opt" parameter. To see a list refer to this [link](http://avisynth.nl/index.php/TComb). 12 | 13 | ### Changes 14 | 15 | In 2015 Elegant made many changes when updating TComb in order to improve speed (see full changelog for more details): 16 | 17 | * Removed buffering of frames/info that weren't actually used 18 | * Switched to AVS 2.6 API 19 | * Added x64 support which also utilizes SSE2 20 | * Restructured debug and error messages 21 | * Removed MMX/ISSE support 22 | * Removed/changed "opt" parameter 23 | 24 | In 2021 came a general bugfix release by pinterf. 25 | Added linux port, the missing 8 bit Y and YUV formats, external assembler was rewritten in SIMD intrinsics. 26 | 27 | ### Programmer Notes 28 | 29 | This program was compiled using Visual Studio 2019 and falls under the GNU General Public License. 
30 | 31 | I (Elegant) would like to thank jpsdr and dubhater for their work on nnedi3 and the VapourSynth version of TComb (respectively). Their work led to the port of this project. 32 | I'd also like to thank the masm32 community who were very helpful as I explored assembly. 33 | 34 | Build instructions 35 | ================== 36 | VS2019: 37 | use IDE 38 | 39 | Windows GCC (mingw installed by msys2): 40 | from the 'build' folder under project root: 41 | 42 | del ..\CMakeCache.txt 43 | cmake .. -G "MinGW Makefiles" -DENABLE_INTEL_SIMD:bool=on 44 | @rem test: cmake .. -G "MinGW Makefiles" -DENABLE_INTEL_SIMD:bool=off 45 | cmake --build . --config Release 46 | 47 | Linux 48 | note: ENABLE_INTEL_SIMD is automatically off for non-x86 architectures 49 | 50 | * Clone repo and build 51 | 52 | git clone https://github.com/pinterf/TComb 53 | cd TComb 54 | cmake -B build -S . 55 | cmake --build build 56 | 57 | Useful hints: 58 | build after clean: 59 | 60 | cmake --build build --clean-first 61 | 62 | Force no asm support 63 | 64 | cmake -B build -S . 
-DENABLE_INTEL_SIMD:bool=off 65 | 66 | delete cmake cache 67 | 68 | rm build/CMakeCache.txt 69 | 70 | * Find binaries at 71 | 72 | build/TComb/libtcomb.so 73 | 74 | * Install binaries 75 | 76 | cd build 77 | sudo make install 78 | 79 | -------------------------------------------------------------------------------- /TComb - ReadMe.txt: -------------------------------------------------------------------------------- 1 | | 2 | TComb for AviSynth | 3 | v2.3 (24 February 2021) | 4 | by tritical | 5 | modified by Elegant (v2.0; 17 July 2015) | 6 | additional work by pinterf | 7 | | 8 | HELP FILE | 9 | ------------------------------------------------------------------------------------------------------- 10 | ------------------------------------------------------------------------------------------------------- 11 | 12 | 13 | INFO: 14 | 15 | 16 | TComb is a temporal comb filter (it reduces cross-luminance (rainbowing) and cross-chrominance 17 | (dot crawl) artifacts in static areas of the picture). It will ONLY work with NTSC material, and 18 | WILL NOT work with telecined material where the rainbowing/dotcrawl was introduced prior to the 19 | telecine process! It must be used before ivtc or deinterlace in order to work. In terms of what 20 | it does it is similar to guavacomb/dedot. 21 | 22 | TComb currently supports Y8, YV12, YV16, YV24, YV411 and YUY2 colorspaces. 23 | 24 | TComb does support seeking... that is, jumping to a random frame will produce the same result 25 | as if you had linearly run up to that frame. For dot crawl removal tcomb requires at least 3 26 | static fields of the same parity and for rainbow removal tcomb requires at least 5 static fields 27 | of the same parity. 
28 | 29 | 30 | Syntax => 31 | 32 | TComb(int mode, int fthreshL, int fthreshC, int othreshL, int othreshC, bool map, 33 | double scthresh, bool debug, int opt) 34 | 35 | 36 | 37 | PARAMETERS: 38 | 39 | 40 | mode - (limit processing to luma or chroma only) 41 | 42 | Controls whether both luma/chroma are processed or only one or the other. Possible settings: 43 | 44 | 0 - process luma only (dot crawl removal) 45 | 1 - process chroma only (rainbow removal) 46 | 2 - process both 47 | 48 | For greyscale clips mode=0 is used regardless the settings 49 | 50 | default: 2 (int) 51 | 52 | 53 | fthreshL/fthreshC - (filtered pixel correlation thresholds) 54 | 55 | One of the things TComb checks for is correlation between filtered values over the length 56 | of the filtering window. If all values differ by less than fthreshL (for luma) or fthreshC 57 | (for chroma) then the filtered values are considered to be correlated. Larger values will 58 | allow more filtering (will be more effective at removing rainbowing/dot crawl), but will also 59 | create more artifacts. Smaller values will produce less artifacts, but will be less effective 60 | in removing rainbowing/dot crawl. A good range of values is between 4 and 7. 61 | 62 | default: fthreshL -> 4 (int) 63 | fthreshC -> 5 64 | 65 | 66 | othreshL/othreshC - (original pixel correlation thresholds) 67 | 68 | One of the things TComb checks for is correlation between original pixel values from every 69 | other field of the same parity. Due to the oscillation period, these values should be equal 70 | or very similar in static areas containing dot crawl or rainbowing. If the pixel values 71 | differ by less than othreshL (for luma) or othreshC (for chroma) then the pixels are considered 72 | to be correlated. Larger values will allow more filtering (will be more effective at removing 73 | rainbowing/dotcrawl), but will also create more artifacts. 
Smaller values will produce less 74 | artifacts, but will be less effective in removing rainbowing/dotcrawl. A good range of values 75 | is between 4 and 8. 76 | 77 | default: othreshL -> 5 (int) 78 | othreshC -> 6 79 | 80 | 81 | map - 82 | 83 | Identifies pixels that are being replaced with filtered values. Each pixel in the output 84 | frame will have one of the following values indicating how it is being filtered: 85 | 86 | 0 - not being filtered 87 | 85 - [1 2 1] average of (n,n+1,n+2) 88 | 170 - [1 2 1] average of (n-2,n-1,n) 89 | 255 - [1 2 1] average of (n-1,n,n+1) 90 | 91 | ** n = current frame 92 | 93 | default: false (bool) 94 | 95 | 96 | scthresh - (scenechange threshold) 97 | 98 | Sets the scenechange detection threshold as a percentage of maximum change on the luma 99 | plane. Use the debug output to see which frames are detected as scenechanges and the 100 | scenechange statistics. 101 | 102 | default: 12.0 (float) 103 | 104 | 105 | debug - 106 | 107 | Will enable debug output. The only thing it shows are the scenechange stats. The info 108 | is output via OutputDebugString(). You can use the utility "DebugView" from sysinternals 109 | to view the output. The frame numbers in the debug output correspond to the input clip 110 | after a separatefields() call. TComb internally invokes separatefields() before itself 111 | and weave() after itself. 112 | 113 | default: false (bool) 114 | 115 | opt - (another debug parameter: CPU) 116 | 117 | 0: C only (no assembly at all) 118 | other: automatically choose SSE2 or C 119 | 120 | For development use: opt parameters can appear/disappear/change their meaning between versions 121 | 122 | default: -1 (int) 123 | 124 | 125 | BASIC SETUP/USAGE: 126 | 127 | 128 | Setting up TComb is pretty simple. The only values that would ever really need adjusting 129 | are fthreshL/fthreshC, othreshL/othreshC, and mode. 
130 | 131 | Set mode to 0 if you want to do dot crawl removal only, set it to 1 if you want to 132 | do rainbow removal only, or set it to 2 to do both. 133 | 134 | Dot Crawl Removal Tweaking (fthreshL/othreshL): 135 | 136 | To find good values for fthreshL/othreshL, start with the following line: 137 | 138 | tcomb(mode=0,fthreshL=255,othreshL=255) 139 | 140 | Now, keep othreshL at 255 but set fthreshL down to 1. Keep increasing fthreshL 141 | in steps of 1 to 2 until you find the point at which all dot crawl is removed. 142 | Remember that value. Next, set fthreshL back to 255, and set othreshL to 1. 143 | Now, increase othreshL in steps of 1 or 2 until you find the point at which all 144 | dot crawl is removed. You've now got values for fthreshL/othreshL. 145 | 146 | Rainbowing Removal Tweaking (fthreshC/othreshC): 147 | 148 | To find good values for fthreshC/othreshC, start with the following line: 149 | 150 | tcomb(mode=1,fthreshC=255,othreshC=255) 151 | 152 | Now, keep othreshC at 255 but set fthreshC down to 1. Keep increasing fthreshC 153 | in steps of 1 to 2 until you find the point at which all (or most) rainbowing is 154 | removed. Remember that value. Next, set fthreshC back to 255, and set othreshC 155 | to 1. Now, increase othreshC in steps of 1 or 2 until you find the point at which 156 | all (or most) rainbowing is removed. You've now got values for fthreshC/othreshC. 157 | 158 | Once you've got values for mode, fthreshL/fthreshC, and othreshL/othreshC, add the 159 | necessary tcomb() line into your script and run through part of it. If you see any 160 | artifacts try lowering your fthresh/othresh values. 
161 | 162 | 163 | 164 | CHANGE LIST: 165 | 166 | ** v2.3 (20210224 pinterf)** 167 | - Y8, YV16, YV24, YV411 support 168 | 169 | ** v2.2 (20210223 pinterf)** 170 | - Fix: unsave register x64 assembler causing artifacts 171 | - Drop all external asm 172 | - Rewrite assembler in SIMD intrinsics (old stuff is not removed yet, only conditionally ignored) 173 | - Add CMake build system 174 | - Add MinGW/gcc support 175 | - Add linux support (with ENABLE_INTEL_SIMD=off option as well) 176 | - Add build instructions to README.md 177 | 178 | ** v2.1 (20210222 pinterf)** 179 | - project forked to https://github.com/pinterf/TComb/ 180 | - param 'opt' is back for debug. 0 means pure C code 181 | - Fix bug in x64 assembler buildFinalMask_SSE2 182 | - Fix crash in 32bit version of VerticalBlur3_SSE2 183 | - Fix: scenechange SSE2 did not work 184 | - Fix: x64 assembler HorizontalBlur6_SSE2 185 | - Fix: HorizontalBlur6: C only did top 2 lines. SSE2 bad top 2 lines 186 | - Fix: HorizontalBlur3_SSE2 artifacts (both x86 and x64) 187 | - Fix: HorizontalBlur3_SSE2 missing rounder (both x86 and x64) (now C and SSE2 is giving identical results) 188 | - Code: 189 | - Update to Visual Studio 2019 190 | - update to actual Avisynth+ headers 191 | - clang-friendly code 192 | - removed memcpy and bitblt variants 193 | - replaced planarframes module with the one I updated in tivtc project for Avisynth+ and hbd preparation 194 | - Fix debug build configuration in VS project settings 195 | 196 | ** v2.0.0.1 (20150726 Elegant)** 197 | - Corrected the masks used in HorizontalBlur6 for x64. 198 | 199 | ** v2.0 (20150717 Elegant)** 200 | 201 | - Removed buffering of frames/info that weren't actually used (was there for 202 | development/testing purposes). Should save a lot of RAM usage. 203 | - Switched to AVS 2.6 API since AviSynth 2.6.0 was released. 204 | - Added x64 support which also utilizes SSE2. This also includes some missing SSE2 functions (andNeighborsInPlace_SSE2). 
205 | - Restructured debug and error messages so that it was apparent that TComb was responsible. 206 | - Removed MMX/ISSE support as times have changed and the support was not going to be carried over to x64. 207 | - Removed "opt" parameter. TComb will now use SSE2 if available and will fallback on C++ if it is not supported. 208 | 209 | End of tritical version history 210 | ------------------------------------------------------------------------ 211 | 212 | 05/16/2006 v2.0 Beta 2 213 | 214 | + Stricter checking of othreshL/othreshC when looking for oscillation 215 | + For dot crawl detection require at least one vertical neighbor (y-1/y+1, x-1/x/x+1) 216 | - fixed possible crash with yuy2 input (sse2 planar<->packed conversions) 217 | 218 | 219 | 03/31/2006 v2.0 Beta 1 220 | 221 | - complete rewrite 222 | 223 | 224 | 06/24/2005 v0.9.0 225 | 226 | - Initial Release 227 | 228 | 229 | 230 | contact: GitHub (@Elegant996) 231 | -------------------------------------------------------------------------------- /TComb/CMakeLists.txt: --------------------------------------------------------------------------------
# Build script for the TComb plugin (a single shared library).
#
# Visual Studio 2019 is supported from CMake 3.14.1
# Tested generators:
#   "MinGW Makefiles": MSYS2/Mingw32 GCC 8.3 build
#   "Visual Studio 16 2019" optional platform generator Win32 and x64
#   "Visual Studio 16 2019" + LLVM 8.0 (clang) optional platform generator Win32 and x64
cmake_minimum_required(VERSION 3.8.2)

# The target is called "TComb" on Windows and "tcomb" everywhere else.
set(PluginName "TComb")
if(NOT WIN32)
  string(TOLOWER "${PluginName}" PluginName)
endif()

set(ProjectName "${PluginName}")
project(${ProjectName} LANGUAGES CXX)

# Files.cmake fills TComb_Sources.
include("Files.cmake")

add_library(${PluginName} SHARED ${TComb_Sources})

set_target_properties(${PluginName} PROPERTIES OUTPUT_NAME "${PluginName}")
if(MINGW)
  # No "lib" prefix on the MinGW DLL or its import library.
  set_target_properties(${PluginName} PROPERTIES
    PREFIX ""
    IMPORT_PREFIX "")
endif()

# ENABLE_INTEL_SIMD is not defined in this file; it is expected to come from
# the including scope or from the command line (-DENABLE_INTEL_SIMD:bool=on).
if(ENABLE_INTEL_SIMD)
  # require sse2, some other plugins may need to set sse4.1 for quick msvc->gcc porting
  # Scoped to the plugin target instead of being appended to CMAKE_CXX_FLAGS.
  target_compile_definitions(${PluginName} PRIVATE INTEL_INTRINSICS)
  # -msse2 is a GCC/Clang-style option; cl.exe does not recognise it, so it is
  # only passed when the compiler is not MSVC.
  target_compile_options(${PluginName} PRIVATE
    "$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-msse2>")
endif()

# Automatically group source files according to directory structure
foreach(src ${TComb_Sources})
  get_filename_component(parent_dir "${src}" DIRECTORY)

  string(REGEX REPLACE "(\\./)" "" group "${parent_dir}")
  string(REPLACE "/" "\\" group "${group}")

  # group into "Source Files", "Header Files" and "Assembler Files"
  if("${src}" MATCHES "\\.cpp$")
    set(group "Source Files\\${group}")
  elseif("${src}" MATCHES "\\.(h|hpp)$")
    set(group "Header Files\\${group}")
  elseif("${src}" MATCHES "\\.asm$")
    set(group "Assembler Files\\${group}")
  endif()

  source_group("${group}" FILES "${src}")
endforeach()

# tcomb_set_source_flags(<glob> <flags>)
#   Sets COMPILE_FLAGS to <flags> on every source file below the current
#   source directory whose path matches <glob>. Does nothing when no file
#   matches.
#   Example: tcomb_set_source_flags("*_avx2.cpp" "-mavx2 -mfma")
function(tcomb_set_source_flags pattern flags)
  file(GLOB_RECURSE matched_sources "${pattern}")
  if(matched_sources)
    set_source_files_properties(${matched_sources} PROPERTIES COMPILE_FLAGS "${flags}")
  endif()
endfunction()

# Per-file instruction-set options, selected by file name suffix.
if(MSVC_IDE AND NOT "${CLANG_IN_VS}" STREQUAL "1")
  # Visual Studio generator without CLANG_IN_VS=1: /arch: style options.
  # No SSSE3 / SSE4.1 entries are set in this branch.
  tcomb_set_source_flags("*_avx.cpp"    "/arch:AVX")
  tcomb_set_source_flags("*_avx2.cpp"   "/arch:AVX2")
  tcomb_set_source_flags("*_avx512.cpp" "/arch:AVX512")
else()
  # CLANG_IN_VS=1 under Visual Studio, or any other generator: -m style options.
  tcomb_set_source_flags("*_ssse3.cpp"  "-mssse3")
  tcomb_set_source_flags("*_sse41.cpp"  "-msse4.1")
  tcomb_set_source_flags("*_avx.cpp"    "-mavx")
  tcomb_set_source_flags("*_avx2.cpp"   "-mavx2 -mfma")
  tcomb_set_source_flags("*_avx512.cpp" "-mavx512f -mavx512bw")
endif()

# Specify include directories
target_include_directories(${ProjectName} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
#dedicated include dir for avisynth.h
#target_include_directories(${ProjectName} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)

# Windows DLL dependencies
if(MSVC OR MINGW)
  target_link_libraries(${ProjectName} PRIVATE
    uuid
    winmm
    vfw32
    msacm32
    gdi32
    user32
    advapi32
    ole32
    imagehlp
  )
endif()
# non Windows: nothing extra is linked (the original note listed "pthread" and
# "dl" as commented-out candidates).

include(GNUInstallDirs)

install(TARGETS ${ProjectName}
  LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/avisynth")
-------------------------------------------------------------------------------- /TComb/Files.cmake: --------------------------------------------------------------------------------
# Collects the plugin sources (paths relative to this directory) into
# TComb_Sources.
file(GLOB TComb_Sources RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
  "*.c"
  "*.cpp"
  "*.hpp"
  "*.h"

  "avs/*.h"
)

if(MSVC OR MINGW)
  # Export definitions in general are not needed on x64 and only cause warnings,
  # unfortunately we still must need a .def file for some COM functions.
  # NO C interface for this plugin
  # if(CMAKE_SIZEOF_VOID_P EQUAL 8)
  #   LIST(APPEND TComb_Sources "TComb64.def")
  # else()
  #   LIST(APPEND TComb_Sources "TComb.def")
  # endif()
endif()

if(MSVC_IDE)
  # Ninja, unfortunately, seems to have some issues with using rc.exe
  list(APPEND TComb_Sources "TComb.rc")
endif()
-------------------------------------------------------------------------------- /TComb/PlanarFrame.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ** My PlanarFrame class... fast mmx/sse2 YUY2 packed to planar and planar 3 | ** to packed conversions, and always gives 16 bit alignment for all 4 | ** planes. Supports YV12/YUY2 frames from avisynth, can do any planar format 5 | ** internally. 
6 | ** 7 | ** Copyright (C) 2005-2006 Kevin Stone 8 | ** 9 | ** This program is free software; you can redistribute it and/or modify 10 | ** it under the terms of the GNU General Public License as published by 11 | ** the Free Software Foundation; either version 2 of the License, or 12 | ** (at your option) any later version. 13 | ** 14 | ** This program is distributed in the hope that it will be useful, 15 | ** but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | ** GNU General Public License for more details. 18 | ** 19 | ** You should have received a copy of the GNU General Public License 20 | ** along with this program; if not, write to the Free Software 21 | ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 22 | */ 23 | 24 | #include "PlanarFrame.h" 25 | #include "avs/cpuid.h" 26 | #include "common.h" 27 | #include 28 | #ifdef INTEL_INTRINSICS 29 | #include 30 | #endif 31 | 32 | // 8 bits only!!! 
33 | 34 | PlanarFrame::PlanarFrame(int cpuFlags) 35 | { 36 | ypitch = uvpitch = 0; 37 | ywidth = uvwidth = 0; 38 | yheight = uvheight = 0; 39 | y = u = v = NULL; 40 | useSIMD = true; 41 | packed = false; 42 | cpu = cpuFlags; 43 | } 44 | 45 | PlanarFrame::PlanarFrame(VideoInfo &viInfo, int cpuFlags) 46 | { 47 | ypitch = uvpitch = 0; 48 | ywidth = uvwidth = 0; 49 | yheight = uvheight = 0; 50 | y = u = v = NULL; 51 | useSIMD = true; 52 | packed = false; 53 | cpu = cpuFlags; 54 | allocSpace(viInfo); 55 | } 56 | 57 | PlanarFrame::PlanarFrame(VideoInfo &viInfo, bool _packed, int cpuFlags) 58 | { 59 | ypitch = uvpitch = 0; 60 | ywidth = uvwidth = 0; 61 | yheight = uvheight = 0; 62 | y = u = v = NULL; 63 | useSIMD = true; 64 | packed = _packed; 65 | cpu = cpuFlags; 66 | allocSpace(viInfo); 67 | } 68 | 69 | PlanarFrame::~PlanarFrame() 70 | { 71 | if (y != NULL) { _aligned_free(y - debug_padding); y = NULL; } 72 | if (u != NULL) { _aligned_free(u); u = NULL; } 73 | if (v != NULL) { _aligned_free(v); v = NULL; } 74 | } 75 | 76 | void PlanarFrame::FillMemDebug() 77 | { 78 | if (!debug) return; 79 | // MIN_ALIGNMENT bytes before 80 | uint32_t* pInt = (uint32_t*)(y); 81 | for (int i = 0; i < MIN_ALIGNMENT / sizeof(uint32_t); i++) 82 | pInt[i] = 0xDEADBEEF; 83 | // MIN_ALIGNMENT bytes after 84 | pInt = (uint32_t*)(y + MIN_ALIGNMENT + ypitch * yheight); 85 | for (int i = 0; i < MIN_ALIGNMENT / sizeof(uint32_t); i++) 86 | pInt[i] = 0xDEADBEEF; 87 | y = y + MIN_ALIGNMENT; // our real pointer after guard area 88 | } 89 | 90 | bool PlanarFrame::allocSpace(VideoInfo &viInfo) 91 | { 92 | if (y != NULL) { _aligned_free(y - debug_padding); y = NULL; } 93 | if (u != NULL) { _aligned_free(u); u = NULL; } 94 | if (v != NULL) { _aligned_free(v); v = NULL; } 95 | int height = viInfo.height; 96 | int width = viInfo.width; 97 | if (viInfo.IsPlanar()) 98 | { 99 | ypitch = width + ((width%MIN_ALIGNMENT) == 0 ? 
0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT)); 100 | ywidth = width; 101 | yheight = height; 102 | 103 | debug_padding = debug ? MIN_ALIGNMENT : 0; 104 | y = (uint8_t*)_aligned_malloc(ypitch * yheight + 2 * debug_padding, MIN_ALIGNMENT); 105 | if (y == NULL) return false; 106 | FillMemDebug(); 107 | 108 | if (!viInfo.IsY()) { 109 | width >>= viInfo.GetPlaneWidthSubsampling(PLANAR_U); 110 | height >>= viInfo.GetPlaneHeightSubsampling(PLANAR_U); 111 | uvpitch = width + ((width % MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width % MIN_ALIGNMENT)); 112 | uvwidth = width; 113 | uvheight = height; 114 | u = (uint8_t*)_aligned_malloc(uvpitch * uvheight, MIN_ALIGNMENT); 115 | if (u == NULL) return false; 116 | v = (uint8_t*)_aligned_malloc(uvpitch * uvheight, MIN_ALIGNMENT); 117 | if (v == NULL) return false; 118 | } 119 | return true; 120 | } 121 | else if (viInfo.IsYUY2()) 122 | { 123 | debug_padding = 0; 124 | 125 | if (!packed) 126 | { 127 | ypitch = width + ((width%MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT)); 128 | ywidth = width; 129 | yheight = height; 130 | width >>= 1; 131 | uvpitch = width + ((width%MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT)); 132 | uvwidth = width; 133 | uvheight = height; 134 | y = (uint8_t*)_aligned_malloc(ypitch*yheight, MIN_ALIGNMENT); 135 | if (y == NULL) return false; 136 | u = (uint8_t*)_aligned_malloc(uvpitch*uvheight, MIN_ALIGNMENT); 137 | if (u == NULL) return false; 138 | v = (uint8_t*)_aligned_malloc(uvpitch*uvheight, MIN_ALIGNMENT); 139 | if (v == NULL) return false; 140 | return true; 141 | } 142 | else 143 | { 144 | width *= 2; 145 | ypitch = width + ((width%MIN_ALIGNMENT) == 0 ? 
0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT)); 146 | ywidth = width; 147 | yheight = height; 148 | y = (uint8_t*)_aligned_malloc(ypitch*yheight, MIN_ALIGNMENT); 149 | if (y == NULL) return false; 150 | uvpitch = uvwidth = uvheight = 0; 151 | u = v = NULL; 152 | return true; 153 | } 154 | } 155 | return false; 156 | } 157 | 158 | bool PlanarFrame::allocSpace(int specs[4]) 159 | { 160 | if (y != NULL) { _aligned_free(y - debug_padding); y = NULL; } 161 | if (u != NULL) { _aligned_free(u); u = NULL; } 162 | if (v != NULL) { _aligned_free(v); v = NULL; } 163 | int height = specs[0]; 164 | int width = specs[2]; 165 | ypitch = width + ((width%MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT)); 166 | ywidth = width; 167 | yheight = height; 168 | height = specs[1]; 169 | width = specs[3]; 170 | uvpitch = width + ((width%MIN_ALIGNMENT) == 0 ? 0 : MIN_ALIGNMENT - (width%MIN_ALIGNMENT)); 171 | uvwidth = width; 172 | uvheight = height; 173 | 174 | const int debugpadding = debug ? MIN_ALIGNMENT : 0; 175 | y = (uint8_t*)_aligned_malloc(ypitch * yheight + 2 * debugpadding, MIN_ALIGNMENT); 176 | if (y == NULL) return false; 177 | FillMemDebug(); 178 | 179 | if (uvpitch) { 180 | u = (uint8_t*)_aligned_malloc(uvpitch * uvheight, MIN_ALIGNMENT); 181 | if (u == NULL) return false; 182 | v = (uint8_t*)_aligned_malloc(uvpitch * uvheight, MIN_ALIGNMENT); 183 | if (v == NULL) return false; 184 | } 185 | return true; 186 | } 187 | 188 | 189 | void PlanarFrame::createPlanar(int yheight, int uvheight, int ywidth, int uvwidth) 190 | { 191 | int specs[4] = { yheight, uvheight, ywidth, uvwidth }; 192 | allocSpace(specs); 193 | } 194 | 195 | void PlanarFrame::createPlanar(int height, int width, int chroma_format) 196 | { 197 | int specs[4]; 198 | if (chroma_format <= PLANAR_420) // 420 199 | { 200 | specs[0] = height; specs[1] = height >> 1; 201 | specs[2] = width; specs[3] = width >> 1; 202 | } 203 | else if (chroma_format == PLANAR_422) // 422 204 | { 205 | specs[0] = height; 
specs[1] = height; 206 | specs[2] = width; specs[3] = width >> 1; 207 | } 208 | else if (chroma_format == PLANAR_444) // 444 209 | { 210 | specs[0] = height; specs[1] = height; 211 | specs[2] = width; specs[3] = width; 212 | } 213 | else if (chroma_format == PLANAR_411) // 411 214 | { 215 | specs[0] = height; specs[1] = height; 216 | specs[2] = width; specs[3] = width >> 2; 217 | } 218 | else if (chroma_format == PLANAR_400) // greyscale 219 | { 220 | specs[0] = height; specs[1] = 0; 221 | specs[2] = width; specs[3] = 0; 222 | } 223 | allocSpace(specs); 224 | } 225 | 226 | void PlanarFrame::createFromProfile(VideoInfo &viInfo) 227 | { 228 | allocSpace(viInfo); 229 | } 230 | 231 | void PlanarFrame::createFromFrame(PVideoFrame &frame, VideoInfo &viInfo) 232 | { 233 | allocSpace(viInfo); 234 | copyInternalFrom(frame, viInfo); 235 | } 236 | 237 | void PlanarFrame::createFromPlanar(PlanarFrame &frame) 238 | { 239 | int specs[4] = { frame.yheight, frame.uvheight, frame.ywidth, frame.uvwidth }; 240 | allocSpace(specs); 241 | copyInternalFrom(frame); 242 | } 243 | 244 | void PlanarFrame::copyFrom(PVideoFrame &frame, VideoInfo &viInfo) 245 | { 246 | copyInternalFrom(frame, viInfo); 247 | } 248 | 249 | void PlanarFrame::copyFrom(PlanarFrame &frame) 250 | { 251 | copyInternalFrom(frame); 252 | } 253 | 254 | void PlanarFrame::copyTo(PVideoFrame &frame, VideoInfo &viInfo) 255 | { 256 | copyInternalTo(frame, viInfo); 257 | } 258 | 259 | void PlanarFrame::copyTo(PlanarFrame &frame) 260 | { 261 | copyInternalTo(frame); 262 | } 263 | 264 | void PlanarFrame::copyPlaneTo(PlanarFrame &frame, int plane) 265 | { 266 | copyInternalPlaneTo(frame, plane); 267 | } 268 | 269 | uint8_t* PlanarFrame::GetPtr(int plane) 270 | { 271 | if (plane == 0) return y; 272 | if (plane == 1) return u; 273 | return v; 274 | } 275 | 276 | int PlanarFrame::NumComponents() { 277 | if (uvpitch) 278 | return 3; 279 | return 1; 280 | } 281 | 282 | int PlanarFrame::GetWidth(int plane) 283 | { 284 | if (plane == 0) 
return ywidth; 285 | else return uvwidth; 286 | } 287 | 288 | int PlanarFrame::GetHeight(int plane) 289 | { 290 | if (plane == 0) return yheight; 291 | else return uvheight; 292 | } 293 | 294 | int PlanarFrame::GetPitch(int plane) 295 | { 296 | if (plane == 0) return ypitch; 297 | else return uvpitch; 298 | } 299 | 300 | void PlanarFrame::freePlanar() 301 | { 302 | if (y != NULL) { _aligned_free(y - debug_padding); y = NULL; } 303 | if (u != NULL) { _aligned_free(u); u = NULL; } 304 | if (v != NULL) { _aligned_free(v); v = NULL; } 305 | ypitch = uvpitch = 0; 306 | ywidth = uvwidth = 0; 307 | yheight = uvheight = 0; 308 | } 309 | 310 | void PlanarFrame::copyInternalFrom(PVideoFrame &frame, VideoInfo &viInfo) 311 | { 312 | if (y == NULL) return; 313 | if (viInfo.IsPlanar()) 314 | { 315 | BitBlt(y, ypitch, frame->GetReadPtr(PLANAR_Y), frame->GetPitch(PLANAR_Y), 316 | frame->GetRowSize(PLANAR_Y), frame->GetHeight(PLANAR_Y)); 317 | if (u == NULL || v == NULL) return; 318 | BitBlt(u, uvpitch, frame->GetReadPtr(PLANAR_U), frame->GetPitch(PLANAR_U), 319 | frame->GetRowSize(PLANAR_U), frame->GetHeight(PLANAR_U)); 320 | BitBlt(v, uvpitch, frame->GetReadPtr(PLANAR_V), frame->GetPitch(PLANAR_V), 321 | frame->GetRowSize(PLANAR_V), frame->GetHeight(PLANAR_V)); 322 | } 323 | else if (viInfo.IsYUY2()) 324 | { 325 | convYUY2to422(frame->GetReadPtr(), y, u, v, frame->GetPitch(), ypitch, uvpitch, 326 | viInfo.width, viInfo.height); 327 | } 328 | } 329 | 330 | void PlanarFrame::copyInternalFrom(PlanarFrame &frame) 331 | { 332 | if (y == NULL) return; 333 | BitBlt(y, ypitch, frame.y, frame.ypitch, frame.ywidth, frame.yheight); 334 | if (u == NULL || v == NULL) return; 335 | BitBlt(u, uvpitch, frame.u, frame.uvpitch, frame.uvwidth, frame.uvheight); 336 | BitBlt(v, uvpitch, frame.v, frame.uvpitch, frame.uvwidth, frame.uvheight); 337 | } 338 | 339 | void PlanarFrame::copyInternalTo(PVideoFrame &frame, VideoInfo &viInfo) 340 | { 341 | if (y == NULL) return; 342 | if (viInfo.IsPlanar()) 343 
| { 344 | BitBlt(frame->GetWritePtr(PLANAR_Y), frame->GetPitch(PLANAR_Y), y, ypitch, ywidth, yheight); 345 | if (u == NULL || v == NULL) return; 346 | BitBlt(frame->GetWritePtr(PLANAR_U), frame->GetPitch(PLANAR_U), u, uvpitch, uvwidth, uvheight); 347 | BitBlt(frame->GetWritePtr(PLANAR_V), frame->GetPitch(PLANAR_V), v, uvpitch, uvwidth, uvheight); 348 | } 349 | else if (viInfo.IsYUY2()) 350 | { 351 | conv422toYUY2(y, u, v, frame->GetWritePtr(), ypitch, uvpitch, frame->GetPitch(), ywidth, yheight); 352 | } 353 | } 354 | 355 | void PlanarFrame::copyInternalTo(PlanarFrame &frame) 356 | { 357 | if (y == NULL) return; 358 | BitBlt(frame.y, frame.ypitch, y, ypitch, ywidth, yheight); 359 | if (u == NULL || v == NULL) return; 360 | BitBlt(frame.u, frame.uvpitch, u, uvpitch, uvwidth, uvheight); 361 | BitBlt(frame.v, frame.uvpitch, v, uvpitch, uvwidth, uvheight); 362 | } 363 | 364 | void PlanarFrame::copyInternalPlaneTo(PlanarFrame &frame, int plane) 365 | { 366 | if (plane == 0 && y != NULL) 367 | BitBlt(frame.y, frame.ypitch, y, ypitch, ywidth, yheight); 368 | else if (plane == 1 && u != NULL) 369 | BitBlt(frame.u, frame.uvpitch, u, uvpitch, uvwidth, uvheight); 370 | else if (plane == 2 && v != NULL) 371 | BitBlt(frame.v, frame.uvpitch, v, uvpitch, uvwidth, uvheight); 372 | } 373 | 374 | void PlanarFrame::copyChromaTo(PlanarFrame &dst) // copies the U and V planes into dst using dst's chroma width/height 375 | { if (u == NULL || v == NULL || dst.u == NULL || dst.v == NULL) return; // u/v not allocated in source or destination: nothing to copy 376 | BitBlt(dst.u, dst.uvpitch, u, uvpitch, dst.uvwidth, dst.uvheight); 377 | BitBlt(dst.v, dst.uvpitch, v, uvpitch, dst.uvwidth, dst.uvheight); 378 | } 379 | 380 | void PlanarFrame::copyToForBMP(PVideoFrame &dst, VideoInfo &viInfo) 381 | { 382 | uint8_t *dstp = dst->GetWritePtr(PLANAR_Y); 383 | if (viInfo.IsPlanar()) 384 | { 385 | int out_pitch = (ywidth + 3) & -4; 386 | BitBlt(dstp, out_pitch, y, ypitch, ywidth, yheight); 387 | BitBlt(dstp + (out_pitch*yheight), out_pitch >> 1, v, uvpitch, uvwidth, uvheight); 388 | BitBlt(dstp + (out_pitch*yheight) + ((out_pitch >> 1)*uvheight), out_pitch >> 1, u, uvpitch, uvwidth, uvheight); 389 | 
} 390 | else 391 | { 392 | int out_pitch = (dst->GetRowSize(PLANAR_Y) + 3) & -4; 393 | conv422toYUY2(y, u, v, dstp, ypitch, uvpitch, out_pitch, viInfo.width, viInfo.height); 394 | } 395 | } 396 | 397 | PlanarFrame& PlanarFrame::operator=(PlanarFrame &ob2) 398 | { 399 | cpu = ob2.cpu; 400 | ypitch = ob2.ypitch; 401 | yheight = ob2.yheight; 402 | ywidth = ob2.ywidth; 403 | uvpitch = ob2.uvpitch; 404 | uvheight = ob2.uvheight; 405 | uvwidth = ob2.uvwidth; 406 | this->copyFrom(ob2); 407 | return *this; 408 | } 409 | 410 | void PlanarFrame::convYUY2to422(const uint8_t *src, uint8_t *py, uint8_t *pu, 411 | uint8_t *pv, int pitch1, int pitch2Y, int pitch2UV, int width, int height) 412 | { 413 | #ifdef INTEL_INTRINSICS 414 | if ((cpu&CPUF_SSE2) && useSIMD) 415 | convYUY2to422_SSE2(src, py, pu, pv, pitch1, pitch2Y, pitch2UV, width, height); 416 | else 417 | #endif 418 | { 419 | width >>= 1; 420 | for (int y = 0; y < height; ++y) 421 | { 422 | for (int x = 0; x < width; ++x) 423 | { 424 | py[x << 1] = src[x << 2]; 425 | pu[x] = src[(x << 2) + 1]; 426 | py[(x << 1) + 1] = src[(x << 2) + 2]; 427 | pv[x] = src[(x << 2) + 3]; 428 | } 429 | py += pitch2Y; 430 | pu += pitch2UV; 431 | pv += pitch2UV; 432 | src += pitch1; 433 | } 434 | } 435 | } 436 | 437 | 438 | #ifdef INTEL_INTRINSICS 439 | void PlanarFrame::convYUY2to422_SSE2(const uint8_t *src, uint8_t *py, uint8_t *pu, 440 | uint8_t *pv, int pitch1, int pitch2Y, int pitch2UV, int width, int height) 441 | { 442 | width >>= 1; // width now counts chroma samples (one per 4-byte YUYV group) 443 | __m128i Ymask = _mm_set1_epi16(0x00FF); 444 | for (int y = 0; y < height; y++) { 445 | for (int x = 0; x < width; x += 4) { 446 | __m128i fullsrc = _mm_load_si128(reinterpret_cast<const __m128i *>(src + x * 4)); // VYUYVYUYVYUYVYUY 447 | __m128i yy = _mm_and_si128(fullsrc, Ymask); // 0Y0Y0Y0Y0Y0Y0Y0Y 448 | __m128i uvuv = _mm_srli_epi16(fullsrc, 8); // 0V0U0V0U0V0U0V0U 449 | yy = _mm_packus_epi16(yy, yy); // xxxxxxxxYYYYYYYY 450 | uvuv = _mm_packus_epi16(uvuv, uvuv); // xxxxxxxxVUVUVUVU 451 | __m128i 
uu = _mm_and_si128(uvuv, Ymask); // xxxxxxxx0U0U0U0U 452 | __m128i vv = _mm_srli_epi16(uvuv, 8); // xxxxxxxx0V0V0V0V 453 | uu = _mm_packus_epi16(uu, uu); // xxxxxxxxxxxxUUUU 454 | vv = _mm_packus_epi16(vv, vv); // xxxxxxxxxxxxVVVV 455 | _mm_storel_epi64(reinterpret_cast<__m128i *>(py + x * 2), yy); // store y 456 | *(uint32_t *)(pu + x) = _mm_cvtsi128_si32(uu); // store u 457 | *(uint32_t *)(pv + x) = _mm_cvtsi128_si32(vv); // store v 458 | } 459 | src += pitch1; 460 | py += pitch2Y; 461 | pu += pitch2UV; 462 | pv += pitch2UV; 463 | } 464 | } 465 | #endif 466 | 467 | void PlanarFrame::conv422toYUY2(uint8_t *py, uint8_t *pu, uint8_t *pv, 468 | uint8_t *dst, int pitch1Y, int pitch1UV, int pitch2, int width, int height) 469 | { 470 | #ifdef INTEL_INTRINSICS 471 | if ((cpu&CPUF_SSE2) && useSIMD) 472 | conv422toYUY2_SSE2(py, pu, pv, dst, pitch1Y, pitch1UV, pitch2, width, height); 473 | else 474 | #endif 475 | { 476 | width >>= 1; 477 | for (int y = 0; y < height; ++y) 478 | { 479 | for (int x = 0; x < width; ++x) 480 | { 481 | dst[x << 2] = py[x << 1]; 482 | dst[(x << 2) + 1] = pu[x]; 483 | dst[(x << 2) + 2] = py[(x << 1) + 1]; 484 | dst[(x << 2) + 3] = pv[x]; 485 | } 486 | py += pitch1Y; 487 | pu += pitch1UV; 488 | pv += pitch1UV; 489 | dst += pitch2; 490 | } 491 | } 492 | } 493 | 494 | 495 | #ifdef INTEL_INTRINSICS 496 | void PlanarFrame::conv422toYUY2_SSE2(uint8_t *py, uint8_t *pu, uint8_t *pv, 497 | uint8_t *dst, int pitch1Y, int pitch1UV, int pitch2, int width, int height) 498 | { 499 | width >>= 1; // width now counts chroma samples (one per 4-byte YUYV group) 500 | for (int y = 0; y < height; y++) { 501 | for (int x = 0; x < width; x += 4) { 502 | __m128i yy = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(py + x * 2)); // YYYYYYYY 503 | __m128i uu = _mm_castps_si128(_mm_load_ss(reinterpret_cast<const float *>(pu + x))); // 000000000000UUUU 504 | __m128i vv = _mm_castps_si128(_mm_load_ss(reinterpret_cast<const float *>(pv + x))); // 000000000000VVVV 505 | __m128i uvuv = _mm_unpacklo_epi8(uu, vv); // 00000000VUVUVUVU 506 | __m128i yuyv = 
_mm_unpacklo_epi8(yy,uvuv); // VYUYVYUYVYUYVYUY 507 | _mm_store_si128(reinterpret_cast<__m128i *>(dst + x * 4), yuyv); 508 | } 509 | dst += pitch2; 510 | py += pitch1Y; 511 | pu += pitch1UV; 512 | pv += pitch1UV; 513 | } 514 | } 515 | #endif 516 | 517 | // Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al. 518 | // http://www.avisynth.org 519 | 520 | // This program is free software; you can redistribute it and/or modify 521 | // it under the terms of the GNU General Public License as published by 522 | // the Free Software Foundation; either version 2 of the License, or 523 | // (at your option) any later version. 524 | // 525 | // This program is distributed in the hope that it will be useful, 526 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 527 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 528 | // GNU General Public License for more details. 529 | // 530 | // You should have received a copy of the GNU General Public License 531 | // along with this program; if not, write to the Free Software 532 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 533 | // http://www.gnu.org/copyleft/gpl.html . 534 | // 535 | // Linking Avisynth statically or dynamically with other modules is making a 536 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 537 | // General Public License cover the whole combination. 
538 | // 539 | // As a special exception, the copyright holders of Avisynth give you 540 | // permission to link Avisynth with independent modules that communicate with 541 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 542 | // terms of these independent modules, and to copy and distribute the 543 | // resulting combined work under terms of your choice, provided that 544 | // every copy of the combined work is accompanied by a complete copy of 545 | // the source code of Avisynth (the version of Avisynth used to produce the 546 | // combined work), being distributed under the terms of the GNU General 547 | // Public License plus this exception. An independent module is a module 548 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 549 | // import and export plugins, or graphical user interfaces. 550 | 551 | // from Avisynth 2.55 source... 552 | // copied so we don't need an 553 | // IScriptEnvironment pointer 554 | // to call it 555 | 556 | #include "avisynth.h" 557 | #include <cstring> // memcpy 558 | // Copies `height` rows of `row_size` bytes from srcp to dstp, advancing each pointer by its own pitch per row. 559 | void PlanarFrame::BitBlt(uint8_t* dstp, int dst_pitch, const uint8_t* srcp, 560 | int src_pitch, int row_size, int height) 561 | { 562 | if (!height || !row_size) return; // empty rectangle: nothing to copy 563 | if (height == 1 || (dst_pitch == src_pitch && src_pitch == row_size)) // single row, or rows are contiguous in both buffers 564 | memcpy(dstp, srcp, row_size * height); // copy only row_size bytes per row; src_pitch may exceed row_size when height == 1 565 | else 566 | { 567 | for (int y = height; y > 0; --y) 568 | { 569 | memcpy(dstp, srcp, row_size); 570 | dstp += dst_pitch; 571 | srcp += src_pitch; 572 | } 573 | } 574 | } 575 | 576 | int PlanarFrame::CheckMemory() 577 | { 578 | if (!debug) return 0; 579 | if (!y) return 0; 580 | // check buffer overrun 581 | uint32_t* pInt = (uint32_t*)(y - MIN_ALIGNMENT); 582 | for (int i = 0; i < MIN_ALIGNMENT / sizeof(uint32_t); i++) 583 | if (pInt[i] != 0xDEADBEEF) 584 | return 1; 585 | return 0; 586 | } 587 | 588 | -------------------------------------------------------------------------------- /TComb/PlanarFrame.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | ** My PlanarFrame class... fast mmx/sse2 YUY2 packed to planar and planar 3 | ** to packed conversions, and always gives 16 bit alignment for all 4 | ** planes. Supports YV12/YUY2 frames from avisynth, can do any planar format 5 | ** internally. 6 | ** 7 | ** Copyright (C) 2005-2006 Kevin Stone 8 | ** 9 | ** This program is free software; you can redistribute it and/or modify 10 | ** it under the terms of the GNU General Public License as published by 11 | ** the Free Software Foundation; either version 2 of the License, or 12 | ** (at your option) any later version. 13 | ** 14 | ** This program is distributed in the hope that it will be useful, 15 | ** but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | ** GNU General Public License for more details. 18 | ** 19 | ** You should have received a copy of the GNU General Public License 20 | ** along with this program; if not, write to the Free Software 21 | ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 22 | */ 23 | 24 | #ifndef __PlanarFrame_H__ 25 | #define __PlanarFrame_H__ 26 | 27 | #include 28 | #include "avisynth.h" 29 | 30 | #define MIN_ALIGNMENT 64 31 | 32 | #define PLANAR_420 1 33 | #define PLANAR_422 2 34 | #define PLANAR_444 3 35 | #define PLANAR_411 4 36 | #define PLANAR_400 5 37 | 38 | class PlanarFrame 39 | { 40 | private: 41 | int cpu; 42 | bool useSIMD, packed; 43 | int ypitch, uvpitch; 44 | int ywidth, uvwidth; 45 | int yheight, uvheight; 46 | bool debug = false; 47 | int debug_padding = 0; 48 | uint8_t *y, *u, *v; 49 | bool allocSpace(VideoInfo &viInfo); 50 | bool allocSpace(int specs[4]); 51 | void copyInternalFrom(PVideoFrame &frame, VideoInfo &viInfo); 52 | void copyInternalFrom(PlanarFrame &frame); 53 | void copyInternalTo(PVideoFrame &frame, VideoInfo &viInfo); 54 | void 
copyInternalTo(PlanarFrame &frame); 55 | void copyInternalPlaneTo(PlanarFrame &frame, int plane); 56 | void convYUY2to422(const uint8_t *src, uint8_t *py, uint8_t *pu, 57 | uint8_t *pv, int pitch1, int pitch2Y, int pitch2UV, int width, int height); 58 | void conv422toYUY2(uint8_t *py, uint8_t *pu, uint8_t *pv, 59 | uint8_t *dst, int pitch1Y, int pitch1UV, int pitch2, int width, int height); 60 | #ifdef INTEL_INTRINSICS 61 | void convYUY2to422_SSE2(const uint8_t* src, uint8_t* py, uint8_t* pu, 62 | uint8_t* pv, int pitch1, int pitch2Y, int pitch2UV, int width, int height); 63 | void conv422toYUY2_SSE2(uint8_t *py, uint8_t *pu, uint8_t *pv, 64 | uint8_t *dst, int pitch1Y, int pitch1UV, int pitch2, int width, int height); 65 | #endif 66 | 67 | public: 68 | PlanarFrame(int cpuInfo); 69 | PlanarFrame(VideoInfo &viInfo, int cpuInfo); 70 | PlanarFrame(VideoInfo &viInfo, bool _packed, int cpuInfo); 71 | ~PlanarFrame(); 72 | void createPlanar(int yheight, int uvheight, int ywidth, int uvwidth); 73 | void createPlanar(int height, int width, int chroma_format); 74 | void createFromProfile(VideoInfo &viInfo); 75 | void createFromFrame(PVideoFrame &frame, VideoInfo &viInfo); 76 | void createFromPlanar(PlanarFrame &frame); 77 | void copyFrom(PVideoFrame &frame, VideoInfo &viInfo); 78 | void copyTo(PVideoFrame &frame, VideoInfo &viInfo); 79 | void copyFrom(PlanarFrame &frame); 80 | void copyTo(PlanarFrame &frame); 81 | void copyChromaTo(PlanarFrame &dst); 82 | void copyToForBMP(PVideoFrame &dst, VideoInfo &viInfo); 83 | void copyPlaneTo(PlanarFrame &dst, int plane); 84 | void freePlanar(); 85 | uint8_t* GetPtr(int plane = 0); 86 | int NumComponents(); 87 | int GetWidth(int plane = 0); 88 | int GetHeight(int plane = 0); 89 | int GetPitch(int plane = 0); 90 | void BitBlt(uint8_t* dstp, int dst_pitch, const uint8_t* srcp, 91 | int src_pitch, int row_size, int height); 92 | int CheckMemory(); 93 | void FillMemDebug(); 94 | PlanarFrame& operator=(PlanarFrame &ob2); 95 | }; 96 | 97 | 
#endif -------------------------------------------------------------------------------- /TComb/TComb.h: -------------------------------------------------------------------------------- 1 | /* 2 | ** TComb v2.x for Avisynth 2.6 and Avisynth+ 3 | ** 4 | ** TComb is a temporal comb filter (it reduces cross-luminance (rainbowing) 5 | ** and cross-chrominance (dot crawl) artifacts in static areas of the picture). 6 | ** It will ONLY work with NTSC material, and WILL NOT work with telecined material 7 | ** where the rainbowing/dotcrawl was introduced prior to the telecine process! 8 | ** It must be used before ivtc or deinterlace. 9 | ** 10 | ** Copyright (C) 2021 Ferenc Pintér 11 | ** 12 | ** Copyright (C) 2015 Shane Panke 13 | ** 14 | ** Copyright (C) 2005-2006 Kevin Stone 15 | ** 16 | ** This program is free software; you can redistribute it and/or modify 17 | ** it under the terms of the GNU General Public License as published by 18 | ** the Free Software Foundation; either version 2 of the License, or 19 | ** (at your option) any later version. 20 | ** 21 | ** This program is distributed in the hope that it will be useful, 22 | ** but WITHOUT ANY WARRANTY; without even the implied warranty of 23 | ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 24 | ** GNU General Public License for more details. 25 | ** 26 | ** You should have received a copy of the GNU General Public License 27 | ** along with this program; if not, write to the Free Software 28 | ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 29 | */ 30 | 31 | #if defined(_WIN32) && !defined(INTEL_INTRINSICS) 32 | #error Forgot to set INTEL_INTRINSICS? 
Comment out this line if not 33 | #endif 34 | 35 | #include "avisynth.h" 36 | #include "common.h" 37 | #include 38 | #include 39 | #include "PlanarFrame.h" 40 | 41 | // version appears in .rc as well 42 | #define VERSION "v2.3" 43 | 44 | //#define OLD_ASM 45 | 46 | #define min3(a,b,c) std::min(std::min(a,b),c) 47 | #define max3(a,b,c) std::max(std::max(a,b),c) 48 | #define min4(a,b,c,d) std::min(std::min(a,b),std::min(c,d)) 49 | #define max4(a,b,c,d) std::max(std::max(a,b),std::max(c,d)) 50 | 51 | class TCombFrame 52 | { 53 | public: 54 | int fnum; 55 | bool sc; 56 | bool isValid[11]; 57 | PlanarFrame* orig, * msk1, * msk2; 58 | PlanarFrame** b, * avg, * omsk; 59 | TCombFrame(); 60 | TCombFrame(VideoInfo& vi, int cpuFlags); 61 | ~TCombFrame(); 62 | void setFNum(int i); 63 | }; 64 | 65 | class TCombCache 66 | { 67 | public: 68 | TCombFrame** frames; 69 | int start_pos, size; 70 | TCombCache(); 71 | TCombCache(int _size, VideoInfo& vi, int cpuFlags); 72 | ~TCombCache(); 73 | void resetCacheStart(int first, int last); 74 | int getCachePos(int n); 75 | }; 76 | 77 | class TComb : public GenericVideoFilter 78 | { 79 | public: 80 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); 81 | TComb(PClip _child, int _mode, int _fthreshL, int _fthreshC, int _othreshL, 82 | int othreshC, bool _map, double _scthresh, bool _debug, int _opt, IScriptEnvironment* env); 83 | ~TComb(); 84 | private: 85 | bool map, debug; 86 | int fthreshL, fthreshC; 87 | int othreshL, othreshC; 88 | int mode, opt; 89 | unsigned long diffmaxsc; 90 | double scthresh; 91 | PlanarFrame* dstPF, * tmpPF; 92 | PlanarFrame* minPF, * maxPF; 93 | PlanarFrame* padPF; 94 | TCombCache* tdc; 95 | char buf[256]; 96 | int mapn(int n); 97 | void getAverages(int lc, IScriptEnvironment* env); 98 | void buildOscillationMasks(int lc, IScriptEnvironment* env); 99 | void getFinalMasks(int lc, IScriptEnvironment* env); 100 | void insertFrame(PVideoFrame& src, int pos, int fnum, int lc, IScriptEnvironment* env); 
101 | void buildDiffMask(TCombFrame* tf1, TCombFrame* tf2, int lc, IScriptEnvironment* env); 102 | void buildDiffMasks(int lc, IScriptEnvironment* env); 103 | void absDiff(PlanarFrame* src1, PlanarFrame* src2, PlanarFrame* dst, 104 | int lc, IScriptEnvironment* env); 105 | void absDiffAndMinMask(PlanarFrame* src1, PlanarFrame* src2, PlanarFrame* dst, 106 | int lc, IScriptEnvironment* env); 107 | void VerticalBlur3(PlanarFrame* src, PlanarFrame* dst, int lc, IScriptEnvironment* env); 108 | void HorizontalBlur3(PlanarFrame* src, PlanarFrame* dst, int lc, IScriptEnvironment* env); 109 | void getStartStop(int lc, int& start, int& stop); 110 | void buildFinalFrame(PlanarFrame* p2, PlanarFrame* p1, PlanarFrame* src, 111 | PlanarFrame* n1, PlanarFrame* n2, PlanarFrame* m1, PlanarFrame* m2, PlanarFrame* m3, 112 | PlanarFrame* dst, int lc, IScriptEnvironment* env); 113 | void copyPad(PlanarFrame* src, PlanarFrame* dst, int lc, IScriptEnvironment* env); 114 | void MinMax(PlanarFrame* src, PlanarFrame* dmin, PlanarFrame* dmax, int lc, 115 | IScriptEnvironment* env); 116 | void HorizontalBlur6(PlanarFrame* src, PlanarFrame* dst, int lc, IScriptEnvironment* env); 117 | void absDiffAndMinMaskThresh(PlanarFrame* src1, PlanarFrame* src2, PlanarFrame* dst, 118 | int lc, IScriptEnvironment* env); 119 | void buildFinalMask(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* m1, 120 | PlanarFrame* dst, int lc, IScriptEnvironment* env); 121 | void calcAverages(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* dst, int lc, IScriptEnvironment* env); 122 | void checkOscillation5(PlanarFrame* p2, PlanarFrame* p1, PlanarFrame* s1, 123 | PlanarFrame* n1, PlanarFrame* n2, PlanarFrame* dst, int lc, IScriptEnvironment* env); 124 | void checkAvgOscCorrelation(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* s3, 125 | PlanarFrame* s4, PlanarFrame* dst, int lc, IScriptEnvironment* env); 126 | void or3Masks(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* s3, 127 | PlanarFrame* dst, int lc, IScriptEnvironment* 
env); 128 | void orAndMasks(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* dst, int lc, IScriptEnvironment* env); 129 | void andMasks(PlanarFrame* s1, PlanarFrame* s2, PlanarFrame* dst, int lc, IScriptEnvironment* env); 130 | bool checkSceneChange(PlanarFrame* s1, PlanarFrame* s2, int n, IScriptEnvironment* env); 131 | void andNeighborsInPlace(PlanarFrame* src, int lc, IScriptEnvironment* env); 132 | }; 133 | 134 | void checkSceneChangePlanar_1_SSE2_simd(const uint8_t* prvp, const uint8_t* srcp, 135 | int height, int width, int prv_pitch, int src_pitch, uint64_t& diffp); 136 | 137 | template<typename pixel_t> // pixel_t: element type of the prvp/srcp buffers 138 | void checkSceneChangePlanar_1_c(const pixel_t* prvp, const pixel_t* srcp, 139 | int height, int width, int prv_pitch, int src_pitch, uint64_t& diffp); 140 | 141 | void andMasks_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height); 142 | void andMasks_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height); 143 | 144 | void orAndMasks_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height); 145 | void orAndMasks_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height); 146 | 147 | void or3Masks_SSE2_simd(const uint8_t * s1p, const uint8_t * s2p, const uint8_t * s3p, uint8_t * dstp, int stride, int width, int height); 148 | void or3Masks_c(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* s3p, uint8_t* dstp, int stride, int width, int height); 149 | 150 | void calcAverages_SSE2_simd(const uint8_t * s1p, const uint8_t * s2p, uint8_t * dstp, int stride, int width, int height); 151 | void calcAverages_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height); 152 | 153 | void MinMax_SSE2_simd(const uint8_t * srcp, uint8_t * dstpMin, uint8_t * dstpMax, int src_stride, int dmin_stride, int width, int height, int thresh); 154 | void MinMax_c(const uint8_t* srcp, uint8_t* dstpMin, 
uint8_t* dstpMax, int src_stride, int dmin_stride, int width, int height, int thresh); 155 | 156 | void absDiff_SSE2_simd(const uint8_t * srcp1, const uint8_t * srcp2, uint8_t * dstp, int stride, int width, int height); 157 | void absDiff_c(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height); 158 | 159 | void buildFinalMask_SSE2_simd(const uint8_t * s1p, const uint8_t * s2p, const uint8_t * m1p, uint8_t * dstp, int stride, int width, int height, int thresh); 160 | void buildFinalMask_c(const uint8_t * s1p, const uint8_t * s2p, const uint8_t * m1p, uint8_t * dstp, int stride, int width, int height, int thresh); 161 | 162 | void checkOscillation5_SSE2_simd(const uint8_t * p2p, const uint8_t * p1p, const uint8_t * s1p, const uint8_t * n1p, const uint8_t * n2p, uint8_t * dstp, int stride, int width, int height, int thresh); 163 | void checkOscillation5_c(const uint8_t * p2p, const uint8_t * p1p, const uint8_t * s1p, const uint8_t * n1p, const uint8_t * n2p, uint8_t * dstp, int stride, int width, int height, int thresh); 164 | 165 | void absDiffAndMinMaskThresh_SSE2_simd(const uint8_t * srcp1, const uint8_t * srcp2, uint8_t * dstp, int stride, int width, int height, int thresh); 166 | void absDiffAndMinMaskThresh_c(const uint8_t * srcp1, const uint8_t * srcp2, uint8_t * dstp, int stride, int width, int height, int thresh); 167 | 168 | void absDiffAndMinMask_SSE2_simd(const uint8_t * srcp1, const uint8_t * srcp2, uint8_t * dstp, int stride, int width, int height); 169 | void absDiffAndMinMask_c(const uint8_t * srcp1, const uint8_t * srcp2, uint8_t * dstp, int stride, int width, int height); 170 | 171 | void checkAvgOscCorrelation_SSE2_simd(const uint8_t * s1p, const uint8_t * s2p, const uint8_t * s3p, const uint8_t * s4p, uint8_t * dstp, int stride, int width, int height, int thresh); 172 | void checkAvgOscCorrelation_c(const uint8_t * s1p, const uint8_t * s2p, const uint8_t * s3p, const uint8_t * s4p, uint8_t * dstp, int stride, 
int width, int height, int thresh); 173 | 174 | void VerticalBlur3_SSE2_simd(const uint8_t * srcp, uint8_t * dstp, int stride, int width, int height); 175 | void VerticalBlur3_c(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height); 176 | 177 | void HorizontalBlur3_SSE2_simd(const uint8_t * srcp, uint8_t * dstp, int stride, int width, int height); 178 | void HorizontalBlur3_c(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height); 179 | 180 | void HorizontalBlur6_SSE2_simd(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height); 181 | void HorizontalBlur6_c(const uint8_t * srcp, uint8_t * dstp, int stride, int width, int height); 182 | 183 | void andNeighborsInPlace_SSE2_simd(uint8_t * srcp, int stride, int width, int height); 184 | // no distinct C here 185 | 186 | -------------------------------------------------------------------------------- /TComb/TComb.rc: -------------------------------------------------------------------------------- 1 | // Microsoft Visual C++ generated resource script. 2 | // 3 | #include "resource.h" 4 | 5 | #define APSTUDIO_READONLY_SYMBOLS 6 | ///////////////////////////////////////////////////////////////////////////// 7 | // 8 | // Generated from the TEXTINCLUDE 2 resource. 
9 | // 10 | #include "winres.h" 11 | 12 | ///////////////////////////////////////////////////////////////////////////// 13 | #undef APSTUDIO_READONLY_SYMBOLS 14 | 15 | ///////////////////////////////////////////////////////////////////////////// 16 | // English (United States) resources 17 | 18 | #if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) 19 | LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US 20 | #pragma code_page(1252) 21 | 22 | #ifdef APSTUDIO_INVOKED 23 | ///////////////////////////////////////////////////////////////////////////// 24 | // 25 | // TEXTINCLUDE 26 | // 27 | 28 | 1 TEXTINCLUDE 29 | BEGIN 30 | "resource.h\0" 31 | END 32 | 33 | 2 TEXTINCLUDE 34 | BEGIN 35 | "#include ""winres.h""\r\n" 36 | "\0" 37 | END 38 | 39 | 3 TEXTINCLUDE 40 | BEGIN 41 | "\r\n" 42 | "\0" 43 | END 44 | 45 | #endif // APSTUDIO_INVOKED 46 | 47 | 48 | ///////////////////////////////////////////////////////////////////////////// 49 | // 50 | // Version 51 | // 52 | 53 | VS_VERSION_INFO VERSIONINFO 54 | FILEVERSION 2,3,0,0 55 | PRODUCTVERSION 2,3,0,0 56 | FILEFLAGSMASK 0x17L 57 | #ifdef _DEBUG 58 | FILEFLAGS 0x1L 59 | #else 60 | FILEFLAGS 0x0L 61 | #endif 62 | FILEOS 0x4L 63 | FILETYPE 0x2L 64 | FILESUBTYPE 0x0L 65 | BEGIN 66 | BLOCK "StringFileInfo" 67 | BEGIN 68 | BLOCK "040904b0" 69 | BEGIN 70 | VALUE "FileDescription", "TComb for Avisynth 2.6 and Avisynth+" 71 | VALUE "FileVersion", "2.3.0.0" 72 | VALUE "LegalCopyright", "Copyright (C) 2005-2006 Kevin Stone 2015- et al." 
73 | VALUE "OriginalFilename", "TComb.dll" 74 | VALUE "ProductVersion", "2.3.0.0" 75 | END 76 | END 77 | BLOCK "VarFileInfo" 78 | BEGIN 79 | VALUE "Translation", 0x409, 1200 80 | END 81 | END 82 | 83 | #endif // English (United States) resources 84 | ///////////////////////////////////////////////////////////////////////////// 85 | 86 | 87 | 88 | #ifndef APSTUDIO_INVOKED 89 | ///////////////////////////////////////////////////////////////////////////// 90 | // 91 | // Generated from the TEXTINCLUDE 3 resource. 92 | // 93 | 94 | 95 | ///////////////////////////////////////////////////////////////////////////// 96 | #endif // not APSTUDIO_INVOKED 97 | 98 | -------------------------------------------------------------------------------- /TComb/TComb.sln: -------------------------------------------------------------------------------- 1 | Microsoft Visual Studio Solution File, Format Version 12.00 2 | # Visual Studio 2013 3 | VisualStudioVersion = 12.0.30501.0 4 | MinimumVisualStudioVersion = 10.0.40219.1 5 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TComb", "TComb.vcxproj", "{B4188B7A-C76E-4E35-946F-3477273D0A44}" 6 | EndProject 7 | Global 8 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 9 | Debug|Win32 = Debug|Win32 10 | Debug|x64 = Debug|x64 11 | Release|Win32 = Release|Win32 12 | Release|x64 = Release|x64 13 | EndGlobalSection 14 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 15 | {B4188B7A-C76E-4E35-946F-3477273D0A44}.Debug|Win32.ActiveCfg = Debug|Win32 16 | {B4188B7A-C76E-4E35-946F-3477273D0A44}.Debug|Win32.Build.0 = Debug|Win32 17 | {B4188B7A-C76E-4E35-946F-3477273D0A44}.Debug|x64.ActiveCfg = Debug|x64 18 | {B4188B7A-C76E-4E35-946F-3477273D0A44}.Debug|x64.Build.0 = Debug|x64 19 | {B4188B7A-C76E-4E35-946F-3477273D0A44}.Release|Win32.ActiveCfg = Release|Win32 20 | {B4188B7A-C76E-4E35-946F-3477273D0A44}.Release|Win32.Build.0 = Release|Win32 21 | {B4188B7A-C76E-4E35-946F-3477273D0A44}.Release|x64.ActiveCfg = Release|x64 22 | 
{B4188B7A-C76E-4E35-946F-3477273D0A44}.Release|x64.Build.0 = Release|x64 23 | EndGlobalSection 24 | GlobalSection(SolutionProperties) = preSolution 25 | HideSolutionNode = FALSE 26 | EndGlobalSection 27 | EndGlobal 28 | -------------------------------------------------------------------------------- /TComb/TComb.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | 11 | 12 | 13 | 19 | 32 | 34 | 45 | 47 | 49 | 51 | 53 | 55 | 57 | 59 | 61 | 63 | 65 | 66 | 73 | 92 | 94 | 104 | 106 | 108 | 110 | 112 | 114 | 116 | 118 | 120 | 122 | 124 | 125 | 126 | 127 | 128 | 129 | 133 | 135 | 136 | 138 | 139 | 141 | 142 | 143 | 147 | 149 | 150 | 152 | 153 | 155 | 156 | 158 | 159 | 161 | 162 | 164 | 165 | 166 | 170 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | -------------------------------------------------------------------------------- /TComb/TComb.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {B4188B7A-C76E-4E35-946F-3477273D0A44} 23 | Win32Proj 24 | 10.0 25 | 26 | 27 | 28 | DynamicLibrary 29 | v142 30 | MultiByte 31 | true 32 | 33 | 34 | DynamicLibrary 35 | v142 36 | MultiByte 37 | 38 | 39 | DynamicLibrary 40 | v142 41 | MultiByte 42 | 43 | 44 | DynamicLibrary 45 | v142 46 | MultiByte 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | <_ProjectFileVersion>12.0.30501.0 67 | 68 | 69 | Debug\ 70 | Debug\ 71 | true 72 | 73 | 74 | true 75 | 76 | 77 | Release\ 78 | Release\ 79 | false 80 | 81 | 82 | false 83 | 84 | 85 | 86 | Disabled 87 | INTEL_INTRINSICS;WIN32;_DEBUG;_WINDOWS;_USRDLL;TCOMB_EXPORTS;%(PreprocessorDefinitions) 88 | true 89 | EnableFastChecks 90 | MultiThreadedDebug 91 | true 92 | true 93 | 94 | Level3 95 | EditAndContinue 96 | NoListing 97 | 98 | 99 | $(OutDir)TComb.dll 100 | true 
101 | $(OutDir)TComb.pdb 102 | Windows 103 | false 104 | false 105 | $(OutDir)TComb.lib 106 | MachineX86 107 | false 108 | 109 | 110 | 111 | 112 | Disabled 113 | INTEL_INTRINSICS;WIN32;_DEBUG;_WINDOWS;_USRDLL;TCOMB_EXPORTS;%(PreprocessorDefinitions) 114 | EnableFastChecks 115 | MultiThreadedDebug 116 | true 117 | true 118 | 119 | 120 | Level3 121 | ProgramDatabase 122 | 123 | 124 | $(OutDir)TComb.dll 125 | true 126 | $(OutDir)TComb.pdb 127 | Windows 128 | false 129 | false 130 | $(OutDir)TComb.lib 131 | 132 | 133 | 134 | 135 | Full 136 | AnySuitable 137 | true 138 | Speed 139 | true 140 | false 141 | INTEL_INTRINSICS;WIN32;NDEBUG;_WINDOWS;_USRDLL;TCOMB_EXPORTS;%(PreprocessorDefinitions) 142 | MultiThreaded 143 | false 144 | true 145 | 146 | Level3 147 | ProgramDatabase 148 | true 149 | NoListing 150 | 151 | 152 | $(OutDir)TComb.dll 153 | false 154 | Windows 155 | true 156 | true 157 | $(OutDir)TComb.lib 158 | MachineX86 159 | UseLinkTimeCodeGeneration 160 | false 161 | 162 | 163 | 164 | 165 | MaxSpeed 166 | AnySuitable 167 | true 168 | Speed 169 | true 170 | false 171 | INTEL_INTRINSICS;WIN32;NDEBUG;_WINDOWS;_USRDLL;TCOMB_EXPORTS;%(PreprocessorDefinitions) 172 | MultiThreaded 173 | false 174 | true 175 | 176 | 177 | Level3 178 | ProgramDatabase 179 | AssemblyAndSourceCode 180 | 181 | 182 | $(OutDir)TComb.dll 183 | true 184 | Windows 185 | true 186 | true 187 | $(OutDir)TComb.lib 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | true 215 | true 216 | 217 | 218 | true 219 | true 220 | 221 | 222 | 223 | 224 | 225 | 226 | -------------------------------------------------------------------------------- /TComb/TComb.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | 
{93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | Source Files 26 | 27 | 28 | 29 | 30 | Header Files 31 | 32 | 33 | Header Files 34 | 35 | 36 | Header Files 37 | 38 | 39 | Header Files 40 | 41 | 42 | Header Files 43 | 44 | 45 | Header Files 46 | 47 | 48 | Header Files 49 | 50 | 51 | Header Files 52 | 53 | 54 | Header Files 55 | 56 | 57 | Header Files 58 | 59 | 60 | Header Files 61 | 62 | 63 | Header Files 64 | 65 | 66 | 67 | 68 | Resource Files 69 | 70 | 71 | 72 | 73 | Source Files 74 | 75 | 76 | Source Files 77 | 78 | 79 |
--------------------------------------------------------------------------------
/TComb/TComb_asm.asm:
--------------------------------------------------------------------------------
; 32-bit (x86, C calling convention) SSE2 kernels.
;
; Conventions shared by every procedure in this file:
;   * rows are processed 16 bytes at a time; the inner loop runs while the
;     byte offset (ecx) is < width_, so a partial last vector is processed
;     as a full 16-byte vector;
;   * movdqa / memory-operand accesses require 16-byte aligned addresses;
;   * height is decremented to zero with dec/jnz, so it must be >= 1.
.xmm
.model flat,c

.data

align 16

onesByte qword 2 dup(0101010101010101h)       ; 0x01 in every byte
sixsMask_W qword 2 dup(0006000600060006h)     ; 6 in every 16-bit word
eightsMask_W qword 2 dup(0008000800080008h)   ; 8 in every 16-bit word

.code

; dstp[x] = m1p[x] AND (|s1p[x] - s2p[x]| < thresh ? 0FFh : 0)
buildFinalMask_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,m1p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword

    mov eax,s1p
    mov ebx,s2p
    mov edx,m1p
    mov esi,dstp
    mov edi,width_

    ; broadcast the low byte of (thresh-1) to all 16 bytes of xmm4
    dec thresh
    movd xmm4,thresh
    punpcklbw xmm4, xmm4
    punpcklwd xmm4, xmm4
    punpckldq xmm4, xmm4
    punpcklqdq xmm4, xmm4

    pxor xmm5,xmm5                  ; zero, for the compare

yloop:
    xor ecx,ecx
    align 16
xloop:
    movdqa xmm0,[eax+ecx]
    movdqa xmm1,[ebx+ecx]
    movdqa xmm2,xmm0
    psubusb xmm0,xmm1
    psubusb xmm1,xmm2
    por xmm0,xmm1                   ; absolute difference
    psubusb xmm0,xmm4               ; saturating minus (thresh-1)
    pcmpeqb xmm0,xmm5               ; 0FFh where difference < thresh
    pand xmm0,[edx+ecx]
    movdqa [esi+ecx],xmm0

    add ecx,16
    cmp ecx,edi
    jl xloop

    add eax,stride
    add ebx,stride
    add edx,stride
    add esi,stride
    dec height
    jnz yloop

    ret

buildFinalMask_SSE2 endp



; In place: srcp[x] &= OR of the three horizontal neighbours (x-1, x, x+1)
; in the row above and in the row below.
; Reads one row before and one row after the processed rows, and one byte
; left/right of each vector.
andNeighborsInPlace_SSE2 proc public uses esi edi srcp:dword,stride:dword,width_:dword,height:dword

    mov eax,srcp
    mov edx,width_
    mov esi,eax
    sub esi,stride                  ; esi = row above
    mov edi,eax
    add edi,stride                  ; edi = row below

yloop:
    xor ecx,ecx
    align 16
xloop:
    movdqa xmm0,[esi+ecx]
    movdqu xmm1,[esi+ecx-1]
    por xmm0,xmm1
    movdqu xmm1,[esi+ecx+1]
    por xmm0,xmm1
    movdqa xmm1,[eax+ecx]           ; current row
    movdqu xmm2,[edi+ecx-1]
    por xmm0,xmm2
    por xmm0,[edi+ecx]
    movdqu xmm2,[edi+ecx+1]
    por xmm0,xmm2
    pand xmm0,xmm1
    movdqa [eax+ecx],xmm0

    add ecx,16
    cmp ecx,edx
    jl xloop

    add eax,stride
    add esi,stride
    add edi,stride
    dec height
    jnz yloop

    ret

andNeighborsInPlace_SSE2 endp



; dstp[x] = |srcp1[x] - srcp2[x]|
absDiff_SSE2 proc public uses ebx esi srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword

    mov eax,srcp1
    mov esi,srcp2
    mov ebx,dstp
    mov edx,width_

yloop:
    xor ecx,ecx
    align 16
xloop:
    movdqa xmm0,[eax+ecx]
    movdqa xmm1,[esi+ecx]
    movdqa xmm2,xmm0
    psubusb xmm0,xmm1
    psubusb xmm1,xmm2
    por xmm0,xmm1                   ; one of the two saturated differences is 0
    movdqa [ebx+ecx],xmm0

    add ecx,16
    cmp ecx,edx
    jl xloop

    add eax,stride
    add esi,stride
    add ebx,stride
    dec height
    jnz yloop

    ret

absDiff_SSE2 endp



; dstp[x] = min(dstp[x], |srcp1[x] - srcp2[x]|)
absDiffAndMinMask_SSE2 proc public uses ebx esi edi srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword

    mov eax,srcp1
    mov esi,srcp2
    mov ebx,dstp
    mov edx,width_
    mov edi,height

yloop:
    xor ecx,ecx
    align 16
xloop:
    movdqa xmm0,[eax+ecx]
    movdqa xmm1,[esi+ecx]
    movdqa xmm2,xmm0
    psubusb xmm0,xmm1
    psubusb xmm1,xmm2
    por xmm0,xmm1                   ; absolute difference
    pminub xmm0,[ebx+ecx]
    movdqa [ebx+ecx],xmm0

    add ecx,16
    cmp ecx,edx
    jl xloop

    add eax,stride
    add esi,stride
    add ebx,stride
    dec edi
    jnz yloop

    ret

absDiffAndMinMask_SSE2 endp



; dstp[x] = (min(dstp[x], |srcp1[x] - srcp2[x]|) < thresh) ? 0FFh : 0
absDiffAndMinMaskThresh_SSE2 proc public uses ebx esi edi srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword

    mov eax,srcp1
    mov esi,srcp2
    mov ebx,dstp
    mov edx,width_
    mov edi,height

    ; broadcast the low byte of (thresh-1) to all 16 bytes of xmm3
    dec thresh
    movd xmm3,thresh
    punpcklbw xmm3,xmm3
    punpcklwd xmm3,xmm3
    punpckldq xmm3,xmm3
    punpcklqdq xmm3,xmm3

    pxor xmm4,xmm4                  ; zero, for the compare

yloop:
    xor ecx,ecx
    align 16
xloop:
    movdqa xmm0,[eax+ecx]
    movdqa xmm1,[esi+ecx]
    movdqa xmm2,xmm0
    psubusb xmm0,xmm1
    psubusb xmm1,xmm2
    por xmm0,xmm1                   ; absolute difference
    pminub xmm0,[ebx+ecx]
    psubusb xmm0,xmm3               ; saturating minus (thresh-1)
    pcmpeqb xmm0,xmm4               ; 0FFh where value < thresh
    movdqa [ebx+ecx],xmm0

    add ecx,16
    cmp ecx,edx
    jl xloop

    add eax,stride
    add esi,stride
    add ebx,stride
    dec edi
    jnz yloop

    ret

absDiffAndMinMaskThresh_SSE2 endp



; For every x: take the minimum and maximum over the nine bytes at columns
; x-1, x, x+1 (relative to srcp) of the previous, current and next row, then
;   minp[x] = saturating(min - thresh), maxp[x] = saturating(max + thresh).
; Both output planes advance by min_stride per row.
MinMax_SSE2 proc public uses ebx esi edi srcp:dword,minp:dword,maxp:dword,src_stride:dword,min_stride:dword,width_:dword,height:dword,thresh:dword

    mov eax,srcp
    mov esi,eax
    sub esi,src_stride              ; esi = row above
    mov edi,eax
    add edi,src_stride              ; edi = row below
    mov ebx,minp
    mov edx,maxp

    ; broadcast the low byte of thresh to all 16 bytes of xmm3
    movd xmm3,thresh
    punpcklbw xmm3,xmm3
    punpcklwd xmm3,xmm3
    punpckldq xmm3,xmm3
    punpcklqdq xmm3,xmm3

yloop:
    xor ecx,ecx
    align 16
xloop:
    ; srcp-1 is aligned because the pointer passed to this function is srcp+stride+1.
    movdqa xmm0,[esi+ecx-1]         ; xmm0 = running min
    movdqa xmm1,xmm0                ; xmm1 = running max
    movdqu xmm2,[esi+ecx]
    pminub xmm0,xmm2
    pmaxub xmm1,xmm2
    movdqu xmm2,[esi+ecx+1]
    pminub xmm0,xmm2
    pmaxub xmm1,xmm2
    movdqa xmm2,[eax+ecx-1]
    pminub xmm0,xmm2
    pmaxub xmm1,xmm2
    movdqu xmm2,[eax+ecx]
    pminub xmm0,xmm2
    pmaxub xmm1,xmm2
    movdqu xmm2,[eax+ecx+1]
    pminub xmm0,xmm2
    pmaxub xmm1,xmm2
    movdqa xmm2,[edi+ecx-1]
    pminub xmm0,xmm2
    pmaxub xmm1,xmm2
    movdqu xmm2,[edi+ecx]
    pminub xmm0,xmm2
    pmaxub xmm1,xmm2
    movdqu xmm2,[edi+ecx+1]
    pminub xmm0,xmm2
    pmaxub xmm1,xmm2
    psubusb xmm0,xmm3
    paddusb xmm1,xmm3
    movdqa [ebx+ecx],xmm0
    movdqa [edx+ecx],xmm1

    add ecx,16
    cmp ecx,width_
    jl xloop

    add esi,src_stride
    add eax,src_stride
    add edi,src_stride
    add ebx,min_stride
    add edx,min_stride
    dec height
    jnz yloop

    ret

MinMax_SSE2 endp



; With min31/max31 = min/max of (p2p, s1p, n2p) and min22/max22 = min/max of
; (p1p, n1p), per byte:
;   dstp[x] = 0FFh when
;       (max31 -sat (min22 -sat 1) == 0  OR  max22 -sat (min31 -sat 1) == 0)
;       AND max22 - min22 < thresh AND max31 - min31 < thresh
;   else 0.   (-sat = unsigned saturating subtraction)
; eax doubles as the p2p row pointer and as a scratch register for dstp, so
; it is reloaded from the (updated) arguments around the store.
checkOscillation5_SSE2 proc public uses ebx esi edi p2p:dword,p1p:dword,s1p:dword,n1p:dword,n2p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword

    mov eax,p2p
    mov ebx,p1p
    mov edx,s1p
    mov edi,n1p
    mov esi,n2p


    pxor xmm6,xmm6                  ; zero, for the compares

    ; broadcast the low byte of (thresh-1) to all 16 bytes of xmm7
    dec thresh
    movd xmm7,thresh
    punpcklbw xmm7,xmm7
    punpcklwd xmm7,xmm7
    punpckldq xmm7,xmm7
    punpcklqdq xmm7,xmm7

yloop:
    xor ecx,ecx
    align 16
xloop:
    movdqa xmm0,[eax+ecx]
    movdqa xmm2,[ebx+ecx]
    movdqa xmm1,xmm0
    movdqa xmm3,xmm2
    pminub xmm0,[edx+ecx]
    pmaxub xmm1,[edx+ecx]
    pminub xmm2,[edi+ecx]           ; xmm2 = min22
    pmaxub xmm3,[edi+ecx]           ; xmm3 = max22
    pminub xmm0,[esi+ecx]           ; xmm0 = min31
    pmaxub xmm1,[esi+ecx]           ; xmm1 = max31

    movdqa xmm4,xmm3
    movdqa xmm5,xmm1
    psubusb xmm4,xmm2               ; max22 - min22
    psubusb xmm5,xmm0               ; max31 - min31
    ; minus (thresh-1)
    psubusb xmm4,xmm7
    psubusb xmm5,xmm7
    ; minus 1
    psubusb xmm2,oword ptr onesByte
    psubusb xmm0,oword ptr onesByte

    psubusb xmm1,xmm2               ; max31 - (min22-1)
    psubusb xmm3,xmm0               ; max22 - (min31-1)

    pcmpeqb xmm1,xmm6
    pcmpeqb xmm3,xmm6
    pcmpeqb xmm4,xmm6
    pcmpeqb xmm5,xmm6
    mov eax,dstp                    ; eax temporarily holds the dst row
    por xmm1,xmm3
    pand xmm4,xmm5
    pand xmm1,xmm4
    movdqa [eax+ecx],xmm1

    add ecx,16
    mov eax,p2p                     ; restore the p2p row pointer
    cmp ecx,width_
    jl xloop

    mov eax,stride
    add ebx,stride
    add p2p,eax                     ; advance the in-memory row pointers
    add edx,stride
    add edi,stride
    add dstp,eax
    add esi,stride
    mov eax,p2p
    dec height
    jnz yloop

    ret

checkOscillation5_SSE2 endp



; dstp[x] = pavgb(s1p[x], s2p[x])  (rounded-up byte average)
calcAverages_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword

    mov eax,s1p
    mov ebx,s2p
    mov edx,dstp
    mov edi,height
    mov esi,width_

yloop:
    xor ecx,ecx
    align 16
xloop:
    movdqa xmm0,[eax+ecx]
    pavgb xmm0,[ebx+ecx]
    movdqa [edx+ecx],xmm0

    add ecx,16
    cmp ecx,esi
    jl xloop

    add eax,stride
    add ebx,stride
    add edx,stride
    dec edi
    jnz yloop

    ret

calcAverages_SSE2 endp



; dstp[x] &= ((max(s1..s4)[x] - min(s1..s4)[x]) < thresh ? 0FFh : 0)
checkAvgOscCorrelation_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,s3p:dword,s4p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword

    mov eax,s1p
    mov ebx,s2p
    mov edx,s3p
    mov edi,s4p
    mov esi,dstp

    ; broadcast the low byte of (thresh-1) to all 16 bytes of xmm2
    dec thresh
    movd xmm2, thresh
    punpcklbw xmm2, xmm2
    punpcklwd xmm2, xmm2
    punpckldq xmm2, xmm2
    punpcklqdq xmm2, xmm2

    pxor xmm3,xmm3                  ; zero, for the compare

yloop:
    xor ecx,ecx
    align 16
xloop:
    movdqa xmm5,[eax+ecx]
    movdqa xmm0,xmm5                ; xmm0 = running min
    movdqa xmm1,xmm5                ; xmm1 = running max
    movdqa xmm5,[ebx+ecx]
    pminub xmm0,xmm5
    pmaxub xmm1,xmm5
    movdqa xmm5,[edx+ecx]
    pminub xmm0,xmm5
    pmaxub xmm1,xmm5
    movdqa xmm5,[edi+ecx]
    pminub xmm0,xmm5
    pmaxub xmm1,xmm5
    psubusb xmm1,xmm0               ; max - min
    movdqa xmm4,[esi+ecx]
    psubusb xmm1,xmm2               ; saturating minus (thresh-1)
    pcmpeqb xmm1,xmm3               ; 0FFh where range < thresh
    pand xmm1,xmm4
    movdqa [esi+ecx],xmm1

    add ecx,16
    cmp ecx,width_
    jl xloop

    add eax,stride
    add ebx,stride
    add edx,stride
    add edi,stride
    add esi,stride
    dec height
    jnz yloop

    ret

checkAvgOscCorrelation_SSE2 endp



; dstp[x] = s1p[x] OR s2p[x] OR s3p[x]
or3Masks_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,s3p:dword,dstp:dword,stride:dword,width_:dword,height:dword

    mov eax,s1p
    mov ebx,s2p
    mov edx,s3p
    mov edi,dstp
    mov esi,width_

yloop:
    xor ecx,ecx
    align 16
xloop:
    movdqa xmm0,[eax+ecx]
    por xmm0,[ebx+ecx]
    por xmm0,[edx+ecx]
    movdqa [edi+ecx],xmm0

    add ecx,16
    cmp ecx,esi
    jl xloop

    add eax,stride
    add ebx,stride
    add edx,stride
    add edi,stride
    dec height
    jnz yloop

    ret

or3Masks_SSE2 endp



; dstp[x] |= (s1p[x] AND s2p[x])
orAndMasks_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword

    mov eax,s1p
    mov ebx,s2p
    mov edx,dstp
    mov edi,width_
    mov esi,height

yloop:
    xor ecx,ecx
    align 16
xloop:
    movdqa xmm0,[eax+ecx]
    movdqa xmm1,[edx+ecx]
    pand xmm0,[ebx+ecx]
    por xmm1,xmm0
    movdqa [edx+ecx],xmm1

    add ecx,16
    cmp ecx,edi
    jl xloop

    add eax,stride
    add ebx,stride
    add edx,stride
    dec esi
    jnz yloop

    ret

orAndMasks_SSE2 endp



; dstp[x] = s1p[x] AND s2p[x]
andMasks_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword

    mov eax,s1p
    mov ebx,s2p
    mov edx,dstp
    mov edi,width_
    mov esi,height

yloop:
    xor ecx,ecx
    align 16
xloop:
    movdqa xmm0,[eax+ecx]
    pand xmm0,[ebx+ecx]
    movdqa [edx+ecx],xmm0

    add ecx,16
    cmp ecx,edi
    jl xloop

    add eax,stride
    add ebx,stride
    add edx,stride
    dec esi
    jnz yloop

    ret

andMasks_SSE2 endp



; Sum of absolute byte differences between the two planes; the low 32 bits
; of the total are stored to the dword at diffp.
checkSceneChange_SSE2 proc public uses ebx esi edi s1p:dword,s2p:dword,stride:dword,width_:dword,height:dword,diffp:dword

    mov eax,s1p
    mov edi,s2p
    mov esi,stride
    mov edx,width_
    pxor xmm1,xmm1                  ; accumulator

yloop:
    xor ecx,ecx
    align 16
xloop:
    movdqa xmm0,[eax+ecx]
    psadbw xmm0,[edi+ecx]           ; two partial sums, one per 64-bit half
    paddd xmm1,xmm0

    add ecx,16
    cmp ecx,edx
    jl xloop

    add eax,esi
    add edi,esi
    dec height
    jnz yloop

    ; fold the upper half onto the lower half
    movdqa xmm2,xmm1
    psrldq xmm1,8
    paddd xmm2,xmm1

    mov eax, diffp
    movd DWORD PTR [eax],xmm2

    ret

checkSceneChange_SSE2 endp



; Vertical [1 2 1]/4 blur.
;   first row : dst = pavgb(row0, row1)
;   middle    : dst = (above + 2*cur + below + 2) >> 2
;   last row  : dst = pavgb(previous row, last row)
; height must be >= 2 (the first row already reads the row below it).
; With height == 2 there are no middle rows; the guard below skips the main
; loop instead of letting the dec/jnz counter wrap around.
VerticalBlur3_SSE2 proc public uses ebx esi edi srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword

    mov eax,srcp
    mov ebx,dstp
    mov edx,stride
    mov esi,eax
    mov edi,eax
    sub esi,edx                     ; esi = row above
    add edi,edx                     ; edi = row below
    mov edx,width_

    ; 0x0002,for rounding
    pcmpeqb xmm6,xmm6
    psrlw xmm6,15
    psllw xmm6,1
    pxor xmm7,xmm7                  ; zero, for byte->word unpacking

    xor ecx,ecx

toploop:
    movdqa xmm0,[eax+ecx]
    pavgb xmm0,[edi+ecx]
    movdqa [ebx+ecx],xmm0

    add ecx,16
    cmp ecx,edx
    jl toploop

    add esi,stride
    add eax,stride
    add edi,stride
    add ebx,stride
    sub height,2 ; the main loop processes 2 lines fewer than the height
    jle bottomrow                   ; no middle rows: skip the main loop

yloop:
    xor ecx,ecx
xloop:
    movdqa xmm0,[esi+ecx]
    movdqa xmm1,[eax+ecx]
    movdqa xmm2,[edi+ecx]
    movdqa xmm3,xmm0
    movdqa xmm4,xmm1
    movdqa xmm5,xmm2
    punpcklbw xmm0,xmm7
    punpcklbw xmm1,xmm7
    punpcklbw xmm2,xmm7
    punpckhbw xmm3,xmm7
    punpckhbw xmm4,xmm7
    punpckhbw xmm5,xmm7

    ; add bottom to top
    paddw xmm0,xmm2
    paddw xmm3,xmm5

    ; multiply center by 2
    psllw xmm1,1
    psllw xmm4,1

    ; add center to sum
    paddw xmm0,xmm1
    paddw xmm3,xmm4

    ; add 2 to sum
    paddw xmm0,xmm6
    paddw xmm3,xmm6

    ; divide by 4
    psrlw xmm0,2
    psrlw xmm3,2
    packuswb xmm0,xmm3
    movdqa [ebx+ecx],xmm0

    add ecx,16
    cmp ecx,edx
    jl xloop

    add esi,stride
    add eax,stride
    add edi,stride
    add ebx,stride
    dec height
    jnz yloop

bottomrow:
    xor ecx,ecx

bottomloop:
    movdqa xmm0,[esi+ecx]
    pavgb xmm0,[eax+ecx]
    movdqa [ebx+ecx],xmm0

    add ecx,16
    cmp ecx,edx
    jl bottomloop

    ret

VerticalBlur3_SSE2 endp



; Horizontal [1 2 1]/4 blur:
;   dstp[x] = (srcp[x-1] + 2*srcp[x] + srcp[x+1] + 2) >> 2
; Reads one byte before and after each 16-byte vector.
HorizontalBlur3_SSE2 proc public srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword

    mov eax,srcp
    mov edx,dstp
    pxor xmm7,xmm7                  ; zero, for byte->word unpacking
    ; 0x0002,for rounding
    pcmpeqb xmm6,xmm6
    psrlw xmm6,15
    psllw xmm6,1

yloop:
    xor ecx,ecx
    align 16
xloop:
    movdqu xmm0,[eax+ecx-1]
    movdqa xmm1,[eax+ecx]
    movdqu xmm2,[eax+ecx+1]
    movdqa xmm3,xmm0
    movdqa xmm4,xmm1
    movdqa xmm5,xmm2
    punpcklbw xmm0,xmm7
    punpcklbw xmm1,xmm7
    punpcklbw xmm2,xmm7
    punpckhbw xmm3,xmm7
    punpckhbw xmm4,xmm7
    punpckhbw xmm5,xmm7
    ; center * 2
    psllw xmm1,1
    psllw xmm4,1
    paddw xmm1,xmm0
    paddw xmm4,xmm3
    paddw xmm1,xmm2
    paddw xmm4,xmm5

    ; add 2 to sum
    paddw xmm1,xmm6
    paddw xmm4,xmm6

    ; divide by 4
    psrlw xmm1,2
    psrlw xmm4,2
    packuswb xmm1,xmm4
    movdqa [edx+ecx],xmm1

    add ecx,16
    cmp ecx,width_
    jl xloop

    add eax,stride
    add edx,stride
    dec height
    jnz yloop

    ret

HorizontalBlur3_SSE2 endp



; Horizontal [1 4 6 4 1]/16 blur:
;   dstp[x] = (srcp[x-2] + srcp[x+2] + 4*(srcp[x-1] + srcp[x+1])
;              + 6*srcp[x] + 8) >> 4
; Reads two bytes before and after each 16-byte vector.
HorizontalBlur6_SSE2 proc public srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword

    mov eax,srcp
    mov edx,dstp
    movdqu xmm6,oword ptr sixsMask_W
    pxor xmm7,xmm7                  ; zero, for byte->word unpacking

yloop:
    xor ecx,ecx
    align 16
xloop:
    movdqu xmm0,[eax+ecx-2]
    movdqu xmm1,[eax+ecx+2]
    movdqa xmm2,xmm0
    movdqa xmm3,xmm1
    punpcklbw xmm0,xmm7
    punpcklbw xmm1,xmm7
    punpckhbw xmm2,xmm7
    punpckhbw xmm3,xmm7

    ; srcp[x-2] + srcp[x+2]
    paddw xmm0,xmm1
    paddw xmm2,xmm3

    ; srcp[x-1] + srcp[x+1]
    movdqu xmm1,[eax+ecx-1]
    movdqu xmm3,[eax+ecx+1]
    movdqa xmm4,xmm1
    movdqa xmm5,xmm3
    punpcklbw xmm1,xmm7
    punpcklbw xmm3,xmm7
    punpckhbw xmm4,xmm7
    punpckhbw xmm5,xmm7
    paddw xmm1,xmm3
    paddw xmm4,xmm5

    ; (srcp[x-1] + srcp[x+1])*4
    psllw xmm1,2
    psllw xmm4,2

    ; (srcp[x-1] + srcp[x+1])*4 + srcp[x-2] + srcp[x+2]
    paddw xmm0,xmm1
    paddw xmm2,xmm4

    ; srcp[x] * 6
    movdqa xmm1,[eax+ecx]
    movdqu xmm5,oword ptr eightsMask_W
    movdqa xmm3,xmm1
    punpcklbw xmm1,xmm7
    punpckhbw xmm3,xmm7
    pmullw xmm1,xmm6
    pmullw xmm3,xmm6
    paddw xmm0,xmm1
    paddw xmm2,xmm3

    ; add 8
    paddw xmm0,xmm5
    paddw xmm2,xmm5

    ; divide by 16
    psrlw xmm0,4
    psrlw xmm2,4
    packuswb xmm0,xmm2
    movdqa [edx+ecx],xmm0

    add ecx,16
    cmp ecx,width_
    jl xloop

    add eax,stride
    add edx,stride
    dec height
    jnz yloop

    ret

HorizontalBlur6_SSE2 endp



end
--------------------------------------------------------------------------------
/TComb/TComb_asm_x64.asm:
--------------------------------------------------------------------------------
; 64-bit (Windows x64 calling convention) SSE2 kernels.
;
; Conventions shared by the procedures below:
;   * the first four arguments arrive in rcx, rdx, r8, r9; further ones are
;     read from the caller's stack through rbp ([rbp+48] is the 5th argument
;     once "push rbp / mov rbp,rsp" has run);
;   * 32-bit arguments are widened explicitly before use in 64-bit address
;     or compare arithmetic (strides sign-extended, widths zero-extended);
;   * rows are processed 16 bytes at a time while the byte offset (rcx) is
;     < width_; movdqa / memory-operand accesses need 16-byte alignment;
;   * height is decremented to zero with dec/jnz, so it must be >= 1.
.code

;buildFinalMask_SSE2 proc s1p:dword,s2p:dword,m1p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword
; s1p = rcx
; s2p = rdx
; m1p = r8
; dstp = r9
;
; dstp[x] = m1p[x] AND (|s1p[x] - s2p[x]| < thresh ? 0FFh : 0)

buildFinalMask_SSE2 proc public frame

stride equ dword ptr [rbp+48]
width_ equ dword ptr [rbp+56]
height equ dword ptr [rbp+64]
thresh equ dword ptr [rbp+72]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rbx
    .pushreg rbx
    push rsi
    .pushreg rsi
    push rdi
    .pushreg rdi
    .endprolog

    mov rax,rcx
    mov rbx,rdx
    mov rdx,r8
    mov rsi,r9
    movsxd r8,stride
    xor rdi,rdi
    mov edi,width_
    xor r9,r9
    mov r9d,height
    mov r10,16                      ; bytes per vector

    ; broadcast the low byte of (thresh-1) to all 16 bytes of xmm4
    dec thresh
    movd xmm4,thresh
    punpcklbw xmm4, xmm4
    punpcklwd xmm4, xmm4
    punpckldq xmm4, xmm4
    punpcklqdq xmm4, xmm4
    pxor xmm5,xmm5                  ; zero, for the compare

yloop:
    xor rcx,rcx
xloop:
    movdqa xmm0,[rax+rcx]
    movdqa xmm1,[rbx+rcx]
    movdqa xmm2,xmm0
    psubusb xmm0,xmm1
    psubusb xmm1,xmm2
    por xmm0,xmm1                   ; absolute difference
    psubusb xmm0,xmm4               ; saturating minus (thresh-1)
    pcmpeqb xmm0,xmm5               ; 0FFh where difference < thresh
    pand xmm0,[rdx+rcx]
    movdqa [rsi+rcx],xmm0

    add rcx,r10
    cmp rcx,rdi
    jl xloop

    add rax,r8
    add rbx,r8
    add rdx,r8
    add rsi,r8
    dec r9
    jnz yloop

    pop rdi
    pop rsi
    pop rbx
    pop rbp

    ret

buildFinalMask_SSE2 endp



;andNeighborsInPlace_SSE2 proc srcp:dword,stride:dword,width_:dword,height:dword
; srcp = rcx
; stride = rdx
; width_ = r8d
; height = r9d
;
; In place: srcp[x] &= OR of the three horizontal neighbours (x-1, x, x+1)
; in the row above and in the row below.

andNeighborsInPlace_SSE2 proc public frame

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rsi
    .pushreg rsi
    push rdi
    .pushreg rdi
    .endprolog

    mov rax,rcx
    ; The upper halves of registers carrying 32-bit arguments are undefined,
    ; so both values are widened explicitly before 64-bit use.
    movsxd r10,edx                  ; stride, sign-extended
    mov edx,r8d                     ; width_, zero-extended into rdx
    mov r8,r10                      ; r8 = stride
    mov rsi,rax
    mov rdi,rax
    sub rsi,r8                      ; rsi = row above
    add rdi,r8                      ; rdi = row below
    mov r10,16                      ; bytes per vector

yloop:
    xor rcx,rcx
xloop:
    movdqa xmm0,[rsi+rcx]
    movdqu xmm1,[rsi+rcx-1]
    por xmm0,xmm1
    movdqu xmm1,[rsi+rcx+1]
    por xmm0,xmm1
    movdqa xmm1,[rax+rcx]           ; current row
    movdqu xmm2,[rdi+rcx-1]
    por xmm0,xmm2
    por xmm0,[rdi+rcx]
    movdqu xmm2,[rdi+rcx+1]
    por xmm0,xmm2
    pand xmm0,xmm1
    movdqa [rax+rcx],xmm0

    add rcx,r10
    cmp rcx,rdx
    jl xloop

    add rax,r8
    add rsi,r8
    add rdi,r8
    dec r9d
    jnz yloop

    pop rdi
    pop rsi
    pop rbp

    ret

andNeighborsInPlace_SSE2 endp



;absDiff_SSE2 proc srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword
; srcp1 = rcx
; srcp2 = rdx
; dstp = r8
; stride = r9d
;
; dstp[x] = |srcp1[x] - srcp2[x]|

absDiff_SSE2 proc public frame

width_ equ dword ptr [rbp+48]
height equ dword ptr [rbp+56]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rbx
    .pushreg rbx
    push rsi
    .pushreg rsi
    .endprolog

    mov rax,rcx
    mov rsi,rdx
    mov rbx,r8
    movsxd r8,r9d
    xor rdx,rdx
    mov edx,width_
    xor r9,r9
    mov r9d,height
    mov r10,16                      ; bytes per vector

yloop:
    xor rcx,rcx
xloop:
    movdqa xmm0,[rax+rcx]
    movdqa xmm1,[rsi+rcx]
    movdqa xmm2,xmm0
    psubusb xmm0,xmm1
    psubusb xmm1,xmm2
    por xmm0,xmm1                   ; one of the two saturated differences is 0
    movdqa [rbx+rcx],xmm0

    add rcx,r10
    cmp rcx,rdx
    jl xloop

    add rax,r8
    add rsi,r8
    add rbx,r8
    dec r9d
    jnz yloop

    pop rsi
    pop rbx
    pop rbp

    ret

absDiff_SSE2 endp



;absDiffAndMinMask_SSE2 proc srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword
; srcp1 = rcx
; srcp2 = rdx
; dstp = r8
; stride = r9d
;
; dstp[x] = min(dstp[x], |srcp1[x] - srcp2[x]|)

absDiffAndMinMask_SSE2 proc public frame

width_ equ dword ptr [rbp+48]
height equ dword ptr [rbp+56]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rbx
    .pushreg rbx
    push rsi
    .pushreg rsi
    push rdi
    .pushreg rdi
    .endprolog

    mov rax,rcx
    mov rsi,rdx
    mov rbx,r8
    movsxd r8,r9d
    xor rdx,rdx
    mov edx,width_
    xor rdi,rdi
    mov edi,height
    mov r10,16                      ; bytes per vector

yloop:
    xor rcx,rcx
xloop:
    movdqa xmm0,[rax+rcx]
    movdqa xmm1,[rsi+rcx]
    movdqa xmm2,xmm0
    psubusb xmm0,xmm1
    psubusb xmm1,xmm2
    por xmm0,xmm1                   ; absolute difference
    pminub xmm0,[rbx+rcx]
    movdqa [rbx+rcx],xmm0

    add rcx,r10
    cmp rcx,rdx
    jl xloop

    add rax,r8
    add rsi,r8
    add rbx,r8
    dec edi
    jnz yloop

    pop rdi
    pop rsi
    pop rbx
    pop rbp

    ret

absDiffAndMinMask_SSE2 endp



;absDiffAndMinMaskThresh_SSE2 proc srcp1:dword,srcp2:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword
; srcp1 = rcx
; srcp2 = rdx
; dstp = r8
; stride = r9d
;
; dstp[x] = (min(dstp[x], |srcp1[x] - srcp2[x]|) < thresh) ? 0FFh : 0

absDiffAndMinMaskThresh_SSE2 proc public frame

width_ equ dword ptr [rbp+48]
height equ dword ptr [rbp+56]
thresh equ dword ptr [rbp+64]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rbx
    .pushreg rbx
    push rsi
    .pushreg rsi
    push rdi
    .pushreg rdi
    .endprolog

    mov rax,rcx
    mov rsi,rdx
    mov rbx,r8
    movsxd r8,r9d
    xor rdx,rdx
    mov edx,width_
    xor rdi,rdi
    mov edi,height
    ; broadcast the low byte of (thresh-1) to all 16 bytes of xmm3
    dec thresh
    movd xmm3,thresh
    punpcklbw xmm3,xmm3
    punpcklwd xmm3,xmm3
    punpckldq xmm3,xmm3
    punpcklqdq xmm3,xmm3
    pxor xmm4,xmm4                  ; zero, for the compare
    mov r10,16                      ; bytes per vector

yloop:
    xor rcx,rcx
xloop:
    movdqa xmm0,[rax+rcx]
    movdqa xmm1,[rsi+rcx]
    movdqa xmm2,xmm0
    psubusb xmm0,xmm1
    psubusb xmm1,xmm2
    por xmm0,xmm1                   ; absolute difference
    pminub xmm0,[rbx+rcx]
    psubusb xmm0,xmm3               ; saturating minus (thresh-1)
    pcmpeqb xmm0,xmm4               ; 0FFh where value < thresh
    movdqa [rbx+rcx],xmm0

    add rcx,r10
    cmp rcx,rdx
    jl xloop

    add rax,r8
    add rsi,r8
    add rbx,r8
    dec edi
    jnz yloop

    pop rdi
    pop rsi
    pop rbx
    pop rbp

    ret

absDiffAndMinMaskThresh_SSE2 endp



;MinMax_SSE2 proc srcp:dword,minp:dword,maxp:dword,src_stride:dword,min_stride:dword,width_:dword,height:dword,thresh:dword
; srcp = rcx
; minp = rdx
; maxp = r8
; src_stride = r9d
;
; For every x: take the minimum and maximum over the nine bytes at columns
; x-1, x, x+1 (relative to srcp) of the previous, current and next row, then
;   minp[x] = saturating(min - thresh), maxp[x] = saturating(max + thresh).
; Both output planes advance by min_stride per row.

MinMax_SSE2 proc public frame

min_stride equ dword ptr [rbp+48]
width_ equ dword ptr [rbp+56]
height equ dword ptr [rbp+64]
thresh equ dword ptr [rbp+72]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rbx
    .pushreg rbx
    push rsi
    .pushreg rsi
    push rdi
    .pushreg rdi
    push r12
    .pushreg r12
    .endprolog

    mov rax,rcx
    mov rsi,rax
    mov rdi,rax
    mov rbx,rdx
    mov rdx,r8
    movsxd r8,r9d
    movsxd r9,min_stride
    mov r10d,width_
    mov r11d,height
    mov r12,16                      ; bytes per vector
    sub rsi,r8                      ; rsi = row above
    add rdi,r8                      ; rdi = row below

    ; broadcast the low byte of thresh to all 16 bytes of xmm3
    movd xmm3,thresh
    punpcklbw xmm3,xmm3
    punpcklwd xmm3,xmm3
    punpckldq xmm3,xmm3
    punpcklqdq xmm3,xmm3

yloop:
    xor rcx,rcx
xloop:
    ; srcp-1 is aligned because the pointer passed to this function is srcp+stride+1.
    movdqa xmm0,[rsi+rcx-1]         ; xmm0 = running min
    movdqa xmm1,xmm0                ; xmm1 = running max
    movdqu xmm2,[rsi+rcx]
    pminub xmm0,xmm2
    pmaxub xmm1,xmm2
    movdqu xmm2,[rsi+rcx+1]
    pminub xmm0,xmm2
    pmaxub xmm1,xmm2
    movdqa xmm2,[rax+rcx-1]
    pminub xmm0,xmm2
    pmaxub xmm1,xmm2
    movdqu xmm2,[rax+rcx]
    pminub xmm0,xmm2
    pmaxub xmm1,xmm2
    movdqu xmm2,[rax+rcx+1]
    pminub xmm0,xmm2
    pmaxub xmm1,xmm2
    movdqa xmm2,[rdi+rcx-1]
    pminub xmm0,xmm2
    pmaxub xmm1,xmm2
    movdqu xmm2,[rdi+rcx]
    pminub xmm0,xmm2
    pmaxub xmm1,xmm2
    movdqu xmm2,[rdi+rcx+1]
    pminub xmm0,xmm2
    pmaxub xmm1,xmm2
    psubusb xmm0,xmm3
    paddusb xmm1,xmm3
    movdqa [rbx+rcx],xmm0
    movdqa [rdx+rcx],xmm1

    add rcx,r12
    cmp rcx,r10
    jl xloop

    add rsi,r8
    add rax,r8
    add rdi,r8
    add rbx,r9
    add rdx,r9
    dec r11d
    jnz yloop

    pop r12
    pop rdi
    pop rsi
    pop rbx
    pop rbp

    ret

MinMax_SSE2 endp



;checkOscillation5_SSE2 proc p2p:dword,p1p:dword,s1p:dword,n1p:dword,n2p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword
; p2p = rcx
; p1p = rdx
; s1p = r8
; n1p = r9
;
; With min31/max31 = min/max of (p2p, s1p, n2p) and min22/max22 = min/max of
; (p1p, n1p), dstp[x] is 0FFh where the condition quoted inside the loop
; holds and 0 elsewhere.

checkOscillation5_SSE2 proc public frame

n2p equ qword ptr [rbp+48]
dstp equ qword ptr [rbp+56]
stride equ dword ptr [rbp+64]
width_ equ dword ptr [rbp+72]
height equ dword ptr [rbp+80]
thresh equ dword ptr [rbp+88]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rbx
    .pushreg rbx
    push rsi
    .pushreg rsi
    push rdi
    .pushreg rdi
    push r12
    .pushreg r12
    sub rsp,64
    .allocstack 64
    ; xmm6-xmm9 are saved here and restored before returning
    movdqu oword ptr[rsp],xmm6
    .savexmm128 xmm6,0
    movdqu oword ptr[rsp+16],xmm7
    .savexmm128 xmm7,16
    movdqu oword ptr[rsp+32],xmm8
    .savexmm128 xmm8,32
    movdqu oword ptr[rsp+48],xmm9
    .savexmm128 xmm9,48
    .endprolog

    mov rax,rcx ; p2p
    mov rbx,rdx ; p1p
    mov rdx,r8 ; s1p
    mov rdi,r9 ; n1p
    mov rsi,n2p ; n2p
    mov r8,dstp
    movsxd r9,stride
    mov r10d,width_
    mov r11d,height
    mov r12,16                      ; bytes per vector

    pxor xmm6,xmm6                  ; zero, for the compares

    ; trick:
    ; x<thresh ==> x<=(thresh-1) ==> x-(thresh-1)<=0 ==> sub_sat(x,thresh-1)==0
    ; pcmpeqb(psubusb(x,thresh-1),zero): 0xFF where x<thresh

    ; NOTE(review): everything from here down to the "if (((" comment was
    ; missing from the text this file was recovered from. It has been
    ; reconstructed from the 32-bit checkOscillation5_SSE2 and from the
    ; register roles named in the comments further down - verify against the
    ; upstream source.

    ; broadcast the low byte of (thresh-1) to all 16 bytes of xmm7
    dec thresh
    movd xmm7,thresh
    punpcklbw xmm7,xmm7
    punpcklwd xmm7,xmm7
    punpckldq xmm7,xmm7
    punpcklqdq xmm7,xmm7

    ; xmm9 = 0x01 in every byte
    pcmpeqb xmm9,xmm9
    psrlw xmm9,15
    packuswb xmm9,xmm9

yloop:
    xor rcx,rcx
xloop:
    movdqa xmm0,[rax+rcx]
    movdqa xmm2,[rbx+rcx]
    movdqa xmm1,xmm0
    movdqa xmm3,xmm2
    pminub xmm0,[rdx+rcx]
    pmaxub xmm1,[rdx+rcx]
    pminub xmm2,[rdi+rcx] ; min22
    pmaxub xmm3,[rdi+rcx] ; max22
    pminub xmm0,[rsi+rcx] ; min31
    pmaxub xmm1,[rsi+rcx] ; max31

    ; if (((min31 > max22) || max22 == 0 || (max31 < min22) || max31 == 0) &&
    ; max31 - min31 < thresh && max22 - min22 < thresh)
    ; No check for (max22 == 0) or (max31 == 0), like in C, sub_sat handles automatically

    movdqa xmm4,xmm3 ; max22
    movdqa xmm5,xmm1 ; max31
    psubusb xmm4,xmm2 ; max22-min22
    psubusb xmm5,xmm0 ; max31-min31
    ; minus (thresh-1)
    psubusb xmm4,xmm7 ; max22-min22 - (thresh-1)
    psubusb xmm5,xmm7 ; max31-min31 - (thresh-1)

    ; minus 1
    psubusb xmm2,xmm9 ; min22-1
    psubusb xmm0,xmm9 ; min31-1

    psubusb xmm1,xmm2 ; max31 - (min22-1)
    psubusb xmm3,xmm0 ; max22 - (min31-1)

    pcmpeqb xmm1,xmm6
    pcmpeqb xmm3,xmm6
    pcmpeqb xmm4,xmm6
    pcmpeqb xmm5,xmm6
    por xmm1,xmm3
    pand xmm4,xmm5
    pand xmm1,xmm4
    movdqa [r8+rcx],xmm1

    add rcx,r12
    cmp rcx,r10
    jl xloop

    add rax,r9
    add rbx,r9
    add rdx,r9
    add rdi,r9
    add rsi,r9
    add r8,r9
    dec r11d
    jnz yloop

    movdqu xmm9,oword ptr[rsp+48]
    movdqu xmm8,oword ptr[rsp+32]
    movdqu xmm7,oword ptr[rsp+16]
    movdqu xmm6,oword ptr[rsp]
    add rsp,64
    pop r12
    pop rdi
    pop rsi
    pop rbx
    pop rbp

    ret

checkOscillation5_SSE2 endp



;calcAverages_SSE2 proc s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword
; s1p = rcx
; s2p = rdx
; dstp = r8
; stride = r9d

; dstp[x] = pavgb(s1p[x], s2p[x])  (rounded-up byte average)
; s1p = rcx, s2p = rdx, dstp = r8, stride = r9d; width_/height on the stack.
calcAverages_SSE2 proc public frame

width_ equ dword ptr [rbp+48]
height equ dword ptr [rbp+56]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rbx
    .pushreg rbx
    push rsi
    .pushreg rsi
    push rdi
    .pushreg rdi
    .endprolog

    mov rax,rcx                     ; rax = s1p row
    mov rbx,rdx                     ; rbx = s2p row
    mov rdx,r8                      ; rdx = dstp row
    movsxd r8,r9d                   ; r8  = stride
    mov esi,width_                  ; 32-bit load zero-extends into rsi
    mov edi,height

yloop:
    xor ecx,ecx                     ; clears all of rcx
xloop:
    movdqa xmm0,[rax+rcx]
    pavgb xmm0,[rbx+rcx]
    movdqa [rdx+rcx],xmm0

    add rcx,16
    cmp rcx,rsi
    jl xloop

    add rax,r8
    add rbx,r8
    add rdx,r8
    dec edi
    jnz yloop

    pop rdi
    pop rsi
    pop rbx
    pop rbp

    ret

calcAverages_SSE2 endp



;checkAvgOscCorrelation_SSE2 proc s1p:dword,s2p:dword,s3p:dword,s4p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword
; s1p = rcx
; s2p = rdx
; s3p = r8
; s4p = r9
;
; dstp[x] &= ((max(s1..s4)[x] - min(s1..s4)[x]) < thresh ? 0FFh : 0)

checkAvgOscCorrelation_SSE2 proc public frame

dstp equ qword ptr [rbp+48]
stride equ dword ptr [rbp+56]
width_ equ dword ptr [rbp+64]
height equ dword ptr [rbp+72]
thresh equ dword ptr [rbp+80]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rbx
    .pushreg rbx
    push rsi
    .pushreg rsi
    push rdi
    .pushreg rdi
    .endprolog

    mov rax,rcx                     ; rax = s1p row
    mov rbx,rdx                     ; rbx = s2p row
    mov rdx,r8                      ; rdx = s3p row
    mov rdi,r9                      ; rdi = s4p row
    mov rsi,dstp                    ; rsi = dstp row
    movsxd r8,stride
    mov r9d,width_                  ; 32-bit load zero-extends into r9
    mov r10d,height

    ; broadcast the low byte of (thresh-1) to all 16 bytes of xmm2
    dec thresh
    movd xmm2,thresh
    punpcklbw xmm2,xmm2
    punpcklwd xmm2,xmm2
    punpckldq xmm2,xmm2
    punpcklqdq xmm2,xmm2

    pxor xmm3,xmm3                  ; zero, for the compare

yloop:
    xor ecx,ecx                     ; clears all of rcx
xloop:
    movdqa xmm0,[rax+rcx]           ; xmm0 = running min
    movdqa xmm1,xmm0                ; xmm1 = running max
    movdqa xmm5,[rbx+rcx]
    pminub xmm0,xmm5
    pmaxub xmm1,xmm5
    movdqa xmm5,[rdx+rcx]
    pminub xmm0,xmm5
    pmaxub xmm1,xmm5
    movdqa xmm5,[rdi+rcx]
    pminub xmm0,xmm5
    pmaxub xmm1,xmm5
    psubusb xmm1,xmm0               ; max - min
    psubusb xmm1,xmm2               ; saturating minus (thresh-1)
    pcmpeqb xmm1,xmm3               ; 0FFh where range < thresh
    pand xmm1,[rsi+rcx]
    movdqa [rsi+rcx],xmm1

    add rcx,16
    cmp rcx,r9
    jl xloop

    add rax,r8
    add rbx,r8
    add rdx,r8
    add rdi,r8
    add rsi,r8
    dec r10d
    jnz yloop

    pop rdi
    pop rsi
    pop rbx
    pop rbp

    ret

checkAvgOscCorrelation_SSE2 endp



;or3Masks_SSE2 proc s1p:dword,s2p:dword,s3p:dword,dstp:dword,stride:dword,width_:dword,height:dword
; s1p = rcx
; s2p = rdx
; s3p = r8
; dstp = r9
;
; dstp[x] = s1p[x] OR s2p[x] OR s3p[x]

or3Masks_SSE2 proc public frame

stride equ dword ptr [rbp+48]
width_ equ dword ptr [rbp+56]
height equ dword ptr [rbp+64]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rbx
    .pushreg rbx
    push rsi
    .pushreg rsi
    push rdi
    .pushreg rdi
    .endprolog

    mov rax,rcx                     ; rax = s1p row
    mov rbx,rdx                     ; rbx = s2p row
    mov rdx,r8                      ; rdx = s3p row
    mov rdi,r9                      ; rdi = dstp row
    movsxd r8,stride
    mov esi,width_                  ; 32-bit load zero-extends into rsi
    mov r9d,height

yloop:
    xor ecx,ecx                     ; clears all of rcx
xloop:
    movdqa xmm0,[rax+rcx]
    por xmm0,[rbx+rcx]
    por xmm0,[rdx+rcx]
    movdqa [rdi+rcx],xmm0

    add rcx,16
    cmp rcx,rsi
    jl xloop

    add rax,r8
    add rbx,r8
    add rdx,r8
    add rdi,r8
    dec r9d
    jnz yloop

    pop rdi
    pop rsi
    pop rbx
    pop rbp

    ret

or3Masks_SSE2 endp



;orAndMasks_SSE2 proc s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword
; s1p = rcx
; s2p = rdx
; dstp = r8
; stride = r9d
;
; dstp[x] |= (s1p[x] AND s2p[x])

orAndMasks_SSE2 proc public frame

width_ equ dword ptr [rbp+48]
height equ dword ptr [rbp+56]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rbx
    .pushreg rbx
    push rsi
    .pushreg rsi
    push rdi
    .pushreg rdi
    .endprolog

    mov rax,rcx                     ; rax = s1p row
    mov rbx,rdx                     ; rbx = s2p row
    mov rdx,r8                      ; rdx = dstp row
    movsxd r8,r9d                   ; r8  = stride
    mov edi,width_                  ; 32-bit load zero-extends into rdi
    mov esi,height

yloop:
    xor ecx,ecx                     ; clears all of rcx
xloop:
    movdqa xmm0,[rax+rcx]
    pand xmm0,[rbx+rcx]
    por xmm0,[rdx+rcx]
    movdqa [rdx+rcx],xmm0
    add rcx,16
    cmp rcx,rdi
    jl xloop

    add rax,r8
    add rbx,r8
    add rdx,r8
    dec esi
    jnz yloop

    pop rdi
    pop rsi
    pop rbx
    pop rbp

    ret

orAndMasks_SSE2 endp



;andMasks_SSE2 proc s1p:dword,s2p:dword,dstp:dword,stride:dword,width_:dword,height:dword
; s1p = rcx
; s2p = rdx
; dstp = r8
; stride = r9d
;
; dstp[x] = s1p[x] AND s2p[x]

andMasks_SSE2 proc public frame

width_ equ dword ptr [rbp+48]
height equ dword ptr [rbp+56]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rbx
    .pushreg rbx
    push rsi
    .pushreg rsi
    push rdi
    .pushreg rdi
    .endprolog

    mov rax,rcx                     ; rax = s1p row
    mov rbx,rdx                     ; rbx = s2p row
    mov rdx,r8                      ; rdx = dstp row
    movsxd r8,r9d                   ; r8  = stride
    mov edi,width_                  ; 32-bit load zero-extends into rdi
    mov esi,height

yloop:
    xor ecx,ecx                     ; clears all of rcx
xloop:
    movdqa xmm0,[rax+rcx]
    pand xmm0,[rbx+rcx]
    movdqa [rdx+rcx],xmm0
    add rcx,16
    cmp rcx,rdi
    jl xloop

    add rax,r8
    add rbx,r8
    add rdx,r8
    dec esi
    jnz yloop

    pop rdi
    pop rsi
    pop rbx
    pop rbp

    ret

andMasks_SSE2 endp



;checkSceneChange_SSE2 proc s1p:dword,s2p:dword,stride:dword,width_:dword,height:dword,diffp:dword
; s1p = rcx
; s2p = rdx
; stride = r8d
; width_ = r9d
;
; Sums the absolute byte differences (psadbw) between two planes over
; width_ x height and stores the 64-bit total through diffp.
; height is the 5th argument and diffp (a pointer) the 6th; both are read from
; the caller's stack. Rows are read with aligned 16-byte accesses.

checkSceneChange_SSE2 proc public frame

height equ dword ptr [rbp+48]
diffp equ qword ptr [rbp+56]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rsi
    .pushreg rsi
    .endprolog

    mov rax,rcx             ; rax = s1p
    mov rsi,rdx             ; rsi = s2p
    movsxd r8,r8d           ; r8  = stride, sign-extended
    xor rdx,rdx
    mov edx,r9d             ; rdx = width_, zero-extended
    xor r9,r9
    mov r9d,height          ; r9  = remaining rows
    mov r10,16              ; column step in bytes

    pxor xmm1,xmm1          ; xmm1 = two 64-bit partial sums

yloop:
    xor rcx,rcx
xloop:
    movdqa xmm0,[rax+rcx]
    psadbw xmm0,[rsi+rcx]   ; one SAD per 8-byte half
    paddq xmm1,xmm0         ; accumulate in 64-bit lanes

    add rcx,r10
    cmp rcx,rdx
    jl xloop

    add rax,r8
    add rsi,r8
    dec r9d
    jnz yloop


    movdqa xmm2,xmm1        ; fold the high lane onto the low lane
    psrldq xmm1,8
    paddq xmm2,xmm1

    mov rax,diffp
    movd QWORD PTR [rax],xmm2   ; store the low 64 bits

    pop rsi
    pop rbp

    ret

checkSceneChange_SSE2 endp



;VerticalBlur3_SSE2 proc srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword
; srcp = rcx
; dstp = rdx
; stride = r8d
; width_ = r9d
;
; Vertical [1 2 1]/4 blur with rounding.
;   first row : dst = avg(cur, next)               (pavgb)
;   inner rows: dst = (prev + 2*cur + next + 2) >> 2
;   last row  : dst = avg(prev, cur)               (pavgb)
; height is the 5th argument (caller's stack). xmm6/xmm7 are callee-saved in
; the Windows x64 ABI and are therefore spilled in the prolog.

VerticalBlur3_SSE2 proc public frame

height equ dword ptr [rbp+48]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    push rbx
    .pushreg rbx
    push rsi
    .pushreg rsi
    push rdi
    .pushreg rdi

    sub rsp,32
    .allocstack 32
    movdqu oword ptr[rsp],xmm6
    .savexmm128 xmm6,0
    movdqu oword ptr[rsp+16],xmm7
    .savexmm128 xmm7,16
    .endprolog

    mov rax,rcx             ; rax = current row
    mov rbx,rdx             ; rbx = destination row
    movsxd r8,r8d           ; r8  = stride, sign-extended
    mov rsi,rax
    mov rdi,rax
    sub rsi,r8              ; rsi = previous row
    add rdi,r8              ; rdi = next row
    xor rdx,rdx
    mov edx,r9d             ; rdx = width_, zero-extended
    xor r9,r9
    mov r9d,height
    mov r10,2
    mov r11,16              ; column step in bytes

    ; 0x0002,for rounding
    pcmpeqb xmm6,xmm6
    psrlw xmm6,15
    psllw xmm6,1

    pxor xmm7,xmm7          ; zero, used to widen bytes to words

    xor rcx,rcx

toploop:
    movdqa xmm0,[rax+rcx]
    pavgb xmm0,[rdi+rcx]
    movdqa [rbx+rcx],xmm0
    add rcx,r11
    cmp rcx,rdx
    jl toploop

    add rsi,r8
    add rax,r8
    add rdi,r8
    add rbx,r8

    sub r9d,r10d            ; the main loop processes 2 lines fewer than the height
    jle bottom              ; height <= 2: there are no inner rows. Without this
                            ; skip the dec/jnz counter below would wrap around.

yloop:
    xor rcx,rcx
xloop:
    movdqa xmm0,[rsi+rcx]
    movdqa xmm1,[rax+rcx]
    movdqa xmm2,[rdi+rcx]
    movdqa xmm3,xmm0
    movdqa xmm4,xmm1
    movdqa xmm5,xmm2
    punpcklbw xmm0,xmm7
    punpcklbw xmm1,xmm7
    punpcklbw xmm2,xmm7
    punpckhbw xmm3,xmm7
    punpckhbw xmm4,xmm7
    punpckhbw xmm5,xmm7

    ; add bottom to top
    paddw xmm0,xmm2
    paddw xmm3,xmm5

    ; multiply center by 2
    psllw xmm1,1
    psllw xmm4,1

    ; add center to sum
    paddw xmm0,xmm1
    paddw xmm3,xmm4

    ; add 2 to sum
    paddw xmm0,xmm6
    paddw xmm3,xmm6

    ; divide by 4
    psrlw xmm0,2
    psrlw xmm3,2
    packuswb xmm0,xmm3
    movdqa [rbx+rcx],xmm0

    add rcx,r11
    cmp rcx,rdx
    jl xloop

    add rsi,r8
    add rax,r8
    add rdi,r8
    add rbx,r8
    dec r9d
    jnz yloop

bottom:
    xor rcx,rcx

bottomloop:
    movdqa xmm0,[rsi+rcx]
    pavgb xmm0,[rax+rcx]
    movdqa [rbx+rcx],xmm0
    add rcx,r11
    cmp rcx,rdx
    jl bottomloop

    movdqu xmm7,oword ptr[rsp+16]
    movdqu xmm6,oword ptr[rsp]
    add rsp,32

    pop rdi
    pop rsi
    pop rbx
    pop rbp

    ret

VerticalBlur3_SSE2 endp



;HorizontalBlur3_SSE2 proc srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword
; srcp = rcx
; 
dstp = rdx
; stride = r8d
; width_ = r9d
;
; Horizontal [1 2 1]/4 blur with rounding:
;   dst[x] = (src[x-1] + 2*src[x] + src[x+1] + 2) >> 2
; The x-1 / x+1 neighbours are fetched with unaligned loads, so one byte
; before and after each processed 16-byte group is read. height is the 5th
; argument (caller's stack). xmm6/xmm7 are callee-saved and spilled here.

HorizontalBlur3_SSE2 proc public frame

height equ dword ptr [rbp+48]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    sub rsp,32
    .allocstack 32
    movdqu oword ptr[rsp],xmm6
    .savexmm128 xmm6,0
    movdqu oword ptr[rsp+16],xmm7
    .savexmm128 xmm7,16
    .endprolog

    mov rax,rcx             ; rax = source row
    movsxd r8,r8d           ; r8  = stride, sign-extended
    mov r9d,r9d             ; zero-extend width_: the upper half of r9 is not
                            ; defined for a 32-bit argument, and the loop
                            ; below compares against the full 64-bit r9
    mov r10d,height         ; r10d = remaining rows
    mov r11,16              ; column step in bytes

    pxor xmm7,xmm7          ; zero, used to widen bytes to words

    ; 0x0002,for rounding
    pcmpeqb xmm6,xmm6
    psrlw xmm6,15
    psllw xmm6,1

yloop:
    xor rcx,rcx
xloop:
    movdqu xmm0,[rax+rcx-1]
    movdqa xmm1,[rax+rcx]
    movdqu xmm2,[rax+rcx+1]
    movdqa xmm3,xmm0
    movdqa xmm4,xmm1
    movdqa xmm5,xmm2

    punpcklbw xmm0,xmm7
    punpcklbw xmm1,xmm7
    punpcklbw xmm2,xmm7
    punpckhbw xmm3,xmm7
    punpckhbw xmm4,xmm7
    punpckhbw xmm5,xmm7

    ; center * 2
    psllw xmm1,1
    psllw xmm4,1
    paddw xmm1,xmm0
    paddw xmm4,xmm3
    paddw xmm1,xmm2
    paddw xmm4,xmm5

    ; add 2 to sum
    paddw xmm1,xmm6
    paddw xmm4,xmm6

    ; divide by 4
    psrlw xmm1,2
    psrlw xmm4,2
    packuswb xmm1,xmm4
    movdqa [rdx+rcx],xmm1

    add rcx,r11
    cmp rcx,r9
    jl xloop

    add rax,r8
    add rdx,r8
    dec r10d
    jnz yloop

    movdqu xmm7,oword ptr[rsp+16]
    movdqu xmm6,oword ptr[rsp]
    add rsp,32
    pop rbp

    ret

HorizontalBlur3_SSE2 endp



;HorizontalBlur6_SSE2 proc srcp:dword,dstp:dword,stride:dword,width_:dword,height:dword
; srcp = rcx
; dstp = rdx
; stride = r8d
; width_ = r9d
;
; Horizontal [1 4 6 4 1]/16 blur with rounding:
;   dst[x] = (src[x-2] + 4*(src[x-1] + src[x+1]) + 6*src[x] + src[x+2] + 8) >> 4
; Neighbours are fetched with unaligned loads, so two bytes before and after
; each processed 16-byte group are read. height is the 5th argument (caller's
; stack). xmm6..xmm12 are callee-saved and spilled in the prolog.

HorizontalBlur6_SSE2 proc public frame

height equ dword ptr [rbp+48]

    push rbp
    .pushreg rbp
    mov rbp,rsp
    sub rsp,112
    .allocstack 112
    movdqu oword ptr[rsp],xmm6
    .savexmm128 xmm6,0
    movdqu oword ptr[rsp+16],xmm7
    .savexmm128 xmm7,16
    movdqu oword ptr[rsp+32],xmm8
    .savexmm128 xmm8,32
    movdqu oword ptr[rsp+48],xmm9
    .savexmm128 xmm9,48
    movdqu oword ptr[rsp+64],xmm10
    .savexmm128 xmm10,64
    movdqu oword ptr[rsp+80],xmm11
    .savexmm128 xmm11,80
    movdqu oword ptr[rsp+96],xmm12
    .savexmm128 xmm12,96
    .endprolog

    mov rax,rcx             ; rax = source row
    movsxd r8,r8d           ; r8  = stride, sign-extended
    mov r9d,r9d             ; zero-extend width_: the upper half of r9 is not
                            ; defined for a 32-bit argument, and the loop
                            ; below compares against the full 64-bit r9
    mov r10d,height         ; r10d = remaining rows
    mov r11,16              ; column step in bytes

    pxor xmm12,xmm12        ; zero, used to widen bytes to words

    ; 0x0006
    pcmpeqb xmm11,xmm11
    psrlw xmm11,14
    psllw xmm11,1

    ; 0x0008
    pcmpeqb xmm10,xmm10
    psrlw xmm10,15
    psllw xmm10,3

yloop:
    xor rcx,rcx
xloop:
    movdqu xmm0,[rax+rcx-2]
    movdqu xmm1,[rax+rcx-1]
    movdqa xmm2,[rax+rcx]
    movdqu xmm3,[rax+rcx+1]
    movdqu xmm4,[rax+rcx+2]
    movdqa xmm5,xmm0
    movdqa xmm6,xmm1
    movdqa xmm7,xmm2
    movdqa xmm8,xmm3
    movdqa xmm9,xmm4
    punpcklbw xmm0,xmm12
    punpcklbw xmm1,xmm12
    punpcklbw xmm2,xmm12
    punpcklbw xmm3,xmm12
    punpcklbw xmm4,xmm12
    punpckhbw xmm5,xmm12
    punpckhbw xmm6,xmm12
    punpckhbw xmm7,xmm12
    punpckhbw xmm8,xmm12
    punpckhbw xmm9,xmm12

    ; srcp[x-2] + srcp[x+2]
    paddw xmm0,xmm4
    paddw xmm5,xmm9

    ; srcp[x-1] + srcp[x+1]
    paddw xmm1,xmm3
    paddw xmm6,xmm8

    ; (srcp[x-1] + srcp[x+1])*4
    psllw xmm1,2
    psllw xmm6,2

    ; (srcp[x-1] + srcp[x+1])*4 + srcp[x-2] + srcp[x+2]
    paddw xmm0,xmm1
    paddw xmm5,xmm6

    ; srcp[x] * 6
    pmullw xmm2,xmm11
    pmullw xmm7,xmm11
    paddw xmm0,xmm2
    paddw xmm5,xmm7

    ; add 8
    paddw xmm0,xmm10
    paddw xmm5,xmm10

    ; divide by 16
    psrlw xmm0,4
    psrlw xmm5,4
    packuswb xmm0,xmm5
    movdqa [rdx+rcx],xmm0

    add rcx,r11
    cmp rcx,r9
    jl xloop

    add rax,r8
    add rdx,r8
    dec r10d
    jnz yloop

    movdqu xmm12,oword ptr[rsp+96]
    movdqu xmm11,oword ptr[rsp+80]
    movdqu xmm10,oword ptr[rsp+64]
    movdqu xmm9,oword ptr[rsp+48]
    movdqu xmm8,oword ptr[rsp+32]
    movdqu xmm7,oword ptr[rsp+16]
    movdqu xmm6,oword ptr[rsp]
    add rsp,112
    pop rbp

    ret

HorizontalBlur6_SSE2 endp



end

--------------------------------------------------------------------------------
/TComb/TComb_core.cpp:
--------------------------------------------------------------------------------
/*
** TComb v2.x for Avisynth 2.6 and Avisynth+
**
** TComb is a temporal comb filter (it reduces cross-luminance (rainbowing)
** and cross-chrominance (dot crawl) artifacts in static areas of the picture).
** It will ONLY work with NTSC material, and WILL NOT work with telecined material
** where the rainbowing/dotcrawl was introduced prior to the telecine process!
** It must be used before ivtc or deinterlace.
**
** Copyright (C) 2021 Ferenc Pintér
**
** Copyright (C) 2015 Shane Panke
**
** Copyright (C) 2005-2006 Kevin Stone
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
25 | ** 26 | ** You should have received a copy of the GNU General Public License 27 | ** along with this program; if not, write to the Free Software 28 | ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 29 | */ 30 | 31 | #include "TComb.h" 32 | #include 33 | 34 | #ifdef INTEL_INTRINSICS 35 | #include 36 | #include 37 | #endif 38 | #include 39 | 40 | template 41 | void checkSceneChangePlanar_1_c(const pixel_t* srcp, const pixel_t* nxtp, 42 | int height, int width, int src_pitch, int nxt_pitch, uint64_t& diff) 43 | { 44 | for (int y = 0; y < height; ++y) 45 | { 46 | uint32_t rowdiff = 0; 47 | for (int x = 0; x < width; x += 4) 48 | { 49 | rowdiff += abs(srcp[x + 0] - nxtp[x + 0]); 50 | rowdiff += abs(srcp[x + 1] - nxtp[x + 1]); 51 | rowdiff += abs(srcp[x + 2] - nxtp[x + 2]); 52 | rowdiff += abs(srcp[x + 3] - nxtp[x + 3]); 53 | } 54 | diff += rowdiff; 55 | srcp += src_pitch; 56 | nxtp += nxt_pitch; 57 | } 58 | } 59 | 60 | // instantiate 61 | template void checkSceneChangePlanar_1_c(const uint8_t* srcp, const uint8_t* nxtp, 62 | int height, int width, int src_pitch, int nxt_pitch, uint64_t& diff); 63 | 64 | #ifdef INTEL_INTRINSICS 65 | void checkSceneChangePlanar_1_SSE2_simd(const uint8_t* prvp, const uint8_t* srcp, 66 | int height, int width, int prv_pitch, int src_pitch, uint64_t& diffp) 67 | { 68 | __m128i sum = _mm_setzero_si128(); 69 | while (height--) { 70 | for (int x = 0; x < width; x += 16) 71 | { 72 | __m128i src1 = _mm_load_si128(reinterpret_cast(prvp + x)); 73 | __m128i src2 = _mm_load_si128(reinterpret_cast(srcp + x)); 74 | __m128i sad = _mm_sad_epu8(src1, src2); 75 | sum = _mm_add_epi32(sum, sad); 76 | } 77 | prvp += prv_pitch; 78 | srcp += src_pitch; 79 | } 80 | __m128i res = _mm_add_epi32(sum, _mm_srli_si128(sum, 8)); 81 | diffp = _mm_cvtsi128_si32(res); 82 | } 83 | #endif 84 | 85 | #ifdef INTEL_INTRINSICS 86 | void andMasks_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height) 87 
| { 88 | for (int y = 0; y < height; ++y) 89 | { 90 | for (int x = 0; x < width; x += 16) 91 | { 92 | __m128i src1 = _mm_load_si128(reinterpret_cast(s1p + x)); 93 | __m128i src2 = _mm_load_si128(reinterpret_cast(s2p + x)); 94 | __m128i result = _mm_and_si128(src1, src2); 95 | _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), result); 96 | } 97 | 98 | s1p += stride; 99 | s2p += stride; 100 | dstp += stride; 101 | } 102 | } 103 | #endif 104 | 105 | void andMasks_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height) 106 | { 107 | for (int y = 0; y < height; ++y) 108 | { 109 | for (int x = 0; x < width; ++x) 110 | dstp[x] = (s1p[x] & s2p[x]); 111 | 112 | s1p += stride; 113 | s2p += stride; 114 | dstp += stride; 115 | } 116 | } 117 | 118 | #ifdef INTEL_INTRINSICS 119 | void orAndMasks_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height) 120 | { 121 | for (int y = 0; y < height; ++y) 122 | { 123 | for (int x = 0; x < width; x += 16) 124 | { 125 | __m128i src1 = _mm_load_si128(reinterpret_cast(s1p + x)); 126 | __m128i src2 = _mm_load_si128(reinterpret_cast(s2p + x)); 127 | __m128i dst = _mm_load_si128(reinterpret_cast(dstp + x)); 128 | __m128i result = _mm_or_si128(dst, _mm_and_si128(src1, src2)); 129 | _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), result); 130 | } 131 | 132 | s1p += stride; 133 | s2p += stride; 134 | dstp += stride; 135 | } 136 | } 137 | #endif 138 | 139 | void orAndMasks_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height) 140 | { 141 | for (int y = 0; y < height; ++y) 142 | { 143 | for (int x = 0; x < width; ++x) 144 | dstp[x] |= (s1p[x] & s2p[x]); 145 | 146 | s1p += stride; 147 | s2p += stride; 148 | dstp += stride; 149 | } 150 | } 151 | 152 | #ifdef INTEL_INTRINSICS 153 | void or3Masks_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* s3p, uint8_t* dstp, int stride, int width, int height) 154 | { 155 | 
for (int y = 0; y < height; ++y) 156 | { 157 | for (int x = 0; x < width; x += 16) 158 | { 159 | __m128i src1 = _mm_load_si128(reinterpret_cast(s1p + x)); 160 | __m128i src2 = _mm_load_si128(reinterpret_cast(s2p + x)); 161 | __m128i src3 = _mm_load_si128(reinterpret_cast(s3p + x)); 162 | __m128i result = _mm_or_si128(src1, _mm_or_si128(src2, src3)); 163 | _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), result); 164 | } 165 | 166 | s1p += stride; 167 | s2p += stride; 168 | s3p += stride; 169 | dstp += stride; 170 | } 171 | } 172 | #endif 173 | 174 | void or3Masks_c(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* s3p, uint8_t* dstp, int stride, int width, int height) 175 | { 176 | for (int y = 0; y < height; ++y) 177 | { 178 | for (int x = 0; x < width; ++x) 179 | dstp[x] = (s1p[x] | s2p[x] | s3p[x]); 180 | 181 | s1p += stride; 182 | s2p += stride; 183 | s3p += stride; 184 | dstp += stride; 185 | } 186 | } 187 | 188 | #ifdef INTEL_INTRINSICS 189 | void calcAverages_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height) 190 | { 191 | for (int y = 0; y < height; ++y) 192 | { 193 | for (int x = 0; x < width; x += 16) 194 | { 195 | __m128i src1 = _mm_load_si128(reinterpret_cast(s1p + x)); 196 | __m128i src2 = _mm_load_si128(reinterpret_cast(s2p + x)); 197 | __m128i result = _mm_avg_epu8(src1, src2); 198 | _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), result); 199 | } 200 | 201 | s1p += stride; 202 | s2p += stride; 203 | dstp += stride; 204 | } 205 | } 206 | #endif 207 | 208 | void calcAverages_c(const uint8_t* s1p, const uint8_t* s2p, uint8_t* dstp, int stride, int width, int height) 209 | { 210 | for (int y = 0; y < height; ++y) 211 | { 212 | for (int x = 0; x < width; ++x) 213 | dstp[x] = (s1p[x] + s2p[x] + 1) >> 1; 214 | 215 | s1p += stride; 216 | s2p += stride; 217 | dstp += stride; 218 | } 219 | } 220 | 221 | #ifdef INTEL_INTRINSICS 222 | void MinMax_SSE2_simd(const uint8_t* srcp, uint8_t* dstpMin, 
uint8_t* dstpMax, int src_stride, int dmin_stride, int width, int height, int thresh) 223 | { 224 | const uint8_t* srcpp = srcp - src_stride; 225 | const uint8_t* srcpn = srcp + src_stride; 226 | 227 | const auto threshp = _mm_set1_epi8(thresh); 228 | 229 | for (int y = 0; y < height; ++y) 230 | { 231 | for (int x = 0; x < width; x += 16) 232 | { 233 | __m128i srcpp_m_1 = _mm_load_si128(reinterpret_cast(srcpp + x - 1)); 234 | __m128i srcpp_0 = _mm_loadu_si128(reinterpret_cast(srcpp + x)); 235 | __m128i srcpp_p_1 = _mm_loadu_si128(reinterpret_cast(srcpp + x + 1)); 236 | 237 | __m128i srcp_m_1 = _mm_load_si128(reinterpret_cast(srcp + x - 1)); 238 | __m128i srcp_0 = _mm_loadu_si128(reinterpret_cast(srcp + x)); 239 | __m128i srcp_p_1 = _mm_loadu_si128(reinterpret_cast(srcp + x + 1)); 240 | 241 | __m128i srcpn_m_1 = _mm_load_si128(reinterpret_cast(srcpn + x - 1)); 242 | __m128i srcpn_0 = _mm_loadu_si128(reinterpret_cast(srcpn + x)); 243 | __m128i srcpn_p_1 = _mm_loadu_si128(reinterpret_cast(srcpn + x + 1)); 244 | 245 | auto tmpmin = _mm_min_epu8(_mm_min_epu8(_mm_min_epu8(_mm_min_epu8(srcpp_m_1, srcpp_0), 246 | _mm_min_epu8(srcpp_p_1, srcp_m_1)), 247 | _mm_min_epu8(_mm_min_epu8(srcp_0, srcp_p_1), 248 | _mm_min_epu8(srcpn_m_1, srcpn_0))), srcpn_p_1); 249 | 250 | auto min = _mm_subs_epu8(tmpmin, threshp); 251 | 252 | _mm_store_si128(reinterpret_cast<__m128i*>(dstpMin + x), min); 253 | 254 | auto tmpmax = _mm_max_epu8(_mm_max_epu8(_mm_max_epu8(_mm_max_epu8(srcpp_m_1, srcpp_0), 255 | _mm_max_epu8(srcpp_p_1, srcp_m_1)), 256 | _mm_max_epu8(_mm_max_epu8(srcp_0, srcp_p_1), 257 | _mm_max_epu8(srcpn_m_1, srcpn_0))), srcpn_p_1); 258 | 259 | auto max = _mm_adds_epu8(tmpmax, threshp); // future warning: 10-14 bitss 260 | 261 | _mm_store_si128(reinterpret_cast<__m128i*>(dstpMax + x), max); 262 | } 263 | 264 | srcpp += src_stride; 265 | srcp += src_stride; 266 | srcpn += src_stride; 267 | dstpMin += dmin_stride; 268 | dstpMax += dmin_stride; 269 | } 270 | } 271 | #endif 272 | 273 | 
void MinMax_c(const uint8_t* srcp, uint8_t* dstpMin, uint8_t* dstpMax, int src_stride, int dmin_stride, int width, int height, int thresh) 274 | { 275 | const uint8_t* srcpp = srcp - src_stride; 276 | const uint8_t* srcpn = srcp + src_stride; 277 | 278 | for (int y = 0; y < height; ++y) 279 | { 280 | for (int x = 0; x < width; ++x) 281 | { 282 | dstpMin[x] = std::max(std::min(std::min(std::min(std::min(srcpp[x - 1], srcpp[x]), 283 | std::min(srcpp[x + 1], srcp[x - 1])), 284 | std::min(std::min(srcp[x], srcp[x + 1]), 285 | std::min(srcpn[x - 1], srcpn[x]))), srcpn[x + 1]) - thresh, 0); 286 | dstpMax[x] = std::min(std::max(std::max(std::max(std::max(srcpp[x - 1], srcpp[x]), 287 | std::max(srcpp[x + 1], srcp[x - 1])), 288 | std::max(std::max(srcp[x], srcp[x + 1]), 289 | std::max(srcpn[x - 1], srcpn[x]))), srcpn[x + 1]) + thresh, 255); 290 | } 291 | 292 | srcpp += src_stride; 293 | srcp += src_stride; 294 | srcpn += src_stride; 295 | dstpMin += dmin_stride; 296 | dstpMax += dmin_stride; 297 | } 298 | } 299 | 300 | #ifdef INTEL_INTRINSICS 301 | void absDiff_SSE2_simd(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height) 302 | { 303 | for (int y = 0; y < height; ++y) 304 | { 305 | for (int x = 0; x < width; x += 16) { 306 | auto src1 = _mm_load_si128(reinterpret_cast(srcp1 + x)); 307 | auto src2 = _mm_load_si128(reinterpret_cast(srcp2 + x)); 308 | auto diff12 = _mm_subs_epu8(src1, src2); 309 | auto diff21 = _mm_subs_epu8(src2, src1); 310 | auto diff = _mm_or_si128(diff12, diff21); 311 | _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), diff); 312 | } 313 | 314 | srcp1 += stride; 315 | srcp2 += stride; 316 | dstp += stride; 317 | } 318 | } 319 | #endif 320 | 321 | void absDiff_c(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height) 322 | { 323 | for (int y = 0; y < height; ++y) 324 | { 325 | for (int x = 0; x < width; ++x) 326 | dstp[x] = abs(srcp1[x] - srcp2[x]); 327 | 328 | srcp1 += 
stride; 329 | srcp2 += stride; 330 | dstp += stride; 331 | } 332 | } 333 | 334 | #ifdef INTEL_INTRINSICS 335 | void buildFinalMask_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* m1p, uint8_t* dstp, int stride, int width, int height, int thresh) 336 | { 337 | auto thresh_minus1 = _mm_set1_epi8(thresh-1); 338 | auto zero = _mm_setzero_si128(); 339 | 340 | for (int y = 0; y < height; ++y) 341 | { 342 | for (int x = 0; x < width; x += 16) 343 | { 344 | auto src1 = _mm_load_si128(reinterpret_cast(s1p + x)); 345 | auto src2 = _mm_load_si128(reinterpret_cast(s2p + x)); 346 | auto diff12 = _mm_subs_epu8(src1, src2); 347 | auto diff21 = _mm_subs_epu8(src2, src1); 348 | auto diff = _mm_or_si128(diff12, diff21); 349 | auto addedsthresh = _mm_subs_epu8(diff, thresh_minus1); 350 | auto cmpresult = _mm_cmpeq_epi8(addedsthresh, zero); 351 | auto m1 = _mm_load_si128(reinterpret_cast(m1p + x)); 352 | auto tmp = _mm_and_si128(cmpresult, m1); 353 | _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), tmp); 354 | 355 | /* 356 | if (m1p[x] && abs(s1p[x] - s2p[x]) < thresh) 357 | dstp[x] = 0xFF; 358 | else 359 | dstp[x] = 0; 360 | */ 361 | } 362 | 363 | m1p += stride; 364 | s1p += stride; 365 | s2p += stride; 366 | dstp += stride; 367 | } 368 | } 369 | #endif 370 | 371 | void buildFinalMask_c(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* m1p, uint8_t* dstp, int stride, int width, int height, int thresh) 372 | { 373 | for (int y = 0; y < height; ++y) 374 | { 375 | for (int x = 0; x < width; ++x) 376 | { 377 | if (m1p[x] && abs(s1p[x] - s2p[x]) < thresh) 378 | dstp[x] = 0xFF; 379 | else 380 | dstp[x] = 0; 381 | } 382 | 383 | m1p += stride; 384 | s1p += stride; 385 | s2p += stride; 386 | dstp += stride; 387 | } 388 | } 389 | 390 | #ifdef INTEL_INTRINSICS 391 | void checkOscillation5_SSE2_simd(const uint8_t* p2p, const uint8_t* p1p, const uint8_t* s1p, const uint8_t* n1p, const uint8_t* n2p, uint8_t* dstp, int stride, int width, int height, int thresh) 392 | { 
393 | int threshm1 = std::min(std::max(thresh - 1, 0), 255); 394 | auto thresh_minus1 = _mm_set1_epi8(threshm1); 395 | auto one = _mm_set1_epi8(1); 396 | auto zero = _mm_setzero_si128(); 397 | 398 | for (int y = 0; y < height; ++y) 399 | { 400 | for (int x = 0; x < width; x += 16) 401 | { 402 | // trick: x < thresh ==> x <= (thresh - 1) ==> x - (thresh - 1) <= 0 ==> sub_sat(x, thresh - 1) == 0 403 | // pcmpeqb(psubusb(x, thresh - 1), zero) : 0xFF where x < thresh 404 | 405 | __m128i src_p2p = _mm_load_si128(reinterpret_cast(p2p + x)); 406 | __m128i src_s1p = _mm_load_si128(reinterpret_cast(s1p + x)); 407 | __m128i src_n2p = _mm_load_si128(reinterpret_cast(n2p + x)); 408 | __m128i src_p1p = _mm_load_si128(reinterpret_cast(p1p + x)); 409 | __m128i src_n1p = _mm_load_si128(reinterpret_cast(n1p + x)); 410 | 411 | auto min31 = _mm_min_epu8(_mm_min_epu8(src_p2p, src_s1p), src_n2p); 412 | auto max31 = _mm_max_epu8(_mm_max_epu8(src_p2p, src_s1p), src_n2p); 413 | auto min22 = _mm_min_epu8(src_p1p, src_n1p); 414 | auto max22 = _mm_max_epu8(src_p1p, src_n1p); 415 | 416 | auto cmp1 = _mm_cmpeq_epi8(_mm_subs_epu8(max22, _mm_subs_epu8(min31, one)), zero); 417 | auto cmp2 = _mm_cmpeq_epi8(_mm_subs_epu8(max31, _mm_subs_epu8(min22, one)), zero); 418 | // No check for (max22 == 0) or (max31 == 0), like in C, sub_sat handles automatically 419 | auto maxmindiff31 = _mm_subs_epu8(max31, min31); 420 | auto cmp3 = _mm_cmpeq_epi8(_mm_subs_epu8(maxmindiff31, thresh_minus1), zero); 421 | auto maxmindiff22 = _mm_subs_epu8(max22, min22); 422 | auto cmp4 = _mm_cmpeq_epi8(_mm_subs_epu8(maxmindiff22, thresh_minus1), zero); 423 | 424 | auto result = _mm_and_si128(_mm_or_si128(cmp1, cmp2), _mm_and_si128(cmp3, cmp4)); 425 | /* 426 | if (((max22 < min31) || max22 == 0 || (max31 < min22) || max31 == 0) && 427 | max31 - min31 < thresh && max22 - min22 < thresh) 428 | dstp[x] = 0xFF; 429 | else dstp[x] = 0; 430 | */ 431 | _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), result); 432 | } 433 | 434 
| p2p += stride; 435 | p1p += stride; 436 | s1p += stride; 437 | n1p += stride; 438 | n2p += stride; 439 | dstp += stride; 440 | } 441 | } 442 | #endif 443 | 444 | void checkOscillation5_c(const uint8_t* p2p, const uint8_t* p1p, const uint8_t* s1p, const uint8_t* n1p, const uint8_t* n2p, uint8_t* dstp, int stride, int width, int height, int thresh) 445 | { 446 | for (int y = 0; y < height; ++y) 447 | { 448 | for (int x = 0; x < width; ++x) 449 | { 450 | const int min31 = min3(p2p[x], s1p[x], n2p[x]); 451 | const int max31 = max3(p2p[x], s1p[x], n2p[x]); 452 | const int min22 = std::min(p1p[x], n1p[x]); 453 | const int max22 = std::max(p1p[x], n1p[x]); 454 | if (((max22 < min31) || max22 == 0 || (max31 < min22) || max31 == 0) && 455 | max31 - min31 < thresh && max22 - min22 < thresh) 456 | dstp[x] = 0xFF; 457 | else dstp[x] = 0; 458 | } 459 | 460 | p2p += stride; 461 | p1p += stride; 462 | s1p += stride; 463 | n1p += stride; 464 | n2p += stride; 465 | dstp += stride; 466 | } 467 | } 468 | 469 | #ifdef INTEL_INTRINSICS 470 | void absDiffAndMinMaskThresh_SSE2_simd(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height, int thresh) 471 | { 472 | int threshm1 = std::min(std::max(thresh - 1, 0), 255); 473 | auto thresh_minus1 = _mm_set1_epi8(threshm1); 474 | auto zero = _mm_setzero_si128(); 475 | 476 | for (int y = 0; y < height; ++y) 477 | { 478 | for (int x = 0; x < width; x += 16) 479 | { 480 | __m128i src1 = _mm_load_si128(reinterpret_cast(srcp1 + x)); 481 | __m128i src2 = _mm_load_si128(reinterpret_cast(srcp2 + x)); 482 | __m128i dst = _mm_load_si128(reinterpret_cast(dstp + x)); 483 | auto diff12 = _mm_subs_epu8(src1, src2); 484 | auto diff21 = _mm_subs_epu8(src2, src1); 485 | auto diff = _mm_or_si128(diff12, diff21); 486 | 487 | auto tmp_min = _mm_min_epu8(diff, dst); 488 | auto result = _mm_cmpeq_epi8(_mm_subs_epu8(tmp_min, thresh_minus1), zero); 489 | /* 490 | if (diff < dstp[x]) dstp[x] = diff; // min 491 | if (dstp[x] < 
thresh) 492 | dstp[x] = 0xFF; 493 | else 494 | dstp[x] = 0; 495 | */ 496 | 497 | _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), result); 498 | 499 | } 500 | 501 | srcp1 += stride; 502 | srcp2 += stride; 503 | dstp += stride; 504 | } 505 | } 506 | #endif 507 | 508 | void absDiffAndMinMaskThresh_c(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height, int thresh) 509 | { 510 | for (int y = 0; y < height; ++y) 511 | { 512 | for (int x = 0; x < width; ++x) 513 | { 514 | const int diff = abs(srcp1[x] - srcp2[x]); 515 | if (diff < dstp[x]) 516 | dstp[x] = diff; 517 | if (dstp[x] < thresh) 518 | dstp[x] = 0xFF; 519 | else 520 | dstp[x] = 0; 521 | } 522 | 523 | srcp1 += stride; 524 | srcp2 += stride; 525 | dstp += stride; 526 | } 527 | } 528 | 529 | #ifdef INTEL_INTRINSICS 530 | void absDiffAndMinMask_SSE2_simd(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height) 531 | { 532 | for (int y = 0; y < height; ++y) 533 | { 534 | for (int x = 0; x < width; x += 16) 535 | { 536 | __m128i src1 = _mm_load_si128(reinterpret_cast(srcp1 + x)); 537 | __m128i src2 = _mm_load_si128(reinterpret_cast(srcp2 + x)); 538 | __m128i dst = _mm_load_si128(reinterpret_cast(dstp + x)); 539 | auto diff12 = _mm_subs_epu8(src1, src2); 540 | auto diff21 = _mm_subs_epu8(src2, src1); 541 | auto diff = _mm_or_si128(diff12, diff21); 542 | 543 | auto tmp_min = _mm_min_epu8(diff, dst); 544 | _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), tmp_min); 545 | 546 | /* 547 | const int diff = abs(srcp1[x] - srcp2[x]); 548 | if (diff < dstp[x]) 549 | dstp[x] = diff; 550 | */ 551 | } 552 | 553 | srcp1 += stride; 554 | srcp2 += stride; 555 | dstp += stride; 556 | } 557 | } 558 | #endif 559 | 560 | void absDiffAndMinMask_c(const uint8_t* srcp1, const uint8_t* srcp2, uint8_t* dstp, int stride, int width, int height) 561 | { 562 | for (int y = 0; y < height; ++y) 563 | { 564 | for (int x = 0; x < width; ++x) 565 | { 566 | const int 
diff = abs(srcp1[x] - srcp2[x]); 567 | if (diff < dstp[x]) 568 | dstp[x] = diff; 569 | } 570 | 571 | srcp1 += stride; 572 | srcp2 += stride; 573 | dstp += stride; 574 | } 575 | } 576 | 577 | #ifdef INTEL_INTRINSICS 578 | void checkAvgOscCorrelation_SSE2_simd(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* s3p, const uint8_t* s4p, uint8_t* dstp, int stride, int width, int height, int thresh) 579 | { 580 | int threshm1 = std::min(std::max(thresh - 1, 0), 255); 581 | auto thresh_minus1 = _mm_set1_epi8(threshm1); 582 | auto zero = _mm_setzero_si128(); 583 | 584 | for (int y = 0; y < height; ++y) 585 | { 586 | for (int x = 0; x < width; x += 16) 587 | { 588 | __m128i s1 = _mm_load_si128(reinterpret_cast(s1p + x)); 589 | __m128i s2 = _mm_load_si128(reinterpret_cast(s2p + x)); 590 | __m128i s3 = _mm_load_si128(reinterpret_cast(s3p + x)); 591 | __m128i s4 = _mm_load_si128(reinterpret_cast(s4p + x)); 592 | 593 | auto min = _mm_min_epu8(_mm_min_epu8(_mm_min_epu8(s1, s2), s3), s4); 594 | auto max = _mm_max_epu8(_mm_max_epu8(_mm_max_epu8(s1, s2), s3), s4); 595 | 596 | auto diffmaxmin = _mm_subs_epu8(max, min); 597 | auto cmp = _mm_cmpeq_epi8(_mm_subs_epu8(diffmaxmin, thresh_minus1), zero); 598 | 599 | __m128i dst = _mm_load_si128(reinterpret_cast(dstp + x)); 600 | auto result = _mm_and_si128(cmp, dst); 601 | _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), result); 602 | 603 | /* 604 | if (max4(s1p[x], s2p[x], s3p[x], s4p[x]) - min4(s1p[x], s2p[x], s3p[x], s4p[x]) >= thresh) 605 | dstp[x] = 0; 606 | that is: 607 | if(max-min < thresh) dstp[x] = dstp[x] else 0 (dst=dst&FF 0=dst&00) 608 | */ 609 | } 610 | 611 | s1p += stride; 612 | s2p += stride; 613 | s3p += stride; 614 | s4p += stride; 615 | dstp += stride; 616 | } 617 | } 618 | #endif 619 | 620 | void checkAvgOscCorrelation_c(const uint8_t* s1p, const uint8_t* s2p, const uint8_t* s3p, const uint8_t* s4p, uint8_t* dstp, int stride, int width, int height, int thresh) 621 | { 622 | for (int y = 0; y < height; ++y) 
623 | { 624 | for (int x = 0; x < width; ++x) 625 | { 626 | if (max4(s1p[x], s2p[x], s3p[x], s4p[x]) - 627 | min4(s1p[x], s2p[x], s3p[x], s4p[x]) >= thresh) 628 | dstp[x] = 0; 629 | } 630 | 631 | s1p += stride; 632 | s2p += stride; 633 | s3p += stride; 634 | s4p += stride; 635 | dstp += stride; 636 | } 637 | } 638 | 639 | #ifdef INTEL_INTRINSICS 640 | void VerticalBlur3_SSE2_simd(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height) 641 | { 642 | const uint8_t* srcpp = srcp - stride; 643 | const uint8_t* srcpn = srcp + stride; 644 | 645 | auto zero = _mm_setzero_si128(); 646 | auto two = _mm_set1_epi16(2); 647 | 648 | // top line 649 | for (int x = 0; x < width; x += 16) { 650 | __m128i s1 = _mm_load_si128(reinterpret_cast(srcp + x)); 651 | __m128i s2 = _mm_load_si128(reinterpret_cast(srcpn + x)); 652 | auto avg = _mm_avg_epu8(s1, s2); 653 | _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), avg); 654 | // dstp[x] = (srcp[x] + srcpn[x] + 1) >> 1; 655 | } 656 | 657 | srcpp += stride; 658 | srcp += stride; 659 | srcpn += stride; 660 | dstp += stride; 661 | 662 | for (int y = 1; y < height - 1; ++y) 663 | { 664 | for (int x = 0; x < width; x += 16) { 665 | __m128i p = _mm_load_si128(reinterpret_cast(srcpp + x)); 666 | __m128i s = _mm_load_si128(reinterpret_cast(srcp + x)); 667 | __m128i n = _mm_load_si128(reinterpret_cast(srcpn + x)); 668 | 669 | auto p_lo = _mm_unpacklo_epi8(p, zero); 670 | auto p_hi = _mm_unpackhi_epi8(p, zero); 671 | auto s_lo = _mm_unpacklo_epi8(s, zero); 672 | auto s_hi = _mm_unpackhi_epi8(s, zero); 673 | auto n_lo = _mm_unpacklo_epi8(n, zero); 674 | auto n_hi = _mm_unpackhi_epi8(n, zero); 675 | auto res_lo = _mm_add_epi16(_mm_add_epi16(p_lo, _mm_slli_epi16(s_lo, 1)), n_lo); 676 | auto res_hi = _mm_add_epi16(_mm_add_epi16(p_hi, _mm_slli_epi16(s_hi, 1)), n_hi); 677 | res_lo = _mm_srli_epi16(_mm_add_epi16(res_lo, two), 2); 678 | res_hi = _mm_srli_epi16(_mm_add_epi16(res_hi, two), 2); 679 | auto result = _mm_packus_epi16(res_lo, 
res_hi); 680 | _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), result); 681 | // dstp[x] = (srcpp[x] + (srcp[x] << 1) + srcpn[x] + 2) >> 2; 682 | } 683 | 684 | srcpp += stride; 685 | srcp += stride; 686 | srcpn += stride; 687 | dstp += stride; 688 | } 689 | 690 | // bottom 691 | for (int x = 0; x < width; x += 16) { 692 | __m128i s1 = _mm_load_si128(reinterpret_cast(srcpp + x)); 693 | __m128i s2 = _mm_load_si128(reinterpret_cast(srcp + x)); 694 | auto avg = _mm_avg_epu8(s1, s2); 695 | _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), avg); 696 | //dstp[x] = (srcpp[x] + srcp[x] + 1) >> 1; 697 | } 698 | 699 | } 700 | #endif 701 | 702 | void VerticalBlur3_c(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height) 703 | { 704 | const uint8_t* srcpp = srcp - stride; 705 | const uint8_t* srcpn = srcp + stride; 706 | 707 | for (int x = 0; x < width; ++x) 708 | dstp[x] = (srcp[x] + srcpn[x] + 1) >> 1; 709 | 710 | srcpp += stride; 711 | srcp += stride; 712 | srcpn += stride; 713 | dstp += stride; 714 | 715 | for (int y = 1; y < height - 1; ++y) 716 | { 717 | for (int x = 0; x < width; ++x) 718 | dstp[x] = (srcpp[x] + (srcp[x] << 1) + srcpn[x] + 2) >> 2; 719 | 720 | srcpp += stride; 721 | srcp += stride; 722 | srcpn += stride; 723 | dstp += stride; 724 | } 725 | 726 | for (int x = 0; x < width; ++x) 727 | dstp[x] = (srcpp[x] + srcp[x] + 1) >> 1; 728 | } 729 | 730 | #ifdef INTEL_INTRINSICS 731 | // width mod 16 and srcp alignment guaranteed 732 | void HorizontalBlur3_SSE2_simd(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height) 733 | { 734 | auto zero = _mm_setzero_si128(); 735 | auto two = _mm_set1_epi16(2); 736 | 737 | for (int y = 0; y < height; ++y) 738 | { 739 | for (int x = 0; x < width; x += 16) 740 | { 741 | __m128i p = _mm_loadu_si128(reinterpret_cast(srcp + x - 1)); 742 | __m128i s = _mm_load_si128(reinterpret_cast(srcp + x)); 743 | __m128i n = _mm_loadu_si128(reinterpret_cast(srcp + x + 1)); 744 | 745 | auto p_lo = 
_mm_unpacklo_epi8(p, zero); 746 | auto p_hi = _mm_unpackhi_epi8(p, zero); 747 | auto s_lo = _mm_unpacklo_epi8(s, zero); 748 | auto s_hi = _mm_unpackhi_epi8(s, zero); 749 | auto n_lo = _mm_unpacklo_epi8(n, zero); 750 | auto n_hi = _mm_unpackhi_epi8(n, zero); 751 | auto res_lo = _mm_add_epi16(_mm_add_epi16(p_lo, _mm_slli_epi16(s_lo, 1)), n_lo); 752 | auto res_hi = _mm_add_epi16(_mm_add_epi16(p_hi, _mm_slli_epi16(s_hi, 1)), n_hi); 753 | res_lo = _mm_srli_epi16(_mm_add_epi16(res_lo, two), 2); 754 | res_hi = _mm_srli_epi16(_mm_add_epi16(res_hi, two), 2); 755 | auto result = _mm_packus_epi16(res_lo, res_hi); 756 | _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), result); 757 | // dstp[x] = (srcp[x - 1] + (srcp[x] << 1) + srcp[x + 1] + 2) >> 2; 758 | } 759 | 760 | srcp += stride; 761 | dstp += stride; 762 | } 763 | 764 | } 765 | #endif 766 | 767 | void HorizontalBlur3_c(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height) 768 | { 769 | for (int y = 0; y < height; ++y) 770 | { 771 | dstp[0] = (srcp[0] + srcp[1] + 1) >> 1; 772 | 773 | for (int x = 1; x < width - 1; ++x) 774 | dstp[x] = (srcp[x - 1] + (srcp[x] << 1) + srcp[x + 1] + 2) >> 2; 775 | 776 | dstp[width - 1] = (srcp[width - 2] + srcp[width - 1] + 1) >> 1; 777 | 778 | srcp += stride; 779 | dstp += stride; 780 | } 781 | } 782 | 783 | #ifdef INTEL_INTRINSICS 784 | void HorizontalBlur6_SSE2_simd(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height) 785 | { 786 | auto zero = _mm_setzero_si128(); 787 | auto eight = _mm_set1_epi16(8); 788 | auto six = _mm_set1_epi16(6); 789 | 790 | for (int y = 0; y < height; y++) 791 | { 792 | for (int x = 0; x < width; x += 16) { 793 | __m128i pp = _mm_loadu_si128(reinterpret_cast(srcp + x - 2)); 794 | __m128i p = _mm_loadu_si128(reinterpret_cast(srcp + x - 1)); 795 | __m128i s = _mm_load_si128(reinterpret_cast(srcp + x)); 796 | __m128i n = _mm_loadu_si128(reinterpret_cast(srcp + x + 1)); 797 | __m128i nn = _mm_loadu_si128(reinterpret_cast(srcp + 
x + 2)); 798 | 799 | auto pp_lo = _mm_unpacklo_epi8(pp, zero); 800 | auto pp_hi = _mm_unpackhi_epi8(pp, zero); 801 | auto p_lo = _mm_unpacklo_epi8(p, zero); 802 | auto p_hi = _mm_unpackhi_epi8(p, zero); 803 | auto s_lo = _mm_unpacklo_epi8(s, zero); 804 | auto s_hi = _mm_unpackhi_epi8(s, zero); 805 | auto n_lo = _mm_unpacklo_epi8(n, zero); 806 | auto n_hi = _mm_unpackhi_epi8(n, zero); 807 | auto nn_lo = _mm_unpacklo_epi8(nn, zero); 808 | auto nn_hi = _mm_unpackhi_epi8(nn, zero); 809 | 810 | auto centermulsix_lo = _mm_mullo_epi16(s_lo, six); 811 | auto centermulsix_hi = _mm_mullo_epi16(s_hi, six); 812 | auto res_lo = _mm_add_epi16(centermulsix_lo, _mm_add_epi16(_mm_add_epi16(pp_lo, _mm_slli_epi16(_mm_add_epi16(p_lo, n_lo), 2)), nn_lo)); 813 | auto res_hi = _mm_add_epi16(centermulsix_hi, _mm_add_epi16(_mm_add_epi16(pp_hi, _mm_slli_epi16(_mm_add_epi16(p_hi, n_hi), 2)), nn_hi)); 814 | 815 | res_lo = _mm_srli_epi16(_mm_add_epi16(res_lo, eight), 4); 816 | res_hi = _mm_srli_epi16(_mm_add_epi16(res_hi, eight), 4); 817 | auto result = _mm_packus_epi16(res_lo, res_hi); 818 | _mm_store_si128(reinterpret_cast<__m128i*>(dstp + x), result); 819 | // dstp[x] = (srcp[x - 2] + ((srcp[x - 1] + srcp[x + 1]) << 2) + srcp[x] * 6 + srcp[x + 2] + 8) >> 4; 820 | } 821 | 822 | srcp += stride; 823 | dstp += stride; 824 | } 825 | } 826 | #endif 827 | 828 | void HorizontalBlur6_c(const uint8_t* srcp, uint8_t* dstp, int stride, int width, int height) 829 | { 830 | for (int y = 0; y < height; y++) 831 | { 832 | dstp[0] = (srcp[0] * 6 + (srcp[1] << 3) + (srcp[2] << 1) + 8) >> 4; 833 | dstp[1] = (((srcp[0] + srcp[2]) << 2) + srcp[1] * 6 + (srcp[3] << 1) + 8) >> 4; 834 | 835 | for (int x = 2; x < width - 2; ++x) 836 | dstp[x] = (srcp[x - 2] + ((srcp[x - 1] + srcp[x + 1]) << 2) + srcp[x] * 6 + srcp[x + 2] + 8) >> 4; 837 | 838 | dstp[width - 2] = ((srcp[width - 4] << 1) + ((srcp[width - 3] + srcp[width - 1]) << 2) + srcp[width - 2] * 6 + 8) >> 4; 839 | dstp[width - 1] = ((srcp[width - 3] << 1) + 
(srcp[width - 2] << 3) + srcp[width - 1] * 6 + 8) >> 4; 840 | 841 | srcp += stride; 842 | dstp += stride; 843 | } 844 | } 845 | 846 | #ifdef INTEL_INTRINSICS 847 | void andNeighborsInPlace_SSE2_simd(uint8_t* srcp, int stride, int width, int height) 848 | { 849 | uint8_t* srcpp = srcp - stride; 850 | uint8_t* srcpn = srcp + stride; 851 | 852 | for (int y = 0; y < height; y++) 853 | { 854 | for (int x = 0; x < width; x += 16) { 855 | __m128i src_0 = _mm_load_si128(reinterpret_cast(srcp + x)); 856 | __m128i src_p_m1 = _mm_loadu_si128(reinterpret_cast(srcpp + x - 1)); 857 | __m128i src_p = _mm_loadu_si128(reinterpret_cast(srcpp + x)); 858 | __m128i src_p_p1 = _mm_loadu_si128(reinterpret_cast(srcpp + x + 1)); 859 | __m128i src_n_m1 = _mm_loadu_si128(reinterpret_cast(srcpn + x - 1)); 860 | __m128i src_n = _mm_loadu_si128(reinterpret_cast(srcpn + x)); 861 | __m128i src_n_p1 = _mm_loadu_si128(reinterpret_cast(srcpn + x + 1)); 862 | auto result_p = _mm_or_si128(_mm_or_si128(src_p_m1, src_p), src_p_p1); 863 | auto result_n = _mm_or_si128(_mm_or_si128(src_n_m1, src_n), src_n_p1); 864 | auto result = _mm_and_si128(src_0, _mm_or_si128(result_p, result_n)); 865 | _mm_store_si128(reinterpret_cast<__m128i*>(srcp + x), result); 866 | // srcp[x] &= (srcpp[x - 1] | srcpp[x] | srcpp[x + 1] | srcpn[x - 1] | srcpn[x] | srcpn[x + 1]); 867 | } 868 | 869 | srcpp += stride; 870 | srcp += stride; 871 | srcpn += stride; 872 | } 873 | } 874 | #endif 875 | -------------------------------------------------------------------------------- /TComb/avs/alignment.h: -------------------------------------------------------------------------------- 1 | // Avisynth C Interface Version 0.20 2 | // Copyright 2003 Kevin Atkinson 3 | 4 | // This program is free software; you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation; either version 2 of the License, or 7 | // (at your option) any later version. 
8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program; if not, write to the Free Software 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 17 | // http://www.gnu.org/copyleft/gpl.html . 18 | // 19 | // As a special exception, I give you permission to link to the 20 | // Avisynth C interface with independent modules that communicate with 21 | // the Avisynth C interface solely through the interfaces defined in 22 | // avisynth_c.h, regardless of the license terms of these independent 23 | // modules, and to copy and distribute the resulting combined work 24 | // under terms of your choice, provided that every copy of the 25 | // combined work is accompanied by a complete copy of the source code 26 | // of the Avisynth C interface and Avisynth itself (with the version 27 | // used to produce the combined work), being distributed under the 28 | // terms of the GNU General Public License plus this exception. An 29 | // independent module is a module which is not derived from or based 30 | // on Avisynth C Interface, such as 3rd-party filters, import and 31 | // export plugins, or graphical user interfaces. 32 | 33 | #ifndef AVS_ALIGNMENT_H 34 | #define AVS_ALIGNMENT_H 35 | 36 | // Functions and macros to help work with alignment requirements. 37 | 38 | // Tells if a number is a power of two. 39 | #define IS_POWER2(n) ((n) && !((n) & ((n) - 1))) 40 | 41 | // Tells if the pointer "ptr" is aligned to "align" bytes. 
42 | #define IS_PTR_ALIGNED(ptr, align) (((uintptr_t)ptr & ((uintptr_t)(align-1))) == 0) 43 | 44 | // Rounds up the number "n" to the next greater multiple of "align" 45 | #define ALIGN_NUMBER(n, align) (((n) + (align)-1) & (~((align)-1))) 46 | 47 | // Rounds up the pointer address "ptr" to the next greater multiple of "align" 48 | #define ALIGN_POINTER(ptr, align) (((uintptr_t)(ptr) + (align)-1) & (~(uintptr_t)((align)-1))) 49 | 50 | #ifdef __cplusplus 51 | 52 | #include 53 | #include 54 | #include 55 | #include "config.h" 56 | 57 | #if defined(MSVC) && _MSC_VER<1400 58 | // needed for VS2013, otherwise C++11 'alignas' works 59 | #define avs_alignas(x) __declspec(align(x)) 60 | #else 61 | // assumes C++11 support 62 | #define avs_alignas(x) alignas(x) 63 | #endif 64 | 65 | template 66 | static bool IsPtrAligned(T* ptr, size_t align) 67 | { 68 | assert(IS_POWER2(align)); 69 | return (bool)IS_PTR_ALIGNED(ptr, align); 70 | } 71 | 72 | template 73 | static T AlignNumber(T n, T align) 74 | { 75 | assert(IS_POWER2(align)); 76 | return ALIGN_NUMBER(n, align); 77 | } 78 | 79 | template 80 | static T* AlignPointer(T* ptr, size_t align) 81 | { 82 | assert(IS_POWER2(align)); 83 | return (T*)ALIGN_POINTER(ptr, align); 84 | } 85 | 86 | extern "C" 87 | { 88 | #else 89 | #include 90 | #endif // __cplusplus 91 | 92 | // Returns a new buffer that is at least the size "nbytes". 93 | // The buffer will be aligned to "align" bytes. 94 | // Returns NULL on error. On successful allocation, 95 | // the returned buffer must be freed using "avs_free". 
96 | inline void* avs_malloc(size_t nbytes, size_t align) 97 | { 98 | if (!IS_POWER2(align)) 99 | return NULL; 100 | 101 | size_t offset = sizeof(void*) + align - 1; 102 | 103 | void *orig = malloc(nbytes + offset); 104 | if (orig == NULL) 105 | return NULL; 106 | 107 | void **aligned = (void**)(((uintptr_t)orig + (uintptr_t)offset) & (~(uintptr_t)(align-1))); 108 | aligned[-1] = orig; 109 | return aligned; 110 | } 111 | 112 | // Buffers allocated using "avs_malloc" must be freed 113 | // using "avs_free" instead of "free". 114 | inline void avs_free(void *ptr) 115 | { 116 | // Mirroring free()'s semantic requires us to accept NULLs 117 | if (ptr == NULL) 118 | return; 119 | 120 | free(((void**)ptr)[-1]); 121 | } 122 | 123 | #ifdef __cplusplus 124 | } // extern "C" 125 | 126 | // The point of these undef's is to force using the template functions 127 | // if we are in C++ mode. For C, the user can rely only on the macros. 128 | #undef IS_PTR_ALIGNED 129 | #undef ALIGN_NUMBER 130 | #undef ALIGN_POINTER 131 | 132 | #endif // __cplusplus 133 | 134 | #endif //AVS_ALIGNMENT_H 135 | -------------------------------------------------------------------------------- /TComb/avs/capi.h: -------------------------------------------------------------------------------- 1 | // Avisynth C Interface Version 0.20 2 | // Copyright 2003 Kevin Atkinson 3 | 4 | // This program is free software; you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation; either version 2 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 
13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program; if not, write to the Free Software 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 17 | // http://www.gnu.org/copyleft/gpl.html . 18 | // 19 | // As a special exception, I give you permission to link to the 20 | // Avisynth C interface with independent modules that communicate with 21 | // the Avisynth C interface solely through the interfaces defined in 22 | // avisynth_c.h, regardless of the license terms of these independent 23 | // modules, and to copy and distribute the resulting combined work 24 | // under terms of your choice, provided that every copy of the 25 | // combined work is accompanied by a complete copy of the source code 26 | // of the Avisynth C interface and Avisynth itself (with the version 27 | // used to produce the combined work), being distributed under the 28 | // terms of the GNU General Public License plus this exception. An 29 | // independent module is a module which is not derived from or based 30 | // on Avisynth C Interface, such as 3rd-party filters, import and 31 | // export plugins, or graphical user interfaces. 
32 | 33 | #ifndef AVS_CAPI_H 34 | #define AVS_CAPI_H 35 | 36 | #include "config.h" 37 | 38 | #ifdef AVS_POSIX 39 | // this is also defined in avs/posix.h 40 | #ifndef AVS_HAIKU 41 | #define __declspec(x) 42 | #endif 43 | #endif 44 | 45 | #ifdef __cplusplus 46 | # define EXTERN_C extern "C" 47 | #else 48 | # define EXTERN_C 49 | #endif 50 | 51 | #ifdef AVS_WINDOWS 52 | #ifdef BUILDING_AVSCORE 53 | # if defined(GCC) && defined(X86_32) 54 | # define AVSC_CC 55 | # else // MSVC builds and 64-bit GCC 56 | # ifndef AVSC_USE_STDCALL 57 | # define AVSC_CC __cdecl 58 | # else 59 | # define AVSC_CC __stdcall 60 | # endif 61 | # endif 62 | #else // needed for programs that talk to AviSynth+ 63 | # ifndef AVSC_WIN32_GCC32 // see comment below 64 | # ifndef AVSC_USE_STDCALL 65 | # define AVSC_CC __cdecl 66 | # else 67 | # define AVSC_CC __stdcall 68 | # endif 69 | # else 70 | # define AVSC_CC 71 | # endif 72 | #endif 73 | # else 74 | # define AVSC_CC 75 | #endif 76 | 77 | // On 64-bit Windows, there's only one calling convention, 78 | // so there is no difference between MSVC and GCC. On 32-bit, 79 | // this isn't true. The convention that GCC needs to use to 80 | // even build AviSynth+ as 32-bit makes anything that uses 81 | // it incompatible with 32-bit MSVC builds of AviSynth+. 82 | // The AVSC_WIN32_GCC32 define is meant to provide a user 83 | // switchable way to make builds of FFmpeg to test 32-bit 84 | // GCC builds of AviSynth+ without having to screw around 85 | // with alternate headers, while still default to the usual 86 | // situation of using 32-bit MSVC builds of AviSynth+. 87 | 88 | // Hopefully, this situation will eventually be resolved 89 | // and a broadly compatible solution will arise so the 90 | // same 32-bit FFmpeg build can handle either MSVC or GCC 91 | // builds of AviSynth+. 
92 | 93 | #define AVSC_INLINE static __inline 94 | 95 | #ifdef BUILDING_AVSCORE 96 | #ifdef AVS_WINDOWS 97 | # define AVSC_EXPORT __declspec(dllexport) 98 | # define AVSC_API(ret, name) EXTERN_C AVSC_EXPORT ret AVSC_CC name 99 | #else 100 | # define AVSC_EXPORT EXTERN_C 101 | # define AVSC_API(ret, name) EXTERN_C ret AVSC_CC name 102 | #endif 103 | #else 104 | # define AVSC_EXPORT EXTERN_C __declspec(dllexport) 105 | # ifndef AVSC_NO_DECLSPEC 106 | # define AVSC_API(ret, name) EXTERN_C __declspec(dllimport) ret AVSC_CC name 107 | # else 108 | # define AVSC_API(ret, name) typedef ret (AVSC_CC *name##_func) 109 | # endif 110 | #endif 111 | 112 | #endif //AVS_CAPI_H 113 | -------------------------------------------------------------------------------- /TComb/avs/config.h: -------------------------------------------------------------------------------- 1 | // Avisynth C Interface Version 0.20 2 | // Copyright 2003 Kevin Atkinson 3 | 4 | // This program is free software; you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation; either version 2 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program; if not, write to the Free Software 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 17 | // http://www.gnu.org/copyleft/gpl.html . 
18 | // 19 | // As a special exception, I give you permission to link to the 20 | // Avisynth C interface with independent modules that communicate with 21 | // the Avisynth C interface solely through the interfaces defined in 22 | // avisynth_c.h, regardless of the license terms of these independent 23 | // modules, and to copy and distribute the resulting combined work 24 | // under terms of your choice, provided that every copy of the 25 | // combined work is accompanied by a complete copy of the source code 26 | // of the Avisynth C interface and Avisynth itself (with the version 27 | // used to produce the combined work), being distributed under the 28 | // terms of the GNU General Public License plus this exception. An 29 | // independent module is a module which is not derived from or based 30 | // on Avisynth C Interface, such as 3rd-party filters, import and 31 | // export plugins, or graphical user interfaces. 32 | 33 | #ifndef AVS_CONFIG_H 34 | #define AVS_CONFIG_H 35 | 36 | // Undefine this to get cdecl calling convention 37 | #define AVSC_USE_STDCALL 1 38 | 39 | // NOTE TO PLUGIN AUTHORS: 40 | // Because FRAME_ALIGN can be substantially higher than the alignment 41 | // a plugin actually needs, plugins should not use FRAME_ALIGN to check for 42 | // alignment. They should always request the exact alignment value they need. 43 | // This is to make sure that plugins work over the widest range of AviSynth 44 | // builds possible. 
45 | #define FRAME_ALIGN 64 46 | 47 | #if defined(_M_AMD64) || defined(__x86_64) 48 | # define X86_64 49 | #elif defined(_M_IX86) || defined(__i386__) 50 | # define X86_32 51 | // VS2017 introduced _M_ARM64 52 | #elif defined(_M_ARM64) || defined(__aarch64__) 53 | # define ARM64 54 | #elif defined(_M_ARM) || defined(__arm__) 55 | # define ARM32 56 | #elif defined(__PPC64__) 57 | # define PPC64 58 | #elif defined(_M_PPC) || defined(__PPC__) || defined(__POWERPC__) 59 | # define PPC32 60 | #else 61 | # error Unsupported CPU architecture. 62 | #endif 63 | 64 | // VC++ LLVM-Clang-cl MinGW-Gnu 65 | // MSVC x x 66 | // MSVC_PURE x 67 | // CLANG x 68 | // GCC x 69 | 70 | #if defined(__clang__) 71 | // Check clang first. clang-cl also defines __MSC_VER 72 | // We set MSVC because they are mostly compatible 73 | # define CLANG 74 | #if defined(_MSC_VER) 75 | # define MSVC 76 | # define AVS_FORCEINLINE __attribute__((always_inline)) 77 | #else 78 | # define AVS_FORCEINLINE __attribute__((always_inline)) inline 79 | #endif 80 | #elif defined(_MSC_VER) 81 | # define MSVC 82 | # define MSVC_PURE 83 | # define AVS_FORCEINLINE __forceinline 84 | #elif defined(__GNUC__) 85 | # define GCC 86 | # define AVS_FORCEINLINE __attribute__((always_inline)) inline 87 | #else 88 | # error Unsupported compiler. 89 | # define AVS_FORCEINLINE inline 90 | # undef __forceinline 91 | # define __forceinline inline 92 | #endif 93 | 94 | #if defined(_WIN32) 95 | # define AVS_WINDOWS 96 | #elif defined(__linux__) 97 | # define AVS_LINUX 98 | # define AVS_POSIX 99 | #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) 100 | # define AVS_BSD 101 | # define AVS_POSIX 102 | #elif defined(__APPLE__) 103 | # define AVS_MACOS 104 | # define AVS_POSIX 105 | #elif defined(__HAIKU__) 106 | # define AVS_HAIKU 107 | # define AVS_POSIX 108 | #else 109 | # error Operating system unsupported. 
110 | #endif 111 | 112 | // useful warnings disabler macros for supported compilers 113 | 114 | #if defined(_MSC_VER) 115 | #define DISABLE_WARNING_PUSH __pragma(warning( push )) 116 | #define DISABLE_WARNING_POP __pragma(warning( pop )) 117 | #define DISABLE_WARNING(warningNumber) __pragma(warning( disable : warningNumber )) 118 | 119 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE DISABLE_WARNING(4101) 120 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION DISABLE_WARNING(4505) 121 | // other warnings you want to deactivate... 122 | 123 | #elif defined(__GNUC__) || defined(__clang__) 124 | #define DO_PRAGMA(X) _Pragma(#X) 125 | #define DISABLE_WARNING_PUSH DO_PRAGMA(GCC diagnostic push) 126 | #define DISABLE_WARNING_POP DO_PRAGMA(GCC diagnostic pop) 127 | #define DISABLE_WARNING(warningName) DO_PRAGMA(GCC diagnostic ignored #warningName) 128 | 129 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE DISABLE_WARNING(-Wunused-variable) 130 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION DISABLE_WARNING(-Wunused-function) 131 | // other warnings you want to deactivate... 132 | 133 | #else 134 | #define DISABLE_WARNING_PUSH 135 | #define DISABLE_WARNING_POP 136 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE 137 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION 138 | // other warnings you want to deactivate... 139 | 140 | #endif 141 | 142 | #if defined(AVS_POSIX) 143 | #define NEW_AVSVALUE 144 | #else 145 | #define NEW_AVSVALUE 146 | #endif 147 | 148 | #if defined(AVS_WINDOWS) 149 | // Windows XP does not have proper initialization for 150 | // thread local variables. 
151 | // Use workaround instead __declspec(thread) 152 | #define XP_TLS 153 | #endif 154 | 155 | #endif //AVS_CONFIG_H 156 | -------------------------------------------------------------------------------- /TComb/avs/cpuid.h: -------------------------------------------------------------------------------- 1 | // This program is free software; you can redistribute it and/or modify 2 | // it under the terms of the GNU General Public License as published by 3 | // the Free Software Foundation; either version 2 of the License, or 4 | // (at your option) any later version. 5 | // 6 | // This program is distributed in the hope that it will be useful, 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | // GNU General Public License for more details. 10 | // 11 | // You should have received a copy of the GNU General Public License 12 | // along with this program; if not, write to the Free Software 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 14 | // http://www.gnu.org/copyleft/gpl.html . 15 | // 16 | // Linking Avisynth statically or dynamically with other modules is making a 17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 18 | // General Public License cover the whole combination. 
19 | // 20 | // As a special exception, the copyright holders of Avisynth give you 21 | // permission to link Avisynth with independent modules that communicate with 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 23 | // terms of these independent modules, and to copy and distribute the 24 | // resulting combined work under terms of your choice, provided that 25 | // every copy of the combined work is accompanied by a complete copy of 26 | // the source code of Avisynth (the version of Avisynth used to produce the 27 | // combined work), being distributed under the terms of the GNU General 28 | // Public License plus this exception. An independent module is a module 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 30 | // import and export plugins, or graphical user interfaces. 31 | 32 | #ifndef AVSCORE_CPUID_H 33 | #define AVSCORE_CPUID_H 34 | 35 | // For GetCPUFlags. These are backwards-compatible with those in VirtualDub. 36 | // ending with SSE4_2 37 | // For emulation see https://software.intel.com/en-us/articles/intel-software-development-emulator 38 | enum { 39 | /* oldest CPU to support extension */ 40 | CPUF_FORCE = 0x01, // N/A 41 | CPUF_FPU = 0x02, // 386/486DX 42 | CPUF_MMX = 0x04, // P55C, K6, PII 43 | CPUF_INTEGER_SSE = 0x08, // PIII, Athlon 44 | CPUF_SSE = 0x10, // PIII, Athlon XP/MP 45 | CPUF_SSE2 = 0x20, // PIV, K8 46 | CPUF_3DNOW = 0x40, // K6-2 47 | CPUF_3DNOW_EXT = 0x80, // Athlon 48 | CPUF_X86_64 = 0xA0, // Hammer (note: equiv. 
to 3DNow + SSE2, which 49 | // only Hammer will have anyway) 50 | CPUF_SSE3 = 0x100, // PIV+, K8 Venice 51 | CPUF_SSSE3 = 0x200, // Core 2 52 | CPUF_SSE4 = 0x400, 53 | CPUF_SSE4_1 = 0x400, // Penryn, Wolfdale, Yorkfield 54 | CPUF_AVX = 0x800, // Sandy Bridge, Bulldozer 55 | CPUF_SSE4_2 = 0x1000, // Nehalem 56 | // AVS+ 57 | CPUF_AVX2 = 0x2000, // Haswell 58 | CPUF_FMA3 = 0x4000, 59 | CPUF_F16C = 0x8000, 60 | CPUF_MOVBE = 0x10000, // Big Endian move 61 | CPUF_POPCNT = 0x20000, 62 | CPUF_AES = 0x40000, 63 | CPUF_FMA4 = 0x80000, 64 | 65 | CPUF_AVX512F = 0x100000, // AVX-512 Foundation. 66 | CPUF_AVX512DQ = 0x200000, // AVX-512 DQ (Double/Quad granular) Instructions 67 | CPUF_AVX512PF = 0x400000, // AVX-512 Prefetch 68 | CPUF_AVX512ER = 0x800000, // AVX-512 Exponential and Reciprocal 69 | CPUF_AVX512CD = 0x1000000, // AVX-512 Conflict Detection 70 | CPUF_AVX512BW = 0x2000000, // AVX-512 BW (Byte/Word granular) Instructions 71 | CPUF_AVX512VL = 0x4000000, // AVX-512 VL (128/256 Vector Length) Extensions 72 | CPUF_AVX512IFMA = 0x8000000, // AVX-512 IFMA integer 52 bit 73 | CPUF_AVX512VBMI = 0x10000000,// AVX-512 VBMI 74 | }; 75 | 76 | #ifdef BUILDING_AVSCORE 77 | int GetCPUFlags(); 78 | void SetMaxCPU(int new_flags); 79 | #endif 80 | 81 | #endif // AVSCORE_CPUID_H 82 | -------------------------------------------------------------------------------- /TComb/avs/filesystem.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Snippet copied from filesystem/README.md 4 | 5 | #if defined(__cplusplus) && __cplusplus >= 201703L && defined(__has_include) 6 | #if __has_include() 7 | #define GHC_USE_STD_FS 8 | #include 9 | namespace fs = std::filesystem; 10 | #endif 11 | #endif 12 | #ifndef GHC_USE_STD_FS 13 | #include 14 | namespace fs = ghc::filesystem; 15 | #endif 16 | -------------------------------------------------------------------------------- /TComb/avs/minmax.h: 
-------------------------------------------------------------------------------- 1 | // This program is free software; you can redistribute it and/or modify 2 | // it under the terms of the GNU General Public License as published by 3 | // the Free Software Foundation; either version 2 of the License, or 4 | // (at your option) any later version. 5 | // 6 | // This program is distributed in the hope that it will be useful, 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | // GNU General Public License for more details. 10 | // 11 | // You should have received a copy of the GNU General Public License 12 | // along with this program; if not, write to the Free Software 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 14 | // http://www.gnu.org/copyleft/gpl.html . 15 | // 16 | // Linking Avisynth statically or dynamically with other modules is making a 17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 18 | // General Public License cover the whole combination. 19 | // 20 | // As a special exception, the copyright holders of Avisynth give you 21 | // permission to link Avisynth with independent modules that communicate with 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 23 | // terms of these independent modules, and to copy and distribute the 24 | // resulting combined work under terms of your choice, provided that 25 | // every copy of the combined work is accompanied by a complete copy of 26 | // the source code of Avisynth (the version of Avisynth used to produce the 27 | // combined work), being distributed under the terms of the GNU General 28 | // Public License plus this exception. An independent module is a module 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 30 | // import and export plugins, or graphical user interfaces. 
#ifndef AVSCORE_MINMAX_H
#define AVSCORE_MINMAX_H

// Returns the smaller of v1 and v2 (v2 when neither is smaller).
// Both arguments must have the same type T; only operator< is required.
template<typename T>
T min(T v1, T v2)
{
  return v1 < v2 ? v1 : v2;
}

// Returns the larger of v1 and v2 (v2 when neither is larger).
// Both arguments must have the same type T; only operator> is required.
template<typename T>
T max(T v1, T v2)
{
  return v1 > v2 ? v1 : v2;
}

// Limits n to the closed range [min, max].
// The upper bound is applied first, then the lower bound, so when min > max
// the result is min.
template<typename T>
T clamp(T n, T min, T max)
{
  n = n > max ? max : n;
  return n < min ? min : n;
}

#endif // AVSCORE_MINMAX_H
55 | -------------------------------------------------------------------------------- /TComb/avs/posix.h: -------------------------------------------------------------------------------- 1 | // This program is free software; you can redistribute it and/or modify 2 | // it under the terms of the GNU General Public License as published by 3 | // the Free Software Foundation; either version 2 of the License, or 4 | // (at your option) any later version. 5 | // 6 | // This program is distributed in the hope that it will be useful, 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | // GNU General Public License for more details. 10 | // 11 | // You should have received a copy of the GNU General Public License 12 | // along with this program; if not, write to the Free Software 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 14 | // http://www.gnu.org/copyleft/gpl.html . 15 | // 16 | // Linking Avisynth statically or dynamically with other modules is making a 17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 18 | // General Public License cover the whole combination. 
//
// As a special exception, the copyright holders of Avisynth give you
// permission to link Avisynth with independent modules that communicate with
// Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
// terms of these independent modules, and to copy and distribute the
// resulting combined work under terms of your choice, provided that
// every copy of the combined work is accompanied by a complete copy of
// the source code of Avisynth (the version of Avisynth used to produce the
// combined work), being distributed under the terms of the GNU General
// Public License plus this exception. An independent module is a module
// which is not derived from or based on Avisynth, such as 3rd-party filters,
// import and export plugins, or graphical user interfaces.

// POSIX compatibility shims: maps the Win32/MSVC names used by the Avisynth
// sources onto their libc equivalents. Everything below is compiled only
// when AVS_POSIX is defined.
#ifdef AVS_POSIX
#ifndef AVSCORE_POSIX_H
#define AVSCORE_POSIX_H

// strlen/strcmp/strdup for the lstr* and _strdup aliases below.
#ifdef __cplusplus
#include <cstring>
#else
#include <string.h>
#endif
#include <stdint.h>   // uint32_t / int32_t / uint64_t / int64_t used by the arithmetic macros
#include <strings.h>  // strcasecmp, strncasecmp
#include <unistd.h>   // chdir, getcwd

// MSVC-only keyword used in Avisynth; expands to nothing elsewhere.
#define __single_inheritance

// These things don't exist in Linux
#if defined(AVS_HAIKU)
#undef __declspec
#endif
#define __declspec(x)
#define lstrlen strlen
#define lstrcmp strcmp
#define lstrcmpi strcasecmp
#define _stricmp strcasecmp
#define _strnicmp strncasecmp
#define _strdup strdup
#define SetCurrentDirectory(x) chdir(x)
#define SetCurrentDirectoryW(x) chdir(x)
// NOTE(review): POSIX getcwd() takes (buf, size); this one-argument expansion
// cannot compile if the macro is ever used. Left unchanged to keep the macro's
// parameter list as-is -- confirm the intended mapping before relying on it.
#define GetCurrentDirectoryW(x) getcwd(x)
#define _putenv putenv
#define _alloca alloca

// Borrowing some compatibility macros from AvxSynth, slightly modified.
// Both 32x32->64 multiplies narrow their operands to 32 bits first, so the
// signed variant uses int32_t (plain `long` is 64-bit on LP64 targets and
// would not truncate).
#define UInt32x32To64(a, b) ((uint64_t)(((uint64_t)((uint32_t)(a))) * ((uint32_t)(b))))
#define Int64ShrlMod32(a, b) ((uint64_t)((uint64_t)(a) >> (b)))
#define Int32x32To64(a, b) ((int64_t)(((int64_t)((int32_t)(a))) * ((int32_t)(b))))

// Atomic increment/decrement returning the new value.
#define InterlockedIncrement(x) __sync_add_and_fetch((x), 1)
#define InterlockedDecrement(x) __sync_sub_and_fetch((x), 1)
// (nNumber * nNumerator + nDenominator/2) / nDenominator, computed in 64 bits.
// No guard against nDenominator == 0.
#define MulDiv(nNumber, nNumerator, nDenominator) (int32_t) (((int64_t) (nNumber) * (int64_t) (nNumerator) + (int64_t) ((nDenominator)/2)) / (int64_t) (nDenominator))

#ifndef TRUE
#define TRUE  true
#endif

#ifndef FALSE
#define FALSE false
#endif

// Minimal HRESULT-style helpers: failure is signalled by the top bit.
#define S_FALSE       (0x00000001)
#define E_FAIL        (0x80004005)
#define FAILED(hr)    ((hr) & 0x80000000)
#define SUCCEEDED(hr) (!FAILED(hr))

// Statuses copied from comments in exception.cpp
#define STATUS_GUARD_PAGE_VIOLATION 0x80000001
#define STATUS_DATATYPE_MISALIGNMENT 0x80000002
#define STATUS_BREAKPOINT 0x80000003
#define STATUS_SINGLE_STEP 0x80000004
#define STATUS_ACCESS_VIOLATION 0xc0000005
#define STATUS_IN_PAGE_ERROR 0xc0000006
#define STATUS_INVALID_HANDLE 0xc0000008
#define STATUS_NO_MEMORY 0xc0000017
#define STATUS_ILLEGAL_INSTRUCTION 0xc000001d
#define STATUS_NONCONTINUABLE_EXCEPTION 0xc0000025
#define STATUS_INVALID_DISPOSITION 0xc0000026
#define STATUS_ARRAY_BOUNDS_EXCEEDED 0xc000008c
#define STATUS_FLOAT_DENORMAL_OPERAND 0xc000008d
#define STATUS_FLOAT_DIVIDE_BY_ZERO 0xc000008e
#define STATUS_FLOAT_INEXACT_RESULT 0xc000008f
#define STATUS_FLOAT_INVALID_OPERATION 0xc0000090
#define STATUS_FLOAT_OVERFLOW 0xc0000091
#define STATUS_FLOAT_STACK_CHECK 0xc0000092
#define STATUS_FLOAT_UNDERFLOW 0xc0000093
#define STATUS_INTEGER_DIVIDE_BY_ZERO 0xc0000094
#define STATUS_INTEGER_OVERFLOW 0xc0000095
#define STATUS_PRIVILEGED_INSTRUCTION 0xc0000096
#define STATUS_STACK_OVERFLOW 0xc00000fd

// Calling convention keywords expand to nothing (not redefined on Haiku).
#ifndef AVS_HAIKU
#define __stdcall
#define __cdecl
#endif

// PowerPC OS X is really niche these days, but this painless equivocation
// of the function/macro names used in posix_get_available_memory()
// is all it takes to let it work.  The G5 was 64-bit, and if 10.5 Leopard
// can run in native 64-bit, it probably uses the names in that block as-is.
#ifdef AVS_MACOS
#ifdef PPC32
#define vm_statistics64_data_t vm_statistics_data_t
#define HOST_VM_INFO64_COUNT HOST_VM_INFO_COUNT
#define HOST_VM_INFO64 HOST_VM_INFO
#define host_statistics64 host_statistics
#endif // PPC32
#endif // AVS_MACOS

#endif // AVSCORE_POSIX_H
#endif // AVS_POSIX

// --------------------------------------------------------------------------------
// /TComb/avs/types.h:
// --------------------------------------------------------------------------------
// Avisynth C Interface Version 0.20
// Copyright 2003 Kevin Atkinson

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
// http://www.gnu.org/copyleft/gpl.html .
//
// As a special exception, I give you permission to link to the
// Avisynth C interface with independent modules that communicate with
// the Avisynth C interface solely through the interfaces defined in
// avisynth_c.h, regardless of the license terms of these independent
// modules, and to copy and distribute the resulting combined work
// under terms of your choice, provided that every copy of the
// combined work is accompanied by a complete copy of the source code
// of the Avisynth C interface and Avisynth itself (with the version
// used to produce the combined work), being distributed under the
// terms of the GNU General Public License plus this exception.  An
// independent module is a module which is not derived from or based
// on Avisynth C Interface, such as 3rd-party filters, import and
// export plugins, or graphical user interfaces.

#ifndef AVS_TYPES_H
#define AVS_TYPES_H

// Define all types necessary for interfacing with avisynth.dll
// NOTE(review): the header names were lost from this copy; the ones below are
// restored from what the typedefs need (fixed-width integers, size types) --
// confirm against the upstream header.
#include <stdint.h>
#include <inttypes.h>
#ifdef __cplusplus
  #include <cstddef>
  #include <cstdint>
#else
  #include <stddef.h>
  #include <stdint.h>
#endif

// Raster types used by VirtualDub & Avisynth
typedef uint32_t Pixel32;  // one 32-bit value per pixel
typedef uint8_t BYTE;      // unsigned 8-bit value

// Audio Sample information
typedef float SFLOAT;      // single-precision audio sample

#endif //AVS_TYPES_H

// --------------------------------------------------------------------------------
// /TComb/avs/win.h:
// --------------------------------------------------------------------------------
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
5 | // 6 | // This program is distributed in the hope that it will be useful, 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | // GNU General Public License for more details. 10 | // 11 | // You should have received a copy of the GNU General Public License 12 | // along with this program; if not, write to the Free Software 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 14 | // http://www.gnu.org/copyleft/gpl.html . 15 | // 16 | // Linking Avisynth statically or dynamically with other modules is making a 17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 18 | // General Public License cover the whole combination. 19 | // 20 | // As a special exception, the copyright holders of Avisynth give you 21 | // permission to link Avisynth with independent modules that communicate with 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 23 | // terms of these independent modules, and to copy and distribute the 24 | // resulting combined work under terms of your choice, provided that 25 | // every copy of the combined work is accompanied by a complete copy of 26 | // the source code of Avisynth (the version of Avisynth used to produce the 27 | // combined work), being distributed under the terms of the GNU General 28 | // Public License plus this exception. An independent module is a module 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 30 | // import and export plugins, or graphical user interfaces. 31 | 32 | #ifndef AVSCORE_WIN_H 33 | #define AVSCORE_WIN_H 34 | 35 | // Whenever you need windows headers, start by including this file, then the rest. 36 | 37 | // WWUUT? We require XP now? 
38 | #if !defined(NTDDI_VERSION) && !defined(_WIN32_WINNT) 39 | #define NTDDI_VERSION 0x05020000 40 | #define _WIN32_WINNT 0x0502 41 | #endif 42 | 43 | #define WIN32_LEAN_AND_MEAN 44 | #define STRICT 45 | #if !defined(NOMINMAX) 46 | #define NOMINMAX 47 | #endif 48 | 49 | #include 50 | 51 | // Provision for UTF-8 max 4 bytes per code point 52 | #define AVS_MAX_PATH MAX_PATH*4 53 | 54 | #endif // AVSCORE_WIN_H 55 | -------------------------------------------------------------------------------- /TComb/common.h: -------------------------------------------------------------------------------- 1 | #ifndef __COMMON_H__ 2 | #define __COMMON_H__ 3 | 4 | #include "avisynth.h" 5 | #include 6 | 7 | #if defined(__clang__) 8 | // Check clang first. clang-cl also defines __MSC_VER 9 | // We set MSVC because they are mostly compatible 10 | # define CLANG 11 | #if defined(_MSC_VER) 12 | # define MSVC 13 | # define TC_FORCEINLINE __attribute__((always_inline)) 14 | #else 15 | # define TC_FORCEINLINE __attribute__((always_inline)) inline 16 | #endif 17 | #elif defined(_MSC_VER) 18 | # define MSVC 19 | # define MSVC_PURE 20 | # define TC_FORCEINLINE __forceinline 21 | #elif defined(__GNUC__) 22 | # define GCC 23 | # define TC_FORCEINLINE __attribute__((always_inline)) inline 24 | #else 25 | # error Unsupported compiler. 
26 | # define TC_FORCEINLINE inline 27 | # undef __forceinline 28 | # define __forceinline inline 29 | #endif 30 | 31 | 32 | #ifndef _WIN32 33 | #define OutputDebugString(x) 34 | #endif 35 | 36 | #if (defined(GCC) || defined(CLANG)) && !defined(_WIN32) 37 | #include 38 | #define _aligned_malloc(size, alignment) aligned_alloc(alignment, size) 39 | #define _aligned_free(ptr) free(ptr) 40 | #endif 41 | 42 | #ifndef _WIN32 43 | #include 44 | #ifdef AVS_POSIX 45 | #ifndef _POSIX_C_SOURCE 46 | #define _POSIX_C_SOURCE 1 47 | #endif 48 | #include 49 | #endif 50 | #endif 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /TComb/resource.h: -------------------------------------------------------------------------------- 1 | //{{NO_DEPENDENCIES}} 2 | // Microsoft Visual C++ generated include file. 3 | // Used by TComb.rc 4 | 5 | // Next default values for new objects 6 | // 7 | #ifdef APSTUDIO_INVOKED 8 | #ifndef APSTUDIO_READONLY_SYMBOLS 9 | #define _APS_NEXT_RESOURCE_VALUE 101 10 | #define _APS_NEXT_COMMAND_VALUE 40001 11 | #define _APS_NEXT_CONTROL_VALUE 1001 12 | #define _APS_NEXT_SYMED_VALUE 101 13 | #endif 14 | #endif 15 | -------------------------------------------------------------------------------- /cmake_uninstall.cmake.in: -------------------------------------------------------------------------------- 1 | if(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 2 | message(FATAL_ERROR "Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 3 | endif(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 4 | 5 | file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files) 6 | string(REGEX REPLACE "\n" ";" files "${files}") 7 | foreach(file ${files}) 8 | message(STATUS "Uninstalling $ENV{DESTDIR}${file}") 9 | if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 10 | exec_program( 11 | "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" 12 | 
OUTPUT_VARIABLE rm_out 13 | RETURN_VALUE rm_retval 14 | ) 15 | if(NOT "${rm_retval}" STREQUAL 0) 16 | message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}") 17 | endif(NOT "${rm_retval}" STREQUAL 0) 18 | else(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 19 | message(STATUS "File $ENV{DESTDIR}${file} does not exist.") 20 | endif(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 21 | endforeach(file) 22 | --------------------------------------------------------------------------------