├── .gitignore ├── src ├── version.hpp.in ├── utils.h ├── impl_dispatch.h ├── random.h ├── flash3kyuu_deband_impl_avx2.cpp ├── flash3kyuu_deband_impl_avx512.cpp ├── process_plane_context.h ├── flash3kyuu_deband_impl_sse4.cpp ├── process_plane_context.cpp ├── constants.h ├── pixel_proc_c_high_no_dithering.h ├── neo_f3kdb.hpp ├── sse_utils.h ├── version.rc.in ├── pixel_proc_c_high_bit_depth_common.h ├── compiler_compat.h ├── pixel_proc_c_16bit.h ├── bit_utils.h ├── f3kdb.h ├── impl_dispatch.cpp ├── pixel_proc_c.h ├── random.cpp ├── pixel_proc_c_high_ordered_dithering.h ├── core.h ├── impl_dispatch_decl.h ├── pixel_proc_c_high_f_s_dithering.h ├── cpuid.cpp ├── dither_high.h ├── f3kdb.hpp └── core.cpp ├── debian ├── changelog ├── rules ├── control └── copyright ├── main.cpp ├── include ├── avisynth │ └── avs │ │ ├── filesystem.h │ │ ├── minmax.h │ │ ├── types.h │ │ ├── win.h │ │ ├── cpuid.h │ │ ├── capi.h │ │ ├── alignment.h │ │ ├── posix.h │ │ └── config.h ├── dualsynth │ ├── ds_videoinfo.hpp │ ├── ds_common.hpp │ ├── ds_filter.hpp │ ├── ds_format.hpp │ ├── avs_wrapper.hpp │ ├── vs_wrapper.hpp │ └── ds_frame.hpp └── vapoursynth │ ├── VSScript.h │ └── VSHelper.h ├── .github └── workflows │ └── windows.yml ├── VCL2 ├── vectorclass.h ├── instrset_detect.cpp ├── LICENSE └── vectormath_common.h ├── README.md └── CMakeLists.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *.user 2 | *.dyn 3 | *.obj 4 | *.sublime-* 5 | build 6 | *.DS_Store 7 | -------------------------------------------------------------------------------- /src/version.hpp.in: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define PLUGIN_VERSION "@PROJECT_VERSION_STRING_FULL@" -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | neo-f3kdb (0.07) unstable; 
urgency=medium 2 | 3 | * Initial release 4 | 5 | -- Xinyue Lu Fri, 01 Jan 2021 00:00:00 -0500 6 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "src/neo_f3kdb.hpp" 6 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | 2 | static inline int clamp_pixel(int pixel, int min, int max) 3 | { 4 | if (pixel > max) { 5 | pixel = max; 6 | } else if (pixel < min) { 7 | pixel = min; 8 | } 9 | return pixel; 10 | } 11 | 12 | -------------------------------------------------------------------------------- /src/impl_dispatch.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "core.h" 4 | 5 | extern const process_plane_impl_t** process_plane_impls[]; 6 | 7 | #define DITHER_CONTEXT_BUFFER_SIZE 8192 8 | 9 | #define CONTEXT_BUFFER_SIZE DITHER_CONTEXT_BUFFER_SIZE 10 | -------------------------------------------------------------------------------- /src/random.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "constants.h" 4 | #include "f3kdb.h" 5 | 6 | #define DEFAULT_RANDOM_PARAM 1.0 7 | 8 | // returns a random number in [-range, range] 9 | int random(RANDOM_ALGORITHM algo, int& seed, int range, double param); 10 | -------------------------------------------------------------------------------- /src/flash3kyuu_deband_impl_avx2.cpp: -------------------------------------------------------------------------------- 1 | #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) 2 | #include 3 | #include "flash3kyuu_deband_avx2_base.h" 4 | 5 | #define DECLARE_IMPL_AVX2 6 | #include "impl_dispatch_decl.h" 7 | #endif 8 | 
-------------------------------------------------------------------------------- /src/flash3kyuu_deband_impl_avx512.cpp: -------------------------------------------------------------------------------- 1 | #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) 2 | #include 3 | #include "flash3kyuu_deband_avx512_base.h" 4 | 5 | #define DECLARE_IMPL_AVX512 6 | #include "impl_dispatch_decl.h" 7 | #endif 8 | -------------------------------------------------------------------------------- /src/process_plane_context.h: -------------------------------------------------------------------------------- 1 | 2 | typedef void (*destroy_data_t)(void* data); 3 | 4 | typedef struct _process_plane_context 5 | { 6 | void* data; 7 | destroy_data_t destroy; 8 | } process_plane_context; 9 | 10 | void destroy_context(process_plane_context* context); 11 | 12 | void init_context(process_plane_context* context); -------------------------------------------------------------------------------- /src/flash3kyuu_deband_impl_sse4.cpp: -------------------------------------------------------------------------------- 1 | #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) 2 | #include 3 | #elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) 4 | #include "sse2neon.h" 5 | #endif 6 | #include "flash3kyuu_deband_sse_base.h" 7 | 8 | #define DECLARE_IMPL_SSE4 9 | #include "impl_dispatch_decl.h" 10 | -------------------------------------------------------------------------------- /include/avisynth/avs/filesystem.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Snippet copied from filesystem/README.md 4 | 5 | #if defined(__cplusplus) && __cplusplus >= 201703L && defined(__has_include) 6 | #if __has_include(<filesystem>) 7 | #define GHC_USE_STD_FS 8 | #include <filesystem> 9 | namespace fs = std::filesystem; 10 | #endif 11 | #endif 12 | #ifndef GHC_USE_STD_FS 13 | #include <ghc/filesystem.hpp> 14 | namespace fs = 
ghc::filesystem; 15 | #endif 16 | -------------------------------------------------------------------------------- /src/process_plane_context.cpp: -------------------------------------------------------------------------------- 1 | #include "process_plane_context.h" 2 | 3 | #include 4 | #include 5 | 6 | void destroy_context(process_plane_context* context) 7 | { 8 | assert(context); 9 | 10 | if (context->data) { 11 | assert(context->destroy); 12 | context->destroy(context->data); 13 | memset(context, 0, sizeof(process_plane_context)); 14 | } 15 | } 16 | 17 | void init_context(process_plane_context* context) 18 | { 19 | assert(context); 20 | memset(context, 0, sizeof(process_plane_context)); 21 | } 22 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | include /usr/share/dpkg/architecture.mk 4 | 5 | %: 6 | dh $@ 7 | 8 | override_dh_auto_install: 9 | install -m 755 -D -t debian/dualsynth-neo-f3kdb-yuuki/usr/lib/$(DEB_HOST_MULTIARCH)/dualsynth obj-$(DEB_HOST_MULTIARCH)/libneo-f3kdb.so 10 | mkdir debian/dualsynth-neo-f3kdb-yuuki/usr/lib/$(DEB_HOST_MULTIARCH)/avisynth 11 | mkdir debian/dualsynth-neo-f3kdb-yuuki/usr/lib/$(DEB_HOST_MULTIARCH)/vapoursynth 12 | ln -s ../dualsynth/libneo-f3kdb.so debian/dualsynth-neo-f3kdb-yuuki/usr/lib/$(DEB_HOST_MULTIARCH)/avisynth/ 13 | ln -s ../dualsynth/libneo-f3kdb.so debian/dualsynth-neo-f3kdb-yuuki/usr/lib/$(DEB_HOST_MULTIARCH)/vapoursynth/ 14 | -------------------------------------------------------------------------------- /src/constants.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // alignment for SSE operations 4 | #define FRAME_LUT_ALIGNMENT 16 5 | 6 | #define INTERNAL_BIT_DEPTH 16 7 | 8 | // these range values are defined in internal bit depth 9 | #define TV_RANGE_Y_MIN (16 << (INTERNAL_BIT_DEPTH - 8)) 10 | 
#define TV_RANGE_Y_MAX (235 << (INTERNAL_BIT_DEPTH - 8)) 11 | 12 | #define TV_RANGE_C_MIN TV_RANGE_Y_MIN 13 | #define TV_RANGE_C_MAX (240 << (INTERNAL_BIT_DEPTH - 8)) 14 | 15 | #define FULL_RANGE_Y_MIN 0 16 | #define FULL_RANGE_Y_MAX ((1 << INTERNAL_BIT_DEPTH) - 1) 17 | 18 | #define FULL_RANGE_C_MIN FULL_RANGE_Y_MIN 19 | #define FULL_RANGE_C_MAX FULL_RANGE_Y_MAX 20 | // scales a value from INTERNAL_BIT_DEPTH down to 8 bits; the argument is parenthesized so that low-precedence expressions (e.g. a & b) are shifted as a whole 21 | #define VALUE_8BIT(x) ( (x) >> ( INTERNAL_BIT_DEPTH - 8 ) ) 22 | 23 | -------------------------------------------------------------------------------- /src/pixel_proc_c_high_no_dithering.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | namespace pixel_proc_high_no_dithering { 4 | 5 | static inline void init_context(char context_buffer[CONTEXT_BUFFER_SIZE], int frame_width, int output_depth) 6 | { 7 | // nothing to do 8 | } 9 | 10 | static inline void destroy_context(void* context) 11 | { 12 | // nothing to do 13 | } 14 | 15 | static inline void next_pixel(void* context) 16 | { 17 | // nothing to do 18 | } 19 | 20 | static inline void next_row(void* context) 21 | { 22 | // nothing to do 23 | } 24 | 25 | static inline int dither(void* context, int pixel, int row, int column) 26 | { 27 | return pixel; 28 | } 29 | 30 | #include "pixel_proc_c_high_bit_depth_common.h" 31 | 32 | }; -------------------------------------------------------------------------------- /src/neo_f3kdb.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Xinyue Lu 3 | * 4 | * DualSynth bridge - plugin. 
5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include "version.hpp" 11 | #include "f3kdb.hpp" 12 | 13 | namespace Plugin { 14 | const char* Identifier = "in.7086.neo_f3kdb"; 15 | const char* Namespace = "neo_f3kdb"; 16 | const char* Description = "Neo F3KDB Deband Filter " PLUGIN_VERSION; 17 | } 18 | 19 | std::vector RegisterVSFilters() 20 | { 21 | return std::vector { VSInterface::RegisterFilter }; 22 | } 23 | 24 | std::vector RegisterAVSFilters() 25 | { 26 | return std::vector { AVSInterface::RegisterFilter }; 27 | } 28 | -------------------------------------------------------------------------------- /src/sse_utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // See Intel Optimization Guide: Ch. 5.6.6.2 Clipping to an Arbitrary Unsigned Range [High, Low] 4 | // high_add = 0xffff - high 5 | // high_sub = 0xffff - high + low 6 | static __m128i __forceinline high_bit_depth_pixels_clamp(__m128i pixels, __m128i high_add, __m128i high_sub, const __m128i& low) 7 | { 8 | pixels = _mm_adds_epu16(pixels, high_add); 9 | pixels = _mm_subs_epu16(pixels, high_sub); 10 | pixels = _mm_add_epi16(pixels, low); 11 | 12 | return pixels; 13 | } 14 | 15 | 16 | // like high_bit_depth_pixels_clamp, but all values are 8bit 17 | static __m128i __forceinline low_bit_depth_pixels_clamp(__m128i pixels, __m128i high_add, __m128i high_sub, const __m128i& low) 18 | { 19 | pixels = _mm_adds_epu8(pixels, high_add); 20 | pixels = _mm_subs_epu8(pixels, high_sub); 21 | pixels = _mm_add_epi8(pixels, low); 22 | 23 | return pixels; 24 | } 25 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: neo-f3kdb 2 | Section: libs 3 | Priority: optional 4 | Maintainer: Xinyue Lu 5 | Bugs: https://github.com/HomeOfAviSynthPlusEvolution/neo_f3kdb/issues 6 | Homepage: 
https://github.com/HomeOfAviSynthPlusEvolution/neo_f3kdb 7 | Standards-Version: 4.5.0 8 | Build-Depends: debhelper-compat (= 12), 9 | avisynthplus-yuuki-dev, 10 | liblsmash-yuuki-dev 11 | 12 | # -yuuki tag is used for yuuki's own builds. 13 | # Replace it with your own tag for your own builds. 14 | 15 | Package: dualsynth-neo-f3kdb-yuuki 16 | Architecture: any 17 | Multi-Arch: same 18 | Depends: ${shlibs:Depends}, ${misc:Depends} 19 | Provides: dualsynth-neo-f3kdb, neo-f3kdb 20 | Conflicts: dualsynth-neo-f3kdb, neo-f3kdb 21 | Description: Plugin for AviSynthPlus and VapourSynth: neo-f3kdb 22 | F3KDB is a deband filter. It was originally written for AviUtl by bunyuchan and later ported to AviSynth by SAPikachu many years ago. 23 | -------------------------------------------------------------------------------- /src/version.rc.in: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | VS_VERSION_INFO VERSIONINFO 4 | FILEVERSION @VERSION_MAJOR@, @VERSION_MINOR@, @VERSION_PATCH@, @VERSION_BUILD@ 5 | PRODUCTVERSION @VERSION_MAJOR@, @VERSION_MINOR@, @VERSION_PATCH@, @VERSION_BUILD@ 6 | FILEFLAGSMASK VS_FFI_FILEFLAGSMASK 7 | @RC_FILEFLAGS_LINE@ 8 | FILEOS VOS_NT_WINDOWS32 9 | FILETYPE VFT_DLL 10 | FILESUBTYPE VFT2_UNKNOWN 11 | BEGIN 12 | BLOCK "StringFileInfo" 13 | BEGIN 14 | BLOCK "040904B0" 15 | BEGIN 16 | VALUE "FileDescription", "@FILE_DESCRIPTION@" 17 | VALUE "FileVersion", "@PROJECT_VERSION_STRING_FULL@" 18 | VALUE "InternalName", "@INTERNAL_NAME@" 19 | VALUE "OriginalFilename", "@ORIGINAL_FILENAME@" 20 | VALUE "ProductName", "@PRODUCT_NAME@" 21 | VALUE "ProductVersion", "@PROJECT_VERSION_STRING_FULL@" 22 | END 23 | END 24 | BLOCK "VarFileInfo" 25 | BEGIN 26 | VALUE "Translation", 0x0409, 1200 27 | END 28 | END 29 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | Format: 
http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ 2 | Upstream-Name: neo_f3kdb 3 | Source: https://github.com/HomeOfAviSynthPlusEvolution/neo_f3kdb 4 | 5 | Files: * 6 | Copyright: 2019-2020 Xinyue Lu , and previous developers 7 | License: GPL-3+ 8 | This package is free software: you can redistribute it and/or modify 9 | it under the terms of the GNU General Public License as published by 10 | the Free Software Foundation, either version 3 of the License, or 11 | (at your option) any later version. 12 | . 13 | This package is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU General Public License for more details. 17 | . 18 | You should have received a copy of the GNU General Public License 19 | along with this program. If not, see <https://www.gnu.org/licenses/> 20 | . 21 | On Debian systems, the complete text of the GNU General 22 | Public License version 3 can be found in "/usr/share/common-licenses/GPL-3". 
23 | -------------------------------------------------------------------------------- /src/pixel_proc_c_high_bit_depth_common.h: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | #include "constants.h" 3 | 4 | static inline int upsample(void* context, unsigned char pixel) 5 | { 6 | return pixel << (INTERNAL_BIT_DEPTH - 8); 7 | } 8 | 9 | #if defined(HAS_DOWNSAMPLE) 10 | #undef HAS_DOWNSAMPLE 11 | #else 12 | static inline int downsample(void* context, int pixel, int row, int column, int pixel_min, int pixel_max, int output_depth) 13 | { 14 | pixel = dither(context, pixel, row, column); 15 | return clamp_pixel(pixel, pixel_min, pixel_max) >> (INTERNAL_BIT_DEPTH - output_depth); 16 | } 17 | #endif 18 | 19 | static inline int avg_2(void* context, int pixel1, int pixel2) 20 | { 21 | return (pixel1 + pixel2 + 1) >> 1; 22 | } 23 | 24 | static inline int avg_4(void* context, int pixel1, int pixel2, int pixel3, int pixel4) 25 | { 26 | // consistent with SSE code 27 | int avg1 = (pixel1 + pixel2 + 1) >> 1; 28 | int avg2 = (pixel3 + pixel4 + 1) >> 1; 29 | if (avg1 > 0) 30 | { 31 | avg1 -= 1; 32 | } 33 | return (avg1 + avg2 + 1) >> 1; 34 | } 35 | 36 | -------------------------------------------------------------------------------- /src/compiler_compat.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef _MSC_VER 4 | #include 5 | #include 6 | #define _stricmp strcasecmp 7 | #define _strnicmp strncasecmp 8 | #endif 9 | 10 | #ifndef _WIN32 11 | #include 12 | #define __forceinline inline 13 | #ifndef __cdecl 14 | #define __cdecl 15 | #endif 16 | #define _InterlockedCompareExchangePointer(a,b,c) __sync_val_compare_and_swap(a,c,b) 17 | 18 | static inline void* _aligned_malloc(size_t size, size_t alignment) 19 | { 20 | void *tmp; 21 | if (posix_memalign(&tmp, alignment, size)) 22 | { 23 | tmp = 0; 24 | } 25 | return tmp; 26 | } 27 | #define _aligned_free free 28 | 
#else 29 | #include 30 | // ICL complains about unresolved external symbol 31 | #if __INTEL_COMPILER && !_WIN64 32 | __forceinline void* _InterlockedCompareExchangePointer( 33 | void* volatile *Destination, void* Exchange, void* Comperand) { 34 | return (void*) _InterlockedCompareExchange((long volatile *) Destination, (long) Exchange, (long) Comperand); 35 | } 36 | #endif 37 | #endif 38 | 39 | 40 | #define ALIGNED_ARRAY(type, decl, alignment) alignas(alignment) type decl 41 | -------------------------------------------------------------------------------- /src/pixel_proc_c_16bit.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace pixel_proc_16bit { 4 | 5 | static inline void init_context(char context_buffer[CONTEXT_BUFFER_SIZE], int frame_width, int output_depth) 6 | { 7 | // sanity check only 8 | assert(output_depth == 16); 9 | } 10 | 11 | static inline void destroy_context(void* context) 12 | { 13 | // nothing to do 14 | } 15 | 16 | static inline void next_pixel(void* context) 17 | { 18 | // nothing to do 19 | } 20 | 21 | static inline void next_row(void* context) 22 | { 23 | // nothing to do 24 | } 25 | 26 | static inline int dither(void* context, int pixel, int row, int column) 27 | { 28 | return pixel; 29 | } 30 | 31 | #define HAS_DOWNSAMPLE 32 | 33 | #include "pixel_proc_c_high_bit_depth_common.h" 34 | 35 | static inline int downsample(void* context, int pixel, int row, int column, int pixel_min, int pixel_max, int output_depth) 36 | { 37 | assert(output_depth == 16); 38 | // I know the method name is totally wrong... 39 | return clamp_pixel(pixel, pixel_min, pixel_max) << (output_depth - INTERNAL_BIT_DEPTH); 40 | } 41 | 42 | 43 | }; -------------------------------------------------------------------------------- /src/bit_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_ 16 | #define CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_ 17 | 18 | #include <assert.h> 19 | #include <stdbool.h> 20 | #include <stdint.h> 21 | #include "cpu_features_macros.h" 22 | 23 | CPU_FEATURES_START_CPP_NAMESPACE 24 | 25 | inline static bool IsBitSet(uint32_t reg, uint32_t bit) { 26 | return (reg >> bit) & 0x1; 27 | } 28 | // Returns bits [lsb, msb] (inclusive) of reg, shifted down to bit 0; requires msb >= lsb. 29 | inline static uint32_t ExtractBitRange(uint32_t reg, uint32_t msb, 30 | uint32_t lsb) { 31 | assert(msb >= lsb); // check the precondition before the subtraction below can wrap 32 | const uint64_t bits = msb - lsb + 1ULL; 33 | const uint64_t mask = (1ULL << bits) - 1ULL; 34 | return (reg >> lsb) & mask; 35 | } 36 | 37 | CPU_FEATURES_END_CPP_NAMESPACE 38 | 39 | #endif // CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_ 40 | -------------------------------------------------------------------------------- /.github/workflows/windows.yml: -------------------------------------------------------------------------------- 1 | name: Build (Windows) 2 | 3 | on: 4 | release: 5 | types: [created] 6 | push: 7 | pull_request: 8 | workflow_dispatch: 9 | inputs: 10 | tag: 11 | description: 'which tag to create and release?' 
12 | required: true 13 | default: 'v100' 14 | 15 | jobs: 16 | build-windows: 17 | runs-on: windows-2022 18 | 19 | defaults: 20 | run: 21 | shell: cmd 22 | 23 | steps: 24 | - name: Checkout repo 25 | uses: actions/checkout@v4 26 | with: 27 | fetch-depth: 0 28 | 29 | - name: Setup MSVC 30 | uses: ilammy/msvc-dev-cmd@v1 31 | 32 | - name: Configure 33 | run: cmake -S . -B build -G "Visual Studio 17 2022" -A x64 \ 34 | -D CMAKE_MSVC_RUNTIME_LIBRARY="MultiThreaded" 35 | 36 | - name: Build 37 | run: cmake --build build --config Release --parallel %NUMBER_OF_PROCESSORS% --verbose 38 | 39 | - name: Package Release 40 | shell: bash 41 | run: | 42 | git clone https://github.com/AkarinVS/exe/ 43 | cd build/Release/ 44 | ../../exe/zip.exe -9r ../../Neo_f3kdb-Windows-x64.zip *.dll 45 | 46 | - name: Upload 47 | uses: actions/upload-artifact@v4 48 | with: 49 | name: Neo_f3kdb-Windows-x64 50 | path: build/Release/*.dll 51 | 52 | - name: Release 53 | uses: softprops/action-gh-release@v2 54 | if: github.event_name == 'release' || github.event_name == 'workflow_dispatch' && github.event.inputs.tag != '' 55 | with: 56 | files: "Neo_f3kdb-Windows-x64.zip" 57 | prerelease: true 58 | env: 59 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 60 | -------------------------------------------------------------------------------- /src/f3kdb.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | typedef enum _PIXEL_MODE : int { 4 | DEFAULT_PIXEL_MODE = -1, 5 | LOW_BIT_DEPTH = 0, 6 | INVALID_OPTION1, 7 | HIGH_BIT_DEPTH_INTERLEAVED, 8 | PIXEL_MODE_COUNT 9 | } PIXEL_MODE; 10 | 11 | typedef enum _DITHER_ALGORITHM : int { 12 | // _DEPRECATED_DA_LOW = 0, 13 | DA_HIGH_NO_DITHERING = 1, 14 | DA_HIGH_ORDERED_DITHERING, 15 | DA_HIGH_FLOYD_STEINBERG_DITHERING, 16 | DA_16BIT_INTERLEAVED, 17 | 18 | DA_COUNT, 19 | DA_USER_PARAM_MAX = DA_HIGH_FLOYD_STEINBERG_DITHERING 20 | } DITHER_ALGORITHM; 21 | 22 | typedef enum _RANDOM_ALGORITHM : int { 23 | 
RANDOM_ALGORITHM_OLD = 0, 24 | RANDOM_ALGORITHM_UNIFORM, 25 | RANDOM_ALGORITHM_GAUSSIAN, 26 | RANDOM_ALGORITHM_COUNT 27 | } RANDOM_ALGORITHM; 28 | 29 | typedef enum _OPTIMIZATION_MODE : int { 30 | IMPL_AUTO_DETECT = -1, 31 | IMPL_C = 0, 32 | IMPL_SSE2, 33 | IMPL_SSSE3, 34 | IMPL_SSE4, 35 | IMPL_AVX2, 36 | IMPL_AVX512, 37 | 38 | IMPL_COUNT 39 | } OPTIMIZATION_MODE; 40 | 41 | typedef struct _f3kdb_params_t { 42 | int range {15}; 43 | int Y {64}; 44 | int Cb {64}; 45 | int Cr {64}; 46 | int grainY {64}; 47 | int grainC {64}; 48 | int sample_mode {2}; 49 | int seed {0}; 50 | bool blur_first {true}; 51 | bool dynamic_grain {false}; 52 | DITHER_ALGORITHM dither_algo {DA_HIGH_FLOYD_STEINBERG_DITHERING}; 53 | bool keep_tv_range {false}; 54 | int output_depth {-1}; 55 | RANDOM_ALGORITHM random_algo_ref {RANDOM_ALGORITHM_UNIFORM}; 56 | RANDOM_ALGORITHM random_algo_grain {RANDOM_ALGORITHM_UNIFORM}; 57 | double random_param_ref {1.0f}; 58 | double random_param_grain {1.0f}; 59 | int Y_1 {-1}; 60 | int Cb_1 {-1}; 61 | int Cr_1 {-1}; 62 | int Y_2 {-1}; 63 | int Cb_2 {-1}; 64 | int Cr_2 {-1}; 65 | double angle_boost {1.5}; 66 | double max_angle {0.15}; 67 | } f3kdb_params_t; 68 | -------------------------------------------------------------------------------- /include/dualsynth/ds_videoinfo.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Xinyue Lu 3 | * 4 | * DualSynth wrapper - DSVideoInfo. 
5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | struct DSVideoInfo 11 | { 12 | DSFormat Format; 13 | int64_t FPSNum {1}, FPSDenom {1}; 14 | int Width {0}, Height {0}; 15 | int Frames {0}; 16 | 17 | int Audio_SPS {0}; 18 | int Audio_SType {0}; 19 | int64_t Audio_NSamples {0}; 20 | int Audio_NChannels {0}; 21 | 22 | int Field {0}; 23 | 24 | DSVideoInfo() {} 25 | DSVideoInfo(DSFormat format, int64_t fpsnum, int64_t fpsdenom, int width, int height, int frames) 26 | : Format(format) 27 | , FPSNum(fpsnum), FPSDenom(fpsdenom) 28 | , Width(width), Height(height) 29 | , Frames(frames) 30 | { } 31 | DSVideoInfo(const VSVideoInfo* vsvi) 32 | : Format(vsvi->format) 33 | , FPSNum(vsvi->fpsNum), FPSDenom(vsvi->fpsDen) 34 | , Width(vsvi->width), Height(vsvi->height) 35 | , Frames(vsvi->numFrames) 36 | { } 37 | DSVideoInfo(const VideoInfo avsvi) 38 | : Format(avsvi.pixel_type) 39 | , FPSNum(avsvi.fps_numerator), FPSDenom(avsvi.fps_denominator) 40 | , Width(avsvi.width), Height(avsvi.height) 41 | , Frames(avsvi.num_frames) 42 | , Audio_SPS(avsvi.audio_samples_per_second) 43 | , Audio_SType(avsvi.sample_type) 44 | , Audio_NSamples(avsvi.num_audio_samples) 45 | , Audio_NChannels(avsvi.nchannels) 46 | , Field(avsvi.image_type) 47 | { } 48 | const VSVideoInfo* ToVSVI(const VSCore* vscore, const VSAPI* vsapi) { 49 | return new VSVideoInfo {Format.ToVSFormat(vscore, vsapi), FPSNum, FPSDenom, Width, Height, Frames, 0}; 50 | } 51 | const VideoInfo ToAVSVI() { 52 | return VideoInfo{Width, Height, static_cast<unsigned>(FPSNum), static_cast<unsigned>(FPSDenom), Frames, Format.ToAVSFormat(), Audio_SPS, Audio_SType, Audio_NSamples, Audio_NChannels, Field}; 53 | } 54 | }; 55 | -------------------------------------------------------------------------------- /include/dualsynth/ds_common.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Xinyue Lu 3 | * 4 | * DualSynth wrapper - Common header+. 
5 | * 6 | */ 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "ds_format.hpp" 20 | #include "ds_videoinfo.hpp" 21 | #include "ds_frame.hpp" 22 | 23 | typedef void (*register_vsfilter_proc)(VSRegisterFunction, VSPlugin*); 24 | typedef void (*register_avsfilter_proc)(IScriptEnvironment* env); 25 | std::vector RegisterVSFilters(); 26 | std::vector RegisterAVSFilters(); 27 | 28 | enum ParamType 29 | { 30 | Clip, Integer, Float, Boolean, String 31 | }; 32 | 33 | struct Param 34 | { 35 | const char* Name; 36 | const ParamType Type; 37 | const bool IsArray {false}; 38 | bool AVSEnabled {true}; 39 | bool VSEnabled {true}; 40 | const bool IsOptional {true}; 41 | }; 42 | 43 | struct InDelegator 44 | { 45 | virtual void Read(const char* name, int& output) = 0; 46 | virtual void Read(const char* name, int64_t& output) = 0; 47 | virtual void Read(const char* name, float& output) = 0; 48 | virtual void Read(const char* name, double& output) = 0; 49 | virtual void Read(const char* name, bool& output) = 0; 50 | virtual void Read(const char* name, std::string& output) = 0; 51 | virtual void Read(const char* name, std::vector& output) = 0; 52 | virtual void Read(const char* name, std::vector& output) = 0; 53 | virtual void Read(const char* name, std::vector& output) = 0; 54 | virtual void Read(const char* name, std::vector& output) = 0; 55 | virtual void Read(const char* name, std::vector& output) = 0; 56 | virtual void Read(const char* name, void*& output) = 0; 57 | virtual void Free(void*& clip) = 0; 58 | }; 59 | 60 | struct FetchFrameFunctor 61 | { 62 | virtual DSFrame operator()(int n) = 0; 63 | virtual ~FetchFrameFunctor() {} 64 | }; 65 | -------------------------------------------------------------------------------- /src/impl_dispatch.cpp: -------------------------------------------------------------------------------- 1 | #include "core.h" 2 | 3 | 
#define IMPL_DISPATCH_IMPORT_DECLARATION 4 | 5 | #include "impl_dispatch_decl.h" 6 | 7 | const process_plane_impl_t* process_plane_impl_high_precision_no_dithering[] = { 8 | process_plane_impl_c_high_no_dithering, 9 | process_plane_impl_c_high_no_dithering, 10 | process_plane_impl_c_high_no_dithering, 11 | process_plane_impl_sse4_high_no_dithering, 12 | process_plane_impl_avx2_high_no_dithering, 13 | process_plane_impl_avx512_high_no_dithering 14 | }; 15 | 16 | const process_plane_impl_t* process_plane_impl_high_precision_ordered_dithering[] = { 17 | process_plane_impl_c_high_ordered_dithering, 18 | process_plane_impl_c_high_ordered_dithering, 19 | process_plane_impl_c_high_ordered_dithering, 20 | process_plane_impl_sse4_high_ordered_dithering, 21 | process_plane_impl_avx2_high_ordered_dithering, 22 | process_plane_impl_avx512_high_ordered_dithering 23 | }; 24 | 25 | const process_plane_impl_t* process_plane_impl_high_precision_floyd_steinberg_dithering[] = { 26 | process_plane_impl_c_high_floyd_steinberg_dithering, 27 | process_plane_impl_c_high_floyd_steinberg_dithering, 28 | process_plane_impl_c_high_floyd_steinberg_dithering, 29 | process_plane_impl_sse4_high_floyd_steinberg_dithering, 30 | process_plane_impl_avx2_high_floyd_steinberg_dithering, 31 | process_plane_impl_avx512_high_floyd_steinberg_dithering 32 | }; 33 | 34 | const process_plane_impl_t* process_plane_impl_16bit_interleaved[] = { 35 | process_plane_impl_c_16bit_interleaved, 36 | process_plane_impl_c_16bit_interleaved, 37 | process_plane_impl_c_16bit_interleaved, 38 | process_plane_impl_sse4_16bit_interleaved, 39 | process_plane_impl_avx2_16bit_interleaved, 40 | process_plane_impl_avx512_16bit_interleaved 41 | }; 42 | 43 | 44 | const process_plane_impl_t** process_plane_impls[] = { 45 | nullptr, // process_plane_impl_low_precision has been removed, 46 | process_plane_impl_high_precision_no_dithering, 47 | process_plane_impl_high_precision_ordered_dithering, 48 | 
process_plane_impl_high_precision_floyd_steinberg_dithering, 49 | process_plane_impl_16bit_interleaved 50 | }; 51 | -------------------------------------------------------------------------------- /include/avisynth/avs/minmax.h: -------------------------------------------------------------------------------- 1 | // This program is free software; you can redistribute it and/or modify 2 | // it under the terms of the GNU General Public License as published by 3 | // the Free Software Foundation; either version 2 of the License, or 4 | // (at your option) any later version. 5 | // 6 | // This program is distributed in the hope that it will be useful, 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | // GNU General Public License for more details. 10 | // 11 | // You should have received a copy of the GNU General Public License 12 | // along with this program; if not, write to the Free Software 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 14 | // http://www.gnu.org/copyleft/gpl.html . 15 | // 16 | // Linking Avisynth statically or dynamically with other modules is making a 17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 18 | // General Public License cover the whole combination. 
19 | // 20 | // As a special exception, the copyright holders of Avisynth give you 21 | // permission to link Avisynth with independent modules that communicate with 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 23 | // terms of these independent modules, and to copy and distribute the 24 | // resulting combined work under terms of your choice, provided that 25 | // every copy of the combined work is accompanied by a complete copy of 26 | // the source code of Avisynth (the version of Avisynth used to produce the 27 | // combined work), being distributed under the terms of the GNU General 28 | // Public License plus this exception. An independent module is a module 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 30 | // import and export plugins, or graphical user interfaces. 31 | 32 | #ifndef AVSCORE_MINMAX_H 33 | #define AVSCORE_MINMAX_H 34 | 35 | template 36 | T min(T v1, T v2) 37 | { 38 | return v1 < v2 ? v1 : v2; 39 | } 40 | 41 | template 42 | T max(T v1, T v2) 43 | { 44 | return v1 > v2 ? v1 : v2; 45 | } 46 | 47 | template 48 | T clamp(T n, T min, T max) 49 | { 50 | n = n > max ? max : n; 51 | return n < min ? min : n; 52 | } 53 | 54 | #endif // AVSCORE_MINMAX_H 55 | -------------------------------------------------------------------------------- /include/avisynth/avs/types.h: -------------------------------------------------------------------------------- 1 | // Avisynth C Interface Version 0.20 2 | // Copyright 2003 Kevin Atkinson 3 | 4 | // This program is free software; you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation; either version 2 of the License, or 7 | // (at your option) any later version. 
8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program; if not, write to the Free Software 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 17 | // http://www.gnu.org/copyleft/gpl.html . 18 | // 19 | // As a special exception, I give you permission to link to the 20 | // Avisynth C interface with independent modules that communicate with 21 | // the Avisynth C interface solely through the interfaces defined in 22 | // avisynth_c.h, regardless of the license terms of these independent 23 | // modules, and to copy and distribute the resulting combined work 24 | // under terms of your choice, provided that every copy of the 25 | // combined work is accompanied by a complete copy of the source code 26 | // of the Avisynth C interface and Avisynth itself (with the version 27 | // used to produce the combined work), being distributed under the 28 | // terms of the GNU General Public License plus this exception. An 29 | // independent module is a module which is not derived from or based 30 | // on Avisynth C Interface, such as 3rd-party filters, import and 31 | // export plugins, or graphical user interfaces. 
32 | 33 | #ifndef AVS_TYPES_H 34 | #define AVS_TYPES_H 35 | 36 | // Define all types necessary for interfacing with avisynth.dll 37 | #include 38 | #include 39 | #ifdef __cplusplus 40 | #include 41 | #include 42 | #else 43 | #include 44 | #include 45 | #endif 46 | 47 | // Raster types used by VirtualDub & Avisynth 48 | typedef uint32_t Pixel32; 49 | typedef uint8_t BYTE; 50 | 51 | // Audio Sample information 52 | typedef float SFLOAT; 53 | 54 | #endif //AVS_TYPES_H 55 | -------------------------------------------------------------------------------- /src/pixel_proc_c.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "impl_dispatch.h" 6 | 7 | #define CALL_IMPL(func, ...) \ 8 | ( mode == DA_HIGH_NO_DITHERING ? pixel_proc_high_no_dithering::func(__VA_ARGS__) : \ 9 | mode == DA_HIGH_ORDERED_DITHERING ? pixel_proc_high_ordered_dithering::func(__VA_ARGS__) : \ 10 | mode == DA_HIGH_FLOYD_STEINBERG_DITHERING ? pixel_proc_high_f_s_dithering::func(__VA_ARGS__) : \ 11 | pixel_proc_16bit::func(__VA_ARGS__) ) 12 | 13 | #define CHECK_MODE() if (mode < 0 || mode >= DA_COUNT) abort() 14 | 15 | #include "pixel_proc_c_high_no_dithering.h" 16 | #include "pixel_proc_c_high_ordered_dithering.h" 17 | #include "pixel_proc_c_high_f_s_dithering.h" 18 | 19 | #include "pixel_proc_c_16bit.h" 20 | 21 | template 22 | static inline void pixel_proc_init_context(char context_buffer[CONTEXT_BUFFER_SIZE], int frame_width, int output_depth) 23 | { 24 | CHECK_MODE(); 25 | CALL_IMPL(init_context, context_buffer, frame_width, output_depth); 26 | } 27 | 28 | template 29 | static inline void pixel_proc_destroy_context(void* context) 30 | { 31 | CHECK_MODE(); 32 | CALL_IMPL(destroy_context, context); 33 | } 34 | 35 | template 36 | static inline void pixel_proc_next_pixel(void* context) 37 | { 38 | CHECK_MODE(); 39 | CALL_IMPL(next_pixel, context); 40 | } 41 | 42 | template 43 | static inline void pixel_proc_next_row(void* 
context) 44 | { 45 | CHECK_MODE(); 46 | CALL_IMPL(next_row, context); 47 | } 48 | 49 | template 50 | static inline int pixel_proc_upsample(void* context, unsigned char pixel) 51 | { 52 | CHECK_MODE(); 53 | return CALL_IMPL(upsample, context, pixel); 54 | } 55 | 56 | template 57 | static inline int pixel_proc_downsample(void* context, int pixel, int row, int column, int pixel_min, int pixel_max, int output_depth) 58 | { 59 | CHECK_MODE(); 60 | return CALL_IMPL(downsample, context, pixel, row, column, pixel_min, pixel_max, output_depth); 61 | } 62 | 63 | template 64 | static inline int pixel_proc_avg_2(void* context, int pixel1, int pixel2) 65 | { 66 | CHECK_MODE(); 67 | return CALL_IMPL(avg_2, context, pixel1, pixel2); 68 | } 69 | 70 | template 71 | static inline int pixel_proc_avg_4(void* context, int pixel1, int pixel2, int pixel3, int pixel4) 72 | { 73 | CHECK_MODE(); 74 | return CALL_IMPL(avg_4, context, pixel1, pixel2, pixel3, pixel4); 75 | } 76 | -------------------------------------------------------------------------------- /include/avisynth/avs/win.h: -------------------------------------------------------------------------------- 1 | // This program is free software; you can redistribute it and/or modify 2 | // it under the terms of the GNU General Public License as published by 3 | // the Free Software Foundation; either version 2 of the License, or 4 | // (at your option) any later version. 5 | // 6 | // This program is distributed in the hope that it will be useful, 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | // GNU General Public License for more details. 10 | // 11 | // You should have received a copy of the GNU General Public License 12 | // along with this program; if not, write to the Free Software 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 14 | // http://www.gnu.org/copyleft/gpl.html . 
15 | // 16 | // Linking Avisynth statically or dynamically with other modules is making a 17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 18 | // General Public License cover the whole combination. 19 | // 20 | // As a special exception, the copyright holders of Avisynth give you 21 | // permission to link Avisynth with independent modules that communicate with 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 23 | // terms of these independent modules, and to copy and distribute the 24 | // resulting combined work under terms of your choice, provided that 25 | // every copy of the combined work is accompanied by a complete copy of 26 | // the source code of Avisynth (the version of Avisynth used to produce the 27 | // combined work), being distributed under the terms of the GNU General 28 | // Public License plus this exception. An independent module is a module 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 30 | // import and export plugins, or graphical user interfaces. 31 | 32 | #ifndef AVSCORE_WIN_H 33 | #define AVSCORE_WIN_H 34 | 35 | // Whenever you need windows headers, start by including this file, then the rest. 36 | 37 | // WWUUT? We require XP now? 38 | #if !defined(NTDDI_VERSION) && !defined(_WIN32_WINNT) 39 | #define NTDDI_VERSION 0x05020000 40 | #define _WIN32_WINNT 0x0502 41 | #endif 42 | 43 | #define WIN32_LEAN_AND_MEAN 44 | #define STRICT 45 | #if !defined(NOMINMAX) 46 | #define NOMINMAX 47 | #endif 48 | 49 | #include 50 | 51 | // Provision for UTF-8 max 4 bytes per code point 52 | #define AVS_MAX_PATH MAX_PATH*4 53 | 54 | #endif // AVSCORE_WIN_H 55 | -------------------------------------------------------------------------------- /include/dualsynth/ds_filter.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Xinyue Lu 3 | * 4 | * DualSynth wrapper - Filter parent class. 
5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | struct Filter 11 | { 12 | DSVideoInfo in_vi; 13 | FetchFrameFunctor* fetch_frame; 14 | virtual const char* VSName() const { return "FilterFoo"; } 15 | virtual const char* AVSName() const { return "FilterFoo"; } 16 | virtual const MtMode AVSMode() const { return MT_SERIALIZED; } 17 | virtual const VSFilterMode VSMode() const { return fmSerial; } 18 | virtual const std::vector Params() const = 0; 19 | virtual const std::string VSParams() const 20 | { 21 | std::stringstream ss; 22 | auto params = this->Params(); 23 | for (auto &&p : params) 24 | { 25 | if (!p.VSEnabled) continue; 26 | std::string type_name; 27 | switch(p.Type) { 28 | case Clip: type_name = "clip"; break; 29 | case Integer: type_name = "int"; break; 30 | case Float: type_name = "float"; break; 31 | case Boolean: type_name = "int"; break; 32 | case String: type_name = "data"; break; 33 | } 34 | ss << p.Name << ':' << type_name; 35 | if (p.IsArray) 36 | ss << "[]"; 37 | if (p.IsOptional) 38 | ss << ":opt"; 39 | ss << ';'; 40 | } 41 | return ss.str(); 42 | }; 43 | virtual const std::string AVSParams() const 44 | { 45 | std::stringstream ss; 46 | auto params = this->Params(); 47 | for (auto &&p : params) 48 | { 49 | if (!p.AVSEnabled) continue; 50 | char type_name; 51 | switch(p.Type) { 52 | case Clip: type_name = 'c'; break; 53 | case Integer: type_name = 'i'; break; 54 | case Float: type_name = 'f'; break; 55 | case Boolean: type_name = 'b'; break; 56 | case String: type_name = 's'; break; 57 | } 58 | if (p.IsOptional) 59 | ss << '[' << p.Name << ']'; 60 | ss << type_name; 61 | } 62 | return ss.str(); 63 | }; 64 | virtual void Initialize(InDelegator* in, DSVideoInfo in_vi, FetchFrameFunctor* fetch_frame) 65 | { 66 | this->in_vi = in_vi; 67 | this->fetch_frame = fetch_frame; 68 | }; 69 | virtual std::vector RequestReferenceFrames(int n) const 70 | { 71 | return std::vector{n}; 72 | } 73 | virtual DSFrame GetFrame(int n, std::unordered_map in_frames) 74 | { 75 | 
return in_frames.size() > 0 ? in_frames.begin()->second : DSFrame(); 76 | } 77 | virtual DSVideoInfo GetOutputVI() 78 | { 79 | return in_vi; 80 | } 81 | virtual int SetCacheHints(int cachehints, int frame_range) 82 | { 83 | return cachehints == CACHE_GET_MTMODE ? AVSMode() : 0; 84 | } 85 | }; 86 | -------------------------------------------------------------------------------- /src/random.cpp: -------------------------------------------------------------------------------- 1 | #include "random.h" 2 | 3 | #include 4 | 5 | #include 6 | 7 | #include 8 | 9 | typedef double (*rand_impl_t)(int& seed, double param); 10 | 11 | double rand_old(int& seed, double param); 12 | 13 | double rand_uniform(int& seed, double param); 14 | 15 | double rand_gaussian(int& seed, double param); 16 | 17 | static const rand_impl_t rand_algorithms[] = { 18 | rand_old, 19 | rand_uniform, 20 | rand_gaussian 21 | }; 22 | 23 | inline double round(double r) { 24 | return (r > 0.0) ? floor(r + 0.5) : ceil(r - 0.5); 25 | } 26 | 27 | int random(RANDOM_ALGORITHM algo, int& seed, int range, double param) 28 | { 29 | assert(algo >= 0 && algo < RANDOM_ALGORITHM_COUNT); 30 | 31 | double num = rand_algorithms[algo](seed, param); 32 | assert(num >= -1.0 && num <= 1.0); 33 | return (int)round(num * range); 34 | } 35 | 36 | // most algorithms below are stolen from AddGrainC 37 | 38 | double rand_to_double(int rand_num) 39 | { 40 | // convert the number to 52 bit, use high 12 bits to fill lower space 41 | // (otherwise the upper bound will be significantly less than 1.0) 42 | union 43 | { 44 | uint64_t itemp; 45 | double result; 46 | }; 47 | itemp = ((uint64_t)rand_num) & 0xffffffffULL; 48 | itemp = itemp << 20 | itemp >> 12; 49 | 50 | // fill exponent with 1 51 | itemp |= 0x3ff0000000000000ULL; 52 | 53 | // itemp is now in [1.0, 2.0), convert to [-1.0, 1.0) 54 | return (result - 1.0) * 2 - 1.0; 55 | } 56 | 57 | double rand_old(int& seed, double) 58 | { 59 | int seed_tmp = (((seed << 13) ^ (unsigned int)seed) 
>> 17) ^ (seed << 13) ^ seed; 60 | seed = 32 * seed_tmp ^ seed_tmp; 61 | return rand_to_double(seed); 62 | } 63 | 64 | double rand_uniform(int& seed, double) 65 | { 66 | seed = 1664525 * seed + 1013904223; 67 | return rand_to_double(seed); 68 | } 69 | 70 | // http://www.bearcave.com/misl/misl_tech/wavelets/hurst/random.html 71 | double rand_gaussian(int& seed, double param) 72 | { 73 | double ret; 74 | double x, y, r2; 75 | 76 | do 77 | { 78 | do 79 | { 80 | /* choose x,y in uniform square (-1,-1) to (+1,+1) */ 81 | 82 | x = rand_uniform (seed, param); 83 | y = rand_uniform (seed, param); 84 | 85 | /* see if it is in the unit circle */ 86 | r2 = x * x + y * y; 87 | } 88 | while (r2 > 1.0 || r2 == 0); 89 | /* Box-Muller transform */ 90 | 91 | // sigma = param 92 | ret = param * y * sqrt (-2.0 * log (r2) / r2); 93 | 94 | } while (ret <= -1.0 || ret >= 1.0); 95 | // we need to clip the result because the wrapper accepts [-1.0, 1.0] only 96 | 97 | return ret; 98 | } 99 | -------------------------------------------------------------------------------- /src/pixel_proc_c_high_ordered_dithering.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | namespace pixel_proc_high_ordered_dithering { 4 | 5 | // bayer dither matrix 6 | // align to 16 byte for reading from SSE code 7 | alignas(16) static const unsigned char THRESHOLD_MAP[16][16] = 8 | { 9 | { 0, 128, 32, 160, 8, 136, 40, 168, 2, 130, 34, 162, 10, 138, 42, 170 }, 10 | { 192, 64, 224, 96, 200, 72, 232, 104, 194, 66, 226, 98, 202, 74, 234, 106 }, 11 | { 48, 176, 16, 144, 56, 184, 24, 152, 50, 178, 18, 146, 58, 186, 26, 154 }, 12 | { 240, 112, 208, 80, 248, 120, 216, 88, 242, 114, 210, 82, 250, 122, 218, 90 }, 13 | { 12, 140, 44, 172, 4, 132, 36, 164, 14, 142, 46, 174, 6, 134, 38, 166 }, 14 | { 204, 76, 236, 108, 196, 68, 228, 100, 206, 78, 238, 110, 198, 70, 230, 102 }, 15 | { 60, 188, 28, 156, 52, 180, 20, 148, 62, 190, 30, 158, 54, 182, 22, 150 }, 16 | { 252, 124, 220, 92, 
244, 116, 212, 84, 254, 126, 222, 94, 246, 118, 214, 86 }, 17 | { 3, 131, 35, 163, 11, 139, 43, 171, 1, 129, 33, 161, 9, 137, 41, 169 }, 18 | { 195, 67, 227, 99, 203, 75, 235, 107, 193, 65, 225, 97, 201, 73, 233, 105 }, 19 | { 51, 179, 19, 147, 59, 187, 27, 155, 49, 177, 17, 145, 57, 185, 25, 153 }, 20 | { 243, 115, 211, 83, 251, 123, 219, 91, 241, 113, 209, 81, 249, 121, 217, 89 }, 21 | { 15, 143, 47, 175, 7, 135, 39, 167, 13, 141, 45, 173, 5, 133, 37, 165 }, 22 | { 207, 79, 239, 111, 199, 71, 231, 103, 205, 77, 237, 109, 197, 69, 229, 101 }, 23 | { 63, 191, 31, 159, 55, 183, 23, 151, 61, 189, 29, 157, 53, 181, 21, 149 }, 24 | { 255, 127, 223, 95, 247, 119, 215, 87, 253, 125, 221, 93, 245, 117, 213, 85 } 25 | }; 26 | 27 | static const int THRESHOLD_MAP_RIGHT_SHIFT_BITS = 16 - INTERNAL_BIT_DEPTH; 28 | 29 | 30 | static inline void init_context(char context_buffer[CONTEXT_BUFFER_SIZE], int frame_width, int output_depth) 31 | { 32 | *((int*)context_buffer) = output_depth; 33 | } 34 | 35 | static inline void destroy_context(void* context) 36 | { 37 | // nothing to do 38 | } 39 | 40 | static inline void next_pixel(void* context) 41 | { 42 | // nothing to do 43 | } 44 | 45 | static inline void next_row(void* context) 46 | { 47 | // nothing to do 48 | } 49 | 50 | static inline int dither(void* context, int pixel, int row, int column) 51 | { 52 | int output_depth = *(int*)context; 53 | pixel += (THRESHOLD_MAP[row & 15][column & 15] >> (THRESHOLD_MAP_RIGHT_SHIFT_BITS + output_depth - 8)); 54 | return pixel; 55 | } 56 | 57 | #include "pixel_proc_c_high_bit_depth_common.h" 58 | }; 59 | -------------------------------------------------------------------------------- /src/core.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "f3kdb.h" 6 | #include "process_plane_context.h" 7 | #include "compiler_compat.h" 8 | 9 | typedef struct _pixel_dither_info { 10 | alignas(4) signed char ref1; 11 | signed 
char ref2; 12 | signed short change; 13 | } pixel_dither_info; 14 | 15 | static_assert(sizeof(pixel_dither_info) == 4, "Something wrong in pixel_dither_info"); 16 | 17 | typedef struct _process_plane_params 18 | { 19 | const unsigned char *src_plane_ptr; 20 | int src_pitch; 21 | 22 | unsigned char *dst_plane_ptr; 23 | int dst_pitch; 24 | 25 | int plane_width_in_pixels; 26 | int plane_height_in_pixels; 27 | 28 | PIXEL_MODE input_mode; 29 | int input_depth; 30 | PIXEL_MODE output_mode; 31 | int output_depth; 32 | 33 | unsigned short threshold; 34 | unsigned short threshold1; 35 | unsigned short threshold2; 36 | float angle_boost; 37 | float max_angle; 38 | pixel_dither_info *info_ptr_base; 39 | int info_stride; 40 | 41 | short* grain_buffer; 42 | int grain_buffer_stride; 43 | 44 | int plane; 45 | 46 | unsigned char width_subsampling; 47 | unsigned char height_subsampling; 48 | 49 | int pixel_max; 50 | int pixel_min; 51 | 52 | // Helper functions 53 | inline int get_dst_width() const { 54 | return output_mode == HIGH_BIT_DEPTH_INTERLEAVED ? plane_width_in_pixels * 2 : plane_width_in_pixels; 55 | } 56 | inline int get_dst_height() const { 57 | return plane_height_in_pixels; 58 | } 59 | inline int get_src_width() const { 60 | return input_mode == HIGH_BIT_DEPTH_INTERLEAVED ? 
plane_width_in_pixels * 2 : plane_width_in_pixels; 61 | } 62 | inline int get_src_height() const { 63 | return plane_height_in_pixels; 64 | } 65 | } process_plane_params; 66 | 67 | typedef void (*process_plane_impl_t)(const process_plane_params& params, process_plane_context* context); 68 | 69 | class f3kdb_core_t { 70 | private: 71 | process_plane_impl_t _process_plane_impl; 72 | 73 | pixel_dither_info *_y_info; 74 | pixel_dither_info *_cb_info; 75 | pixel_dither_info *_cr_info; 76 | 77 | process_plane_context _y_context; 78 | process_plane_context _cb_context; 79 | process_plane_context _cr_context; 80 | 81 | short* _grain_buffer_y; 82 | short* _grain_buffer_c; 83 | 84 | int* _grain_buffer_offsets; 85 | 86 | DSVideoInfo _video_info; 87 | f3kdb_params_t _params; 88 | 89 | OPTIMIZATION_MODE _opt; 90 | 91 | void init(void); 92 | void init_frame_luts(void); 93 | 94 | void destroy_frame_luts(void); 95 | 96 | f3kdb_core_t(const f3kdb_core_t&); 97 | f3kdb_core_t operator=(const f3kdb_core_t&); 98 | 99 | public: 100 | f3kdb_core_t(DSVideoInfo vi, const f3kdb_params_t params, OPTIMIZATION_MODE opt); 101 | virtual ~f3kdb_core_t(); 102 | 103 | void process_plane(int frame_index, int plane, unsigned char* dst_frame_ptr, int dst_pitch, const unsigned char* src_frame_ptr, int src_pitch); 104 | }; 105 | -------------------------------------------------------------------------------- /VCL2/vectorclass.h: -------------------------------------------------------------------------------- 1 | /**************************** vectorclass.h ******************************** 2 | * Author: Agner Fog 3 | * Date created: 2012-05-30 4 | * Last modified: 2022-07-20 5 | * Version: 2.02.00 6 | * Project: vector class library 7 | * Home: https://github.com/vectorclass 8 | * Description: 9 | * Header file defining vector classes as interface to intrinsic functions 10 | * in x86 and x86-64 microprocessors with SSE2 and later instruction sets. 
11 | * 12 | * Instructions: 13 | * Use Gnu, Clang, Microsoft, or Intel C++ compiler. Compile for the desired 14 | * instruction set, which must be at least SSE2. Specify the supported 15 | * instruction set by a command line define, e.g. __SSE4_1__ if the 16 | * compiler does not automatically do so. 17 | * For detailed instructions, see vcl_manual.pdf 18 | * 19 | * Each vector object is represented internally in the CPU as a vector 20 | * register with 128, 256 or 512 bits. 21 | * 22 | * This header file includes the appropriate header files depending on the 23 | * selected instruction set. 24 | * 25 | * (c) Copyright 2012-2022 Agner Fog. 26 | * Apache License version 2.0 or later. 27 | ******************************************************************************/ 28 | 29 | #ifndef VECTORCLASS_H 30 | #define VECTORCLASS_H 20200 31 | 32 | // Maximum vector size, bits. Allowed values are 128, 256, 512 33 | #ifndef MAX_VECTOR_SIZE 34 | #define MAX_VECTOR_SIZE 512 35 | #endif 36 | 37 | // Determine instruction set, and define platform-dependent functions 38 | #include "instrset.h" // Select supported instruction set 39 | 40 | #if INSTRSET < 2 // instruction set SSE2 is the minimum 41 | #error Please compile for the SSE2 instruction set or higher 42 | #else 43 | 44 | // Select appropriate .h files depending on instruction set 45 | #include "vectori128.h" // 128-bit integer vectors 46 | #include "vectorf128.h" // 128-bit floating point vectors 47 | 48 | #if MAX_VECTOR_SIZE >= 256 49 | #if INSTRSET >= 8 50 | #include "vectori256.h" // 256-bit integer vectors, requires AVX2 instruction set 51 | #else 52 | #include "vectori256e.h" // 256-bit integer vectors, emulated 53 | #endif // INSTRSET >= 8 54 | #if INSTRSET >= 7 55 | #include "vectorf256.h" // 256-bit floating point vectors, requires AVX instruction set 56 | #else 57 | #include "vectorf256e.h" // 256-bit floating point vectors, emulated 58 | #endif // INSTRSET >= 7 59 | #endif // MAX_VECTOR_SIZE >= 256 60 | 61 | #if 
MAX_VECTOR_SIZE >= 512 62 | #if INSTRSET >= 9 63 | #include "vectori512.h" // 512-bit vectors of 32 and 64 bit integers, requires AVX512F instruction set 64 | #include "vectorf512.h" // 512-bit floating point vectors, requires AVX512F instruction set 65 | #else 66 | #include "vectori512e.h" // 512-bit integer vectors, emulated 67 | #include "vectorf512e.h" // 512-bit floating point vectors, emulated 68 | #endif // INSTRSET >= 9 69 | #if INSTRSET >= 10 70 | #include "vectori512s.h" // 512-bit vectors of 8 and 16 bit integers, requires AVX512BW instruction set 71 | #else 72 | #include "vectori512se.h" // 512-bit vectors of 8 and 16 bit integers, emulated 73 | #endif 74 | #endif // MAX_VECTOR_SIZE >= 512 75 | 76 | #include "vector_convert.h" // conversion between different vector sizes, and common templates 77 | 78 | #endif // INSTRSET >= 2 79 | 80 | 81 | #else // VECTORCLASS_H 82 | 83 | #if VECTORCLASS_H < 20000 84 | #error Mixed versions of vector class library 85 | #endif 86 | 87 | #endif // VECTORCLASS_H 88 | -------------------------------------------------------------------------------- /include/avisynth/avs/cpuid.h: -------------------------------------------------------------------------------- 1 | // This program is free software; you can redistribute it and/or modify 2 | // it under the terms of the GNU General Public License as published by 3 | // the Free Software Foundation; either version 2 of the License, or 4 | // (at your option) any later version. 5 | // 6 | // This program is distributed in the hope that it will be useful, 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | // GNU General Public License for more details. 
10 | // 11 | // You should have received a copy of the GNU General Public License 12 | // along with this program; if not, write to the Free Software 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 14 | // http://www.gnu.org/copyleft/gpl.html . 15 | // 16 | // Linking Avisynth statically or dynamically with other modules is making a 17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 18 | // General Public License cover the whole combination. 19 | // 20 | // As a special exception, the copyright holders of Avisynth give you 21 | // permission to link Avisynth with independent modules that communicate with 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 23 | // terms of these independent modules, and to copy and distribute the 24 | // resulting combined work under terms of your choice, provided that 25 | // every copy of the combined work is accompanied by a complete copy of 26 | // the source code of Avisynth (the version of Avisynth used to produce the 27 | // combined work), being distributed under the terms of the GNU General 28 | // Public License plus this exception. An independent module is a module 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 30 | // import and export plugins, or graphical user interfaces. 31 | 32 | #ifndef AVSCORE_CPUID_H 33 | #define AVSCORE_CPUID_H 34 | 35 | // For GetCPUFlags. These are backwards-compatible with those in VirtualDub. 
36 | // ending with SSE4_2 37 | // For emulation see https://software.intel.com/en-us/articles/intel-software-development-emulator 38 | enum { 39 | /* oldest CPU to support extension */ 40 | CPUF_FORCE = 0x01, // N/A 41 | CPUF_FPU = 0x02, // 386/486DX 42 | CPUF_MMX = 0x04, // P55C, K6, PII 43 | CPUF_INTEGER_SSE = 0x08, // PIII, Athlon 44 | CPUF_SSE = 0x10, // PIII, Athlon XP/MP 45 | CPUF_SSE2 = 0x20, // PIV, K8 46 | CPUF_3DNOW = 0x40, // K6-2 47 | CPUF_3DNOW_EXT = 0x80, // Athlon 48 | CPUF_X86_64 = 0xA0, // Hammer (note: equiv. to 3DNow + SSE2, which 49 | // only Hammer will have anyway) 50 | CPUF_SSE3 = 0x100, // PIV+, K8 Venice 51 | CPUF_SSSE3 = 0x200, // Core 2 52 | CPUF_SSE4 = 0x400, 53 | CPUF_SSE4_1 = 0x400, // Penryn, Wolfdale, Yorkfield 54 | CPUF_AVX = 0x800, // Sandy Bridge, Bulldozer 55 | CPUF_SSE4_2 = 0x1000, // Nehalem 56 | // AVS+ 57 | CPUF_AVX2 = 0x2000, // Haswell 58 | CPUF_FMA3 = 0x4000, 59 | CPUF_F16C = 0x8000, 60 | CPUF_MOVBE = 0x10000, // Big Endian move 61 | CPUF_POPCNT = 0x20000, 62 | CPUF_AES = 0x40000, 63 | CPUF_FMA4 = 0x80000, 64 | 65 | CPUF_AVX512F = 0x100000, // AVX-512 Foundation. 
66 | CPUF_AVX512DQ = 0x200000, // AVX-512 DQ (Double/Quad granular) Instructions 67 | CPUF_AVX512PF = 0x400000, // AVX-512 Prefetch 68 | CPUF_AVX512ER = 0x800000, // AVX-512 Exponential and Reciprocal 69 | CPUF_AVX512CD = 0x1000000, // AVX-512 Conflict Detection 70 | CPUF_AVX512BW = 0x2000000, // AVX-512 BW (Byte/Word granular) Instructions 71 | CPUF_AVX512VL = 0x4000000, // AVX-512 VL (128/256 Vector Length) Extensions 72 | CPUF_AVX512IFMA = 0x8000000, // AVX-512 IFMA integer 52 bit 73 | CPUF_AVX512VBMI = 0x10000000,// AVX-512 VBMI 74 | }; 75 | 76 | #ifdef BUILDING_AVSCORE 77 | int GetCPUFlags(); 78 | void SetMaxCPU(int new_flags); 79 | #endif 80 | 81 | #endif // AVSCORE_CPUID_H 82 | -------------------------------------------------------------------------------- /include/dualsynth/ds_format.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Xinyue Lu 3 | * 4 | * DualSynth wrapper - DSFormat. 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | struct DSFormat 11 | { 12 | bool IsFamilyYUV {true}, IsFamilyRGB {false}, IsFamilyYCC {false}; 13 | bool IsInteger {true}, IsFloat {false}; 14 | int SSW {0}, SSH {0}; 15 | int BitsPerSample {8}, BytesPerSample {1}; 16 | int Planes {3}; 17 | DSFormat() {} 18 | DSFormat(const VSFormat* format) 19 | { 20 | Planes = format->numPlanes; 21 | IsFamilyYUV = format->colorFamily == cmYUV || format->colorFamily == cmGray; 22 | IsFamilyRGB = format->colorFamily == cmRGB; 23 | IsFamilyYCC = format->colorFamily == cmYCoCg; 24 | SSW = format->subSamplingW; 25 | SSH = format->subSamplingH; 26 | BitsPerSample = format->bitsPerSample; 27 | BytesPerSample = format->bytesPerSample; 28 | IsInteger = format->sampleType == stInteger; 29 | IsFloat = format->sampleType == stFloat; 30 | } 31 | 32 | const VSFormat* ToVSFormat(const VSCore* vscore, const VSAPI* vsapi) const 33 | { 34 | VSColorFamily family = cmYUV; 35 | if (IsFamilyYUV) 36 | family = Planes == 1 ? 
cmGray : cmYUV; 37 | else if (IsFamilyRGB) 38 | family = cmRGB; 39 | else if (IsFamilyYCC) 40 | family = cmYCoCg; 41 | return vsapi->registerFormat(family, IsInteger ? stInteger : stFloat, BitsPerSample, SSW, SSH, const_cast(vscore)); 42 | } 43 | 44 | DSFormat(int format) 45 | { 46 | const int componentBitSizes[8] = {8,16,32,0,0,10,12,14}; 47 | if (format == VideoInfo::CS_I420) 48 | format = VideoInfo::CS_YV12; 49 | 50 | auto PYUV = VideoInfo::CS_PLANAR | VideoInfo::CS_YUV; 51 | IsFamilyYUV = (format & PYUV) == PYUV; 52 | auto PRGB = VideoInfo::CS_PLANAR | VideoInfo::CS_BGR; 53 | IsFamilyRGB = (format & PRGB) == PRGB; 54 | IsFamilyYCC = false; 55 | BitsPerSample = componentBitSizes[(format >> VideoInfo::CS_Shift_Sample_Bits) & 7]; 56 | BytesPerSample = BitsPerSample == 8 ? 1 : BitsPerSample == 32 ? 4 : 2; 57 | IsInteger = BitsPerSample < 32; 58 | IsFloat = BitsPerSample == 32; 59 | if (IsFamilyYUV && (format & VideoInfo::CS_GENERIC_Y) == VideoInfo::CS_GENERIC_Y) 60 | Planes = 1; 61 | else if (IsFamilyYUV && (format & VideoInfo::CS_YUVA) == VideoInfo::CS_YUVA) 62 | Planes = 4; 63 | else if (IsFamilyRGB && (format & VideoInfo::CS_RGBA_TYPE) == VideoInfo::CS_RGBA_TYPE) 64 | Planes = 4; 65 | 66 | if (IsFamilyYUV && Planes > 1) { 67 | SSW = ((format >> VideoInfo::CS_Shift_Sub_Width) + 1) & 3; 68 | SSH = ((format >> VideoInfo::CS_Shift_Sub_Height) + 1) & 3; 69 | } 70 | } 71 | 72 | int ToAVSFormat() const 73 | { 74 | int pixel_format = VideoInfo::CS_PLANAR | (Planes == 3 ? VideoInfo::CS_YUV : VideoInfo::CS_YUVA) | VideoInfo::CS_VPlaneFirst; 75 | if (IsFamilyYUV) { 76 | pixel_format = VideoInfo::CS_PLANAR | (Planes == 3 ? 
VideoInfo::CS_YUV : VideoInfo::CS_YUVA) | VideoInfo::CS_VPlaneFirst; 77 | 78 | switch(SSW) { 79 | case 0: pixel_format |= VideoInfo::CS_Sub_Width_1; break; 80 | case 1: pixel_format |= VideoInfo::CS_Sub_Width_2; break; 81 | case 2: pixel_format |= VideoInfo::CS_Sub_Width_4; break; 82 | } 83 | 84 | switch(SSH) { 85 | case 0: pixel_format |= VideoInfo::CS_Sub_Height_1; break; 86 | case 1: pixel_format |= VideoInfo::CS_Sub_Height_2; break; 87 | case 2: pixel_format |= VideoInfo::CS_Sub_Height_4; break; 88 | } 89 | 90 | if (Planes == 1) 91 | pixel_format = VideoInfo::CS_GENERIC_Y; 92 | } 93 | else if (IsFamilyRGB || IsFamilyYCC) 94 | pixel_format = VideoInfo::CS_PLANAR | VideoInfo::CS_BGR | (Planes == 3 ? VideoInfo::CS_RGB_TYPE : VideoInfo::CS_RGBA_TYPE); 95 | 96 | switch(BitsPerSample) { 97 | case 8: pixel_format |= VideoInfo::CS_Sample_Bits_8; break; 98 | case 10: pixel_format |= VideoInfo::CS_Sample_Bits_10; break; 99 | case 12: pixel_format |= VideoInfo::CS_Sample_Bits_12; break; 100 | case 14: pixel_format |= VideoInfo::CS_Sample_Bits_14; break; 101 | case 16: pixel_format |= VideoInfo::CS_Sample_Bits_16; break; 102 | case 32: pixel_format |= VideoInfo::CS_Sample_Bits_32; break; 103 | } 104 | 105 | return pixel_format; 106 | } 107 | }; 108 | -------------------------------------------------------------------------------- /include/avisynth/avs/capi.h: -------------------------------------------------------------------------------- 1 | // Avisynth C Interface Version 0.20 2 | // Copyright 2003 Kevin Atkinson 3 | 4 | // This program is free software; you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation; either version 2 of the License, or 7 | // (at your option) any later version. 
8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program; if not, write to the Free Software 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 17 | // http://www.gnu.org/copyleft/gpl.html . 18 | // 19 | // As a special exception, I give you permission to link to the 20 | // Avisynth C interface with independent modules that communicate with 21 | // the Avisynth C interface solely through the interfaces defined in 22 | // avisynth_c.h, regardless of the license terms of these independent 23 | // modules, and to copy and distribute the resulting combined work 24 | // under terms of your choice, provided that every copy of the 25 | // combined work is accompanied by a complete copy of the source code 26 | // of the Avisynth C interface and Avisynth itself (with the version 27 | // used to produce the combined work), being distributed under the 28 | // terms of the GNU General Public License plus this exception. An 29 | // independent module is a module which is not derived from or based 30 | // on Avisynth C Interface, such as 3rd-party filters, import and 31 | // export plugins, or graphical user interfaces. 
32 | 33 | #ifndef AVS_CAPI_H 34 | #define AVS_CAPI_H 35 | 36 | #include "config.h" 37 | 38 | #ifdef AVS_POSIX 39 | // this is also defined in avs/posix.h 40 | #ifndef AVS_HAIKU 41 | #define __declspec(x) 42 | #endif 43 | #endif 44 | 45 | #ifdef __cplusplus 46 | # define EXTERN_C extern "C" 47 | #else 48 | # define EXTERN_C 49 | #endif 50 | 51 | #ifdef AVS_WINDOWS 52 | #ifdef BUILDING_AVSCORE 53 | # if defined(GCC) && defined(X86_32) 54 | # define AVSC_CC 55 | # else // MSVC builds and 64-bit GCC 56 | # ifndef AVSC_USE_STDCALL 57 | # define AVSC_CC __cdecl 58 | # else 59 | # define AVSC_CC __stdcall 60 | # endif 61 | # endif 62 | #else // needed for programs that talk to AviSynth+ 63 | # ifndef AVSC_WIN32_GCC32 // see comment below 64 | # ifndef AVSC_USE_STDCALL 65 | # define AVSC_CC __cdecl 66 | # else 67 | # define AVSC_CC __stdcall 68 | # endif 69 | # else 70 | # define AVSC_CC 71 | # endif 72 | #endif 73 | # else 74 | # define AVSC_CC 75 | #endif 76 | 77 | // On 64-bit Windows, there's only one calling convention, 78 | // so there is no difference between MSVC and GCC. On 32-bit, 79 | // this isn't true. The convention that GCC needs to use to 80 | // even build AviSynth+ as 32-bit makes anything that uses 81 | // it incompatible with 32-bit MSVC builds of AviSynth+. 82 | // The AVSC_WIN32_GCC32 define is meant to provide a user 83 | // switchable way to make builds of FFmpeg to test 32-bit 84 | // GCC builds of AviSynth+ without having to screw around 85 | // with alternate headers, while still default to the usual 86 | // situation of using 32-bit MSVC builds of AviSynth+. 87 | 88 | // Hopefully, this situation will eventually be resolved 89 | // and a broadly compatible solution will arise so the 90 | // same 32-bit FFmpeg build can handle either MSVC or GCC 91 | // builds of AviSynth+. 
92 | 93 | #define AVSC_INLINE static __inline 94 | 95 | #ifdef BUILDING_AVSCORE 96 | #ifdef AVS_WINDOWS 97 | # ifndef AVS_STATIC_LIB 98 | # define AVSC_EXPORT __declspec(dllexport) 99 | # else 100 | # define AVSC_EXPORT 101 | # endif 102 | # define AVSC_API(ret, name) EXTERN_C AVSC_EXPORT ret AVSC_CC name 103 | #else 104 | # define AVSC_EXPORT EXTERN_C 105 | # define AVSC_API(ret, name) EXTERN_C ret AVSC_CC name 106 | #endif 107 | #else 108 | # define AVSC_EXPORT EXTERN_C __declspec(dllexport) 109 | # ifndef AVS_STATIC_LIB 110 | # define AVSC_IMPORT __declspec(dllimport) 111 | # else 112 | # define AVSC_IMPORT 113 | # endif 114 | # ifndef AVSC_NO_DECLSPEC 115 | # define AVSC_API(ret, name) EXTERN_C AVSC_IMPORT ret AVSC_CC name 116 | # else 117 | # define AVSC_API(ret, name) typedef ret (AVSC_CC *name##_func) 118 | # endif 119 | #endif 120 | 121 | #endif //AVS_CAPI_H 122 | -------------------------------------------------------------------------------- /include/vapoursynth/VSScript.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2013-2018 Fredrik Mellbin 3 | * 4 | * This file is part of VapourSynth. 5 | * 6 | * VapourSynth is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU Lesser General Public 8 | * License as published by the Free Software Foundation; either 9 | * version 2.1 of the License, or (at your option) any later version. 10 | * 11 | * VapourSynth is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | * Lesser General Public License for more details. 
15 | * 16 | * You should have received a copy of the GNU Lesser General Public 17 | * License along with VapourSynth; if not, write to the Free Software 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 | */ 20 | 21 | #ifndef VSSCRIPT_H 22 | #define VSSCRIPT_H 23 | 24 | #include "VapourSynth.h" 25 | 26 | #define VSSCRIPT_API_MAJOR 3 27 | #define VSSCRIPT_API_MINOR 2 28 | #define VSSCRIPT_API_VERSION ((VSSCRIPT_API_MAJOR << 16) | (VSSCRIPT_API_MINOR)) 29 | 30 | /* As of api 3.2 all functions are threadsafe */ 31 | 32 | typedef struct VSScript VSScript; 33 | 34 | typedef enum VSEvalFlags { 35 | efSetWorkingDir = 1, 36 | } VSEvalFlags; 37 | 38 | /* Get the api version */ 39 | VS_API(int) vsscript_getApiVersion(void); /* api 3.1 */ 40 | 41 | /* Initialize the available scripting runtimes, returns zero on failure */ 42 | VS_API(int) vsscript_init(void); 43 | 44 | /* Free all scripting runtimes */ 45 | VS_API(int) vsscript_finalize(void); 46 | 47 | /* 48 | * Pass a pointer to a null handle to create a new one 49 | * The values returned by the query functions are only valid during the lifetime of the VSScript 50 | * scriptFilename is if the error message should reference a certain file, NULL allowed in vsscript_evaluateScript() 51 | * core is to pass in an already created instance so that mixed environments can be used, 52 | * NULL creates a new core that can be fetched with vsscript_getCore() later OR implicitly uses the one associated with an already existing handle when passed 53 | * If efSetWorkingDir is passed to flags the current working directory will be changed to the path of the script 54 | * note that if scriptFilename is NULL in vsscript_evaluateScript() then __file__ won't be set and the working directory won't be changed 55 | * Set efSetWorkingDir to get the default and recommended behavior 56 | */ 57 | VS_API(int) vsscript_evaluateScript(VSScript **handle, const char *script, const char *scriptFilename, int flags); 58 | 
/* Convenience version of the above function that loads the script from a file */ 59 | VS_API(int) vsscript_evaluateFile(VSScript **handle, const char *scriptFilename, int flags); 60 | /* Create an empty environment for use in later invocations, mostly useful to set script variables before execution */ 61 | VS_API(int) vsscript_createScript(VSScript **handle); 62 | 63 | VS_API(void) vsscript_freeScript(VSScript *handle); 64 | VS_API(const char *) vsscript_getError(VSScript *handle); 65 | /* The node returned must be freed using freeNode() before calling vsscript_freeScript() */ 66 | VS_API(VSNodeRef *) vsscript_getOutput(VSScript *handle, int index); 67 | /* Both nodes returned must be freed using freeNode() before calling vsscript_freeScript(), the alpha node pointer will only be set if an alpha clip has been set in the script */ 68 | VS_API(VSNodeRef *) vsscript_getOutput2(VSScript *handle, int index, VSNodeRef **alpha); /* api 3.1 */ 69 | /* Unset an output index */ 70 | VS_API(int) vsscript_clearOutput(VSScript *handle, int index); 71 | /* The core is valid as long as the environment exists */ 72 | VS_API(VSCore *) vsscript_getCore(VSScript *handle); 73 | /* Convenience function for retrieving a vsapi pointer */ 74 | VS_API(const VSAPI *) vsscript_getVSApi(void); /* deprecated as of api 3.2 since it's impossible to tell the api version supported */ 75 | VS_API(const VSAPI *) vsscript_getVSApi2(int version); /* api 3.2, generally you should pass VAPOURSYNTH_API_VERSION */ 76 | 77 | /* Variables names that are not set or not of a convertible type will return an error */ 78 | VS_API(int) vsscript_getVariable(VSScript *handle, const char *name, VSMap *dst); 79 | VS_API(int) vsscript_setVariable(VSScript *handle, const VSMap *vars); 80 | VS_API(int) vsscript_clearVariable(VSScript *handle, const char *name); 81 | /* Tries to clear everything set in an environment, normally it is better to simply free an environment completely and create a new one */ 82 | 
VS_API(void) vsscript_clearEnvironment(VSScript *handle); 83 | 84 | #endif /* VSSCRIPT_H */ 85 | -------------------------------------------------------------------------------- /src/impl_dispatch_decl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "core.h" 4 | 5 | #ifdef IMPL_DISPATCH_IMPORT_DECLARATION 6 | 7 | #define DEFINE_IMPL(n, \ 8 | nullptr, \ 9 | impl_func_mode1_blur, \ 10 | impl_func_mode1_noblur, \ 11 | impl_func_mode2_blur, \ 12 | impl_func_mode2_noblur, \ 13 | impl_func_mode3_blur, \ 14 | impl_func_mode3_noblur, \ 15 | impl_func_mode4_blur, \ 16 | impl_func_mode4_noblur, \ 17 | impl_func_mode5_blur, \ 18 | impl_func_mode5_noblur, \ 19 | impl_func_mode6_blur, \ 20 | impl_func_mode6_noblur, \ 21 | impl_func_mode7_blur, \ 22 | impl_func_mode7_noblur) \ 23 | extern const process_plane_impl_t process_plane_impl_##n []; 24 | 25 | #else 26 | 27 | #define DEFINE_IMPL(n, \ 28 | nullptr, \ 29 | impl_func_mode1_blur, \ 30 | impl_func_mode1_noblur, \ 31 | impl_func_mode2_blur, \ 32 | impl_func_mode2_noblur, \ 33 | impl_func_mode3_blur, \ 34 | impl_func_mode3_noblur, \ 35 | impl_func_mode4_blur, \ 36 | impl_func_mode4_noblur, \ 37 | impl_func_mode5_blur, \ 38 | impl_func_mode5_noblur, \ 39 | impl_func_mode6_blur, \ 40 | impl_func_mode6_noblur, \ 41 | impl_func_mode7_blur, \ 42 | impl_func_mode7_noblur) \ 43 | extern const process_plane_impl_t process_plane_impl_##n [] = { \ 44 | nullptr, \ 45 | impl_func_mode1_blur, \ 46 | impl_func_mode1_noblur, \ 47 | impl_func_mode2_blur, \ 48 | impl_func_mode2_noblur, \ 49 | impl_func_mode3_blur, \ 50 | impl_func_mode3_noblur, \ 51 | impl_func_mode4_blur, \ 52 | impl_func_mode4_noblur, \ 53 | impl_func_mode5_blur, \ 54 | impl_func_mode5_noblur, \ 55 | impl_func_mode6_blur, \ 56 | impl_func_mode6_noblur, \ 57 | impl_func_mode7_blur, \ 58 | impl_func_mode7_noblur}; 59 | 60 | #endif 61 | 62 | 63 | #define DEFINE_TEMPLATE_IMPL(name, impl_func, ...) 
\ 64 | DEFINE_IMPL(name, \ 65 | (nullptr), \ 66 | (&impl_func<1, true, __VA_ARGS__>), \ 67 | (&impl_func<1, false, __VA_ARGS__>), \ 68 | (&impl_func<2, true, __VA_ARGS__>), \ 69 | (&impl_func<2, false, __VA_ARGS__>), \ 70 | (&impl_func<3, true, __VA_ARGS__>), \ 71 | (&impl_func<3, false, __VA_ARGS__>), \ 72 | (&impl_func<4, true, __VA_ARGS__>), \ 73 | (&impl_func<4, false, __VA_ARGS__>), \ 74 | (&impl_func<5, true, __VA_ARGS__>), \ 75 | (&impl_func<5, false, __VA_ARGS__>), \ 76 | (&impl_func<6, true, __VA_ARGS__>), \ 77 | (&impl_func<6, false, __VA_ARGS__>), \ 78 | (&impl_func<7, true, __VA_ARGS__>), \ 79 | (&impl_func<7, false, __VA_ARGS__>) ); 80 | 81 | #define DEFINE_SSE_IMPL(name, ...) \ 82 | DEFINE_TEMPLATE_IMPL(name, process_plane_sse_impl, __VA_ARGS__); 83 | 84 | 85 | #if defined(IMPL_DISPATCH_IMPORT_DECLARATION) || defined(DECLARE_IMPL_C) 86 | DEFINE_TEMPLATE_IMPL(c_high_no_dithering, process_plane_plainc, DA_HIGH_NO_DITHERING); 87 | DEFINE_TEMPLATE_IMPL(c_high_ordered_dithering, process_plane_plainc, DA_HIGH_ORDERED_DITHERING); 88 | DEFINE_TEMPLATE_IMPL(c_high_floyd_steinberg_dithering, process_plane_plainc, DA_HIGH_FLOYD_STEINBERG_DITHERING); 89 | DEFINE_TEMPLATE_IMPL(c_16bit_interleaved, process_plane_plainc, DA_16BIT_INTERLEAVED); 90 | #endif 91 | 92 | 93 | #if defined(IMPL_DISPATCH_IMPORT_DECLARATION) || defined(DECLARE_IMPL_SSE4) 94 | DEFINE_SSE_IMPL(sse4_high_no_dithering, DA_HIGH_NO_DITHERING); 95 | DEFINE_SSE_IMPL(sse4_high_ordered_dithering, DA_HIGH_ORDERED_DITHERING); 96 | DEFINE_SSE_IMPL(sse4_high_floyd_steinberg_dithering, DA_HIGH_FLOYD_STEINBERG_DITHERING); 97 | DEFINE_SSE_IMPL(sse4_16bit_interleaved, DA_16BIT_INTERLEAVED); 98 | #endif 99 | 100 | #if defined(IMPL_DISPATCH_IMPORT_DECLARATION) || defined(DECLARE_IMPL_AVX2) 101 | #define DEFINE_AVX2_IMPL(name, ...) 
\ 102 | DEFINE_TEMPLATE_IMPL(name, process_plane_avx2_impl, __VA_ARGS__); 103 | DEFINE_AVX2_IMPL(avx2_high_no_dithering, DA_HIGH_NO_DITHERING); 104 | DEFINE_AVX2_IMPL(avx2_high_ordered_dithering, DA_HIGH_ORDERED_DITHERING); 105 | DEFINE_AVX2_IMPL(avx2_high_floyd_steinberg_dithering, DA_HIGH_FLOYD_STEINBERG_DITHERING); 106 | DEFINE_AVX2_IMPL(avx2_16bit_interleaved, DA_16BIT_INTERLEAVED); 107 | #endif 108 | 109 | #if defined(IMPL_DISPATCH_IMPORT_DECLARATION) || defined(DECLARE_IMPL_AVX512) 110 | #define DEFINE_AVX512_IMPL(name, ...) \ 111 | DEFINE_TEMPLATE_IMPL(name, process_plane_avx512_impl, __VA_ARGS__); 112 | DEFINE_AVX512_IMPL(avx512_high_no_dithering, DA_HIGH_NO_DITHERING); 113 | DEFINE_AVX512_IMPL(avx512_high_ordered_dithering, DA_HIGH_ORDERED_DITHERING); 114 | DEFINE_AVX512_IMPL(avx512_high_floyd_steinberg_dithering, DA_HIGH_FLOYD_STEINBERG_DITHERING); 115 | DEFINE_AVX512_IMPL(avx512_16bit_interleaved, DA_16BIT_INTERLEAVED); 116 | #endif 117 | -------------------------------------------------------------------------------- /include/avisynth/avs/alignment.h: -------------------------------------------------------------------------------- 1 | // Avisynth C Interface Version 0.20 2 | // Copyright 2003 Kevin Atkinson 3 | 4 | // This program is free software; you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation; either version 2 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 
13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program; if not, write to the Free Software 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 17 | // http://www.gnu.org/copyleft/gpl.html . 18 | // 19 | // As a special exception, I give you permission to link to the 20 | // Avisynth C interface with independent modules that communicate with 21 | // the Avisynth C interface solely through the interfaces defined in 22 | // avisynth_c.h, regardless of the license terms of these independent 23 | // modules, and to copy and distribute the resulting combined work 24 | // under terms of your choice, provided that every copy of the 25 | // combined work is accompanied by a complete copy of the source code 26 | // of the Avisynth C interface and Avisynth itself (with the version 27 | // used to produce the combined work), being distributed under the 28 | // terms of the GNU General Public License plus this exception. An 29 | // independent module is a module which is not derived from or based 30 | // on Avisynth C Interface, such as 3rd-party filters, import and 31 | // export plugins, or graphical user interfaces. 32 | 33 | #ifndef AVS_ALIGNMENT_H 34 | #define AVS_ALIGNMENT_H 35 | 36 | // Functions and macros to help work with alignment requirements. 37 | 38 | // Tells if a number is a power of two. 39 | #define IS_POWER2(n) ((n) && !((n) & ((n) - 1))) 40 | 41 | // Tells if the pointer "ptr" is aligned to "align" bytes. 
// Both arguments are fully parenthesized so that expression arguments
// (e.g. "base + 4" or "wide ? 32 : 16") are evaluated as a unit.
// "align" must be a power of two.
#define IS_PTR_ALIGNED(ptr, align) ((((uintptr_t)(ptr)) & ((uintptr_t)((align) - 1))) == 0)

// Rounds the number "n" up to the nearest multiple of "align" that is
// greater than or equal to "n". "align" must be a power of two.
#define ALIGN_NUMBER(n, align) (((n) + (align)-1) & (~((align)-1)))

// Rounds the pointer address "ptr" up to the nearest multiple of "align"
// that is greater than or equal to it. "align" must be a power of two.
// The result has type uintptr_t.
#define ALIGN_POINTER(ptr, align) (((uintptr_t)(ptr) + (align)-1) & (~(uintptr_t)((align)-1)))

#ifdef __cplusplus

// NOTE(review): the three header names below were lost from this copy of the
// file; they are reconstructed from the names used (assert, malloc/free,
// uintptr_t) - confirm against upstream.
#include <cassert>
#include <cstdlib>
#include <cstdint>
#include "config.h"

#if defined(MSVC) && _MSC_VER<1400
    // needed for VS2013, otherwise C++11 'alignas' works
    // NOTE(review): _MSC_VER 1400 is Visual Studio 2005, not 2013 - confirm
    // which threshold is intended.
    #define avs_alignas(x) __declspec(align(x))
#else
    // assumes C++11 support
    #define avs_alignas(x) alignas(x)
#endif

// Returns true when "ptr" is a multiple of "align" bytes.
// Asserts that "align" is a power of two.
template <typename T>
static bool IsPtrAligned(T* ptr, size_t align)
{
  assert(IS_POWER2(align));
  return (bool)IS_PTR_ALIGNED(ptr, align);
}

// Returns "n" rounded up to a multiple of "align".
// Asserts that "align" is a power of two.
template <typename T>
static T AlignNumber(T n, T align)
{
  assert(IS_POWER2(align));
  return ALIGN_NUMBER(n, align);
}

// Returns "ptr" advanced to the next address that is a multiple of "align"
// (unchanged if already aligned). Asserts that "align" is a power of two.
template <typename T>
static T* AlignPointer(T* ptr, size_t align)
{
  assert(IS_POWER2(align));
  return (T*)ALIGN_POINTER(ptr, align);
}

extern "C"
{
#else
// NOTE(review): header name(s) lost from this copy; malloc/free need
// <stdlib.h> and the macros above need uintptr_t from <stdint.h>.
#include <stdint.h>
#include <stdlib.h>
#endif  // __cplusplus

// Returns a new buffer that is at least the size "nbytes".
// The buffer will be aligned to "align" bytes.
// Returns NULL on error. On successful allocation,
// the returned buffer must be freed using "avs_free".
96 | inline void* avs_malloc(size_t nbytes, size_t align) 97 | { 98 | if (!IS_POWER2(align)) 99 | return NULL; 100 | 101 | size_t offset = sizeof(void*) + align - 1; 102 | 103 | void *orig = malloc(nbytes + offset); 104 | if (orig == NULL) 105 | return NULL; 106 | 107 | void **aligned = (void**)(((uintptr_t)orig + (uintptr_t)offset) & (~(uintptr_t)(align-1))); 108 | aligned[-1] = orig; 109 | return aligned; 110 | } 111 | 112 | // Buffers allocated using "avs_malloc" must be freed 113 | // using "avs_free" instead of "free". 114 | inline void avs_free(void *ptr) 115 | { 116 | // Mirroring free()'s semantic requires us to accept NULLs 117 | if (ptr == NULL) 118 | return; 119 | 120 | free(((void**)ptr)[-1]); 121 | } 122 | 123 | #ifdef __cplusplus 124 | } // extern "C" 125 | 126 | // The point of these undef's is to force using the template functions 127 | // if we are in C++ mode. For C, the user can rely only on the macros. 128 | #undef IS_PTR_ALIGNED 129 | #undef ALIGN_NUMBER 130 | #undef ALIGN_POINTER 131 | 132 | #endif // __cplusplus 133 | 134 | #endif //AVS_ALIGNMENT_H 135 | -------------------------------------------------------------------------------- /src/pixel_proc_c_high_f_s_dithering.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "impl_dispatch.h" 7 | 8 | namespace pixel_proc_high_f_s_dithering { 9 | 10 | // #define DUMP_DATA 11 | 12 | typedef unsigned short ERROR_TYPE; 13 | 14 | typedef struct _context_t 15 | { 16 | int output_depth; 17 | ERROR_TYPE* error_buffer; 18 | bool buffer_needs_dealloc; 19 | ERROR_TYPE* current_px_error; 20 | int row_pitch; 21 | int frame_width; 22 | int processed_pixels_in_current_line; 23 | #ifdef DUMP_DATA 24 | FILE* debug_dump_fd[3]; 25 | #endif 26 | } context_t; 27 | 28 | static inline void init_context(char context_buffer[CONTEXT_BUFFER_SIZE], int frame_width, int output_depth) 29 | { 30 | context_t* ctx = 
(context_t*)context_buffer; 31 | int ctx_size = sizeof(context_t); 32 | memset(ctx, 0, ctx_size); 33 | 34 | // additional 2 bytes are placed at the beginning and the end 35 | int size_needed = (frame_width + 2) * 2 * sizeof(ERROR_TYPE); 36 | if (CONTEXT_BUFFER_SIZE - ctx_size < size_needed) 37 | { 38 | ctx->error_buffer = (ERROR_TYPE*)malloc(size_needed); 39 | ctx->buffer_needs_dealloc = true; 40 | } else { 41 | ctx->error_buffer = (ERROR_TYPE*)(context_buffer + ctx_size); 42 | } 43 | memset(ctx->error_buffer, 0, size_needed); 44 | ctx->current_px_error = ctx->error_buffer + 1; 45 | ctx->row_pitch = frame_width + 2; 46 | ctx->frame_width = frame_width; 47 | ctx->output_depth = output_depth; 48 | 49 | #ifdef DUMP_DATA 50 | char file_name[256]; 51 | sprintf(file_name, "fsdither_dump_stage0_%d", frame_width); 52 | ctx->debug_dump_fd[0] = fopen(file_name, "wb"); 53 | sprintf(file_name, "fsdither_dump_stage1_%d", frame_width); 54 | ctx->debug_dump_fd[1] = fopen(file_name, "wb"); 55 | sprintf(file_name, "fsdither_dump_stage2_%d", frame_width); 56 | ctx->debug_dump_fd[2] = fopen(file_name, "wb"); 57 | #endif 58 | } 59 | 60 | static inline void destroy_context(void* context) 61 | { 62 | context_t* ctx = (context_t*)context; 63 | if (ctx->buffer_needs_dealloc) 64 | { 65 | free(ctx->error_buffer); 66 | ctx->error_buffer = NULL; 67 | } 68 | #ifdef DUMP_DATA 69 | for (int i = 0; i < sizeof(ctx->debug_dump_fd) / sizeof(FILE*); i++) 70 | { 71 | if (ctx->debug_dump_fd[i]) 72 | { 73 | fclose(ctx->debug_dump_fd[i]); 74 | } 75 | } 76 | #endif 77 | } 78 | 79 | static __forceinline void next_pixel(void* context) 80 | { 81 | context_t* ctx = (context_t*)context; 82 | ctx->current_px_error++; 83 | ctx->processed_pixels_in_current_line++; 84 | } 85 | 86 | static __forceinline void next_row(void* context) 87 | { 88 | context_t* ctx = (context_t*)context; 89 | ctx->row_pitch = -ctx->row_pitch; 90 | ctx->current_px_error = ctx->error_buffer + (ctx->row_pitch >> 31) * ctx->row_pitch; 91 | 
memset(ctx->current_px_error + ctx->row_pitch, 0, abs(ctx->row_pitch) * sizeof(ERROR_TYPE)); 92 | ctx->current_px_error++; 93 | ctx->processed_pixels_in_current_line = 0; 94 | } 95 | 96 | static __forceinline int dither(void* context, int pixel, int row, int column); 97 | 98 | #include "pixel_proc_c_high_bit_depth_common.h" 99 | 100 | static const int PIXEL_MAX = ( ( 1 << (INTERNAL_BIT_DEPTH) ) - 1 ); 101 | static const int PIXEL_MIN = 0; 102 | 103 | static __forceinline int dither(void* context, int pixel, int row, int column) 104 | { 105 | context_t* ctx = (context_t*)context; 106 | if (ctx->processed_pixels_in_current_line >= ctx->frame_width) 107 | { 108 | // outside plane, can occur in SSE code 109 | return pixel; 110 | } 111 | #ifndef FS_DITHER_SKIP_PRE_CLAMP 112 | pixel = clamp_pixel(pixel, PIXEL_MIN, PIXEL_MAX); 113 | #endif 114 | #ifdef DUMP_DATA 115 | fwrite(&pixel, 4, 1, ctx->debug_dump_fd[0]); 116 | #endif 117 | pixel += *(ctx->current_px_error); 118 | #ifdef DUMP_DATA 119 | fwrite(&pixel, 4, 1, ctx->debug_dump_fd[1]); 120 | #endif 121 | pixel = clamp_pixel(pixel, PIXEL_MIN, PIXEL_MAX); 122 | #ifdef DUMP_DATA 123 | fwrite(&pixel, 4, 1, ctx->debug_dump_fd[2]); 124 | #endif 125 | int new_error = pixel & ( ( 1 << (INTERNAL_BIT_DEPTH - ctx->output_depth) ) - 1 ); 126 | *(ctx->current_px_error + 1) += (new_error * 7) >> 4; 127 | *(ctx->current_px_error + ctx->row_pitch - 1) += (new_error * 3) >> 4; 128 | *(ctx->current_px_error + ctx->row_pitch) += (new_error * 5) >> 4; 129 | *(ctx->current_px_error + ctx->row_pitch + 1) += (new_error * 1) >> 4; 130 | return pixel; 131 | } 132 | 133 | 134 | }; -------------------------------------------------------------------------------- /src/cpuid.cpp: -------------------------------------------------------------------------------- 1 | // Avisynth v1.0 beta. Copyright 2000 Ben Rudiak-Gould. 
2 | // http://www.math.berkeley.edu/~benrg/avisynth.html 3 | 4 | // VirtualDub - Video processing and capture application 5 | // Copyright (C) 1998-2000 Avery Lee 6 | // 7 | // This program is free software; you can redistribute it and/or modify 8 | // it under the terms of the GNU General Public License as published by 9 | // the Free Software Foundation; either version 2 of the License, or 10 | // (at your option) any later version. 11 | // 12 | // This program is distributed in the hope that it will be useful, 13 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | // GNU General Public License for more details. 16 | // 17 | // You should have received a copy of the GNU General Public License 18 | // along with this program; if not, write to the Free Software 19 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #ifdef AVS_WINDOWS 26 | #include 27 | #elif defined(__x86_64__) || defined(__i386__) 28 | #include 29 | #include 30 | #undef __cpuid 31 | 32 | static inline void __cpuid(int cpuinfo[4], int leaf) { 33 | unsigned int eax, ebx, ecx, edx; 34 | __get_cpuid_count(leaf, 0, &eax, &ebx, &ecx, &edx); 35 | cpuinfo[0] = eax; 36 | cpuinfo[1] = ebx; 37 | cpuinfo[2] = ecx; 38 | cpuinfo[3] = edx; 39 | } 40 | #endif 41 | 42 | #define IS_BIT_SET(bitfield, bit) ((bitfield) & (1<<(bit)) ? 
true : false) 43 | 44 | #if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86) 45 | static uint32_t get_xcr0() 46 | { 47 | uint32_t xcr0; 48 | #if defined(GCC) || defined(CLANG) 49 | __asm__("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx"); 50 | #else 51 | xcr0 = (uint32_t)_xgetbv(0); 52 | #endif 53 | return xcr0; 54 | } 55 | #endif 56 | 57 | static int CPUCheckForExtensions() 58 | { 59 | int result = 0; 60 | 61 | #if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86) 62 | int cpuinfo[4]; 63 | __cpuid(cpuinfo, 1); 64 | 65 | if (IS_BIT_SET(cpuinfo[3], 0)) 66 | result |= CPUF_FPU; 67 | if (IS_BIT_SET(cpuinfo[3], 23)) 68 | result |= CPUF_MMX; 69 | if (IS_BIT_SET(cpuinfo[3], 25)) 70 | result |= CPUF_SSE | CPUF_INTEGER_SSE; 71 | if (IS_BIT_SET(cpuinfo[3], 26)) 72 | result |= CPUF_SSE2; 73 | if (IS_BIT_SET(cpuinfo[2], 0)) 74 | result |= CPUF_SSE3; 75 | if (IS_BIT_SET(cpuinfo[2], 9)) 76 | result |= CPUF_SSSE3; 77 | if (IS_BIT_SET(cpuinfo[2], 19)) 78 | result |= CPUF_SSE4_1; 79 | if (IS_BIT_SET(cpuinfo[2], 20)) 80 | result |= CPUF_SSE4_2; 81 | if (IS_BIT_SET(cpuinfo[2], 22)) 82 | result |= CPUF_MOVBE; 83 | if (IS_BIT_SET(cpuinfo[2], 23)) 84 | result |= CPUF_POPCNT; 85 | if (IS_BIT_SET(cpuinfo[2], 25)) 86 | result |= CPUF_AES; 87 | if (IS_BIT_SET(cpuinfo[2], 29)) 88 | result |= CPUF_F16C; 89 | 90 | bool xgetbv_supported = IS_BIT_SET(cpuinfo[2], 27); 91 | bool avx_supported = IS_BIT_SET(cpuinfo[2], 28); 92 | if (xgetbv_supported && avx_supported) 93 | { 94 | uint32_t xgetbv0_32 = get_xcr0(); 95 | if ((xgetbv0_32 & 0x6u) == 0x6u) { 96 | result |= CPUF_AVX; 97 | if (IS_BIT_SET(cpuinfo[2], 12)) 98 | result |= CPUF_FMA3; 99 | __cpuid(cpuinfo, 7); 100 | if (IS_BIT_SET(cpuinfo[1], 5)) 101 | result |= CPUF_AVX2; 102 | } 103 | if((xgetbv0_32 & (0x7u << 5)) && (xgetbv0_32 & (0x3u << 1))) { 104 | __cpuid(cpuinfo, 7); 105 | if (IS_BIT_SET(cpuinfo[1], 16)) 106 | result |= CPUF_AVX512F; 107 | if (IS_BIT_SET(cpuinfo[1], 17)) 108 | result 
|= CPUF_AVX512DQ; 109 | if (IS_BIT_SET(cpuinfo[1], 21)) 110 | result |= CPUF_AVX512IFMA; 111 | if (IS_BIT_SET(cpuinfo[1], 26)) 112 | result |= CPUF_AVX512PF; 113 | if (IS_BIT_SET(cpuinfo[1], 27)) 114 | result |= CPUF_AVX512ER; 115 | if (IS_BIT_SET(cpuinfo[1], 28)) 116 | result |= CPUF_AVX512CD; 117 | if (IS_BIT_SET(cpuinfo[1], 30)) 118 | result |= CPUF_AVX512BW; 119 | if (IS_BIT_SET(cpuinfo[1], 31)) 120 | result |= CPUF_AVX512VL; 121 | if (IS_BIT_SET(cpuinfo[2], 1)) 122 | result |= CPUF_AVX512VBMI; 123 | } 124 | } 125 | 126 | __cpuid(cpuinfo, 0x80000000); 127 | if (cpuinfo[0] >= 0x80000001) 128 | { 129 | __cpuid(cpuinfo, 0x80000001); 130 | 131 | if (IS_BIT_SET(cpuinfo[3], 31)) 132 | result |= CPUF_3DNOW; 133 | 134 | if (IS_BIT_SET(cpuinfo[3], 30)) 135 | result |= CPUF_3DNOW_EXT; 136 | 137 | if (IS_BIT_SET(cpuinfo[3], 22)) 138 | result |= CPUF_INTEGER_SSE; 139 | 140 | if (result & CPUF_AVX) { 141 | if (IS_BIT_SET(cpuinfo[2], 16)) 142 | result |= CPUF_FMA4; 143 | } 144 | } 145 | #elif defined(__aarch64__) || defined(__arm__) 146 | result |= CPUF_SSE | CPUF_SSE2 | CPUF_SSE3 | CPUF_SSSE3; 147 | result |= CPUF_SSE4_1 | CPUF_SSE4_2 | CPUF_AES; 148 | #endif 149 | 150 | return result; 151 | } 152 | 153 | int GetCPUFlags() { 154 | static int lCPUExtensionsAvailable = CPUCheckForExtensions(); 155 | return lCPUExtensionsAvailable; 156 | } -------------------------------------------------------------------------------- /include/avisynth/avs/posix.h: -------------------------------------------------------------------------------- 1 | // This program is free software; you can redistribute it and/or modify 2 | // it under the terms of the GNU General Public License as published by 3 | // the Free Software Foundation; either version 2 of the License, or 4 | // (at your option) any later version. 
5 | // 6 | // This program is distributed in the hope that it will be useful, 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | // GNU General Public License for more details. 10 | // 11 | // You should have received a copy of the GNU General Public License 12 | // along with this program; if not, write to the Free Software 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 14 | // http://www.gnu.org/copyleft/gpl.html . 15 | // 16 | // Linking Avisynth statically or dynamically with other modules is making a 17 | // combined work based on Avisynth. Thus, the terms and conditions of the GNU 18 | // General Public License cover the whole combination. 19 | // 20 | // As a special exception, the copyright holders of Avisynth give you 21 | // permission to link Avisynth with independent modules that communicate with 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license 23 | // terms of these independent modules, and to copy and distribute the 24 | // resulting combined work under terms of your choice, provided that 25 | // every copy of the combined work is accompanied by a complete copy of 26 | // the source code of Avisynth (the version of Avisynth used to produce the 27 | // combined work), being distributed under the terms of the GNU General 28 | // Public License plus this exception. An independent module is a module 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters, 30 | // import and export plugins, or graphical user interfaces. 
31 | 32 | #ifdef AVS_POSIX 33 | #ifndef AVSCORE_POSIX_H 34 | #define AVSCORE_POSIX_H 35 | 36 | #ifdef __cplusplus 37 | #include 38 | #endif 39 | #include 40 | #include 41 | 42 | // Define these MSVC-extension used in Avisynth 43 | #define __single_inheritance 44 | 45 | // These things don't exist in Linux 46 | #if defined(AVS_HAIKU) 47 | #undef __declspec 48 | #endif 49 | #define __declspec(x) 50 | #define lstrlen strlen 51 | #define lstrcmp strcmp 52 | #define lstrcmpi strcasecmp 53 | #define _stricmp strcasecmp 54 | #define _strnicmp strncasecmp 55 | #define _strdup strdup 56 | #define SetCurrentDirectory(x) chdir(x) 57 | #define SetCurrentDirectoryW(x) chdir(x) 58 | #define GetCurrentDirectoryW(x) getcwd(x) 59 | #define _putenv putenv 60 | #define _alloca alloca 61 | 62 | // Borrowing some compatibility macros from AvxSynth, slightly modified 63 | #define UInt32x32To64(a, b) ((uint64_t)(((uint64_t)((uint32_t)(a))) * ((uint32_t)(b)))) 64 | #define Int64ShrlMod32(a, b) ((uint64_t)((uint64_t)(a) >> (b))) 65 | #define Int32x32To64(a, b) ((int64_t)(((int64_t)((long)(a))) * ((long)(b)))) 66 | 67 | #define InterlockedIncrement(x) __sync_add_and_fetch((x), 1) 68 | #define InterlockedDecrement(x) __sync_sub_and_fetch((x), 1) 69 | #define MulDiv(nNumber, nNumerator, nDenominator) (int32_t) (((int64_t) (nNumber) * (int64_t) (nNumerator) + (int64_t) ((nDenominator)/2)) / (int64_t) (nDenominator)) 70 | 71 | #ifndef TRUE 72 | #define TRUE true 73 | #endif 74 | 75 | #ifndef FALSE 76 | #define FALSE false 77 | #endif 78 | 79 | #define S_FALSE (0x00000001) 80 | #define E_FAIL (0x80004005) 81 | #define FAILED(hr) ((hr) & 0x80000000) 82 | #define SUCCEEDED(hr) (!FAILED(hr)) 83 | 84 | // Statuses copied from comments in exception.cpp 85 | #define STATUS_GUARD_PAGE_VIOLATION 0x80000001 86 | #define STATUS_DATATYPE_MISALIGNMENT 0x80000002 87 | #define STATUS_BREAKPOINT 0x80000003 88 | #define STATUS_SINGLE_STEP 0x80000004 89 | #define STATUS_ACCESS_VIOLATION 0xc0000005 90 | #define 
STATUS_IN_PAGE_ERROR 0xc0000006 91 | #define STATUS_INVALID_HANDLE 0xc0000008 92 | #define STATUS_NO_MEMORY 0xc0000017 93 | #define STATUS_ILLEGAL_INSTRUCTION 0xc000001d 94 | #define STATUS_NONCONTINUABLE_EXCEPTION 0xc0000025 95 | #define STATUS_INVALID_DISPOSITION 0xc0000026 96 | #define STATUS_ARRAY_BOUNDS_EXCEEDED 0xc000008c 97 | #define STATUS_FLOAT_DENORMAL_OPERAND 0xc000008d 98 | #define STATUS_FLOAT_DIVIDE_BY_ZERO 0xc000008e 99 | #define STATUS_FLOAT_INEXACT_RESULT 0xc000008f 100 | #define STATUS_FLOAT_INVALID_OPERATION 0xc0000090 101 | #define STATUS_FLOAT_OVERFLOW 0xc0000091 102 | #define STATUS_FLOAT_STACK_CHECK 0xc0000092 103 | #define STATUS_FLOAT_UNDERFLOW 0xc0000093 104 | #define STATUS_INTEGER_DIVIDE_BY_ZERO 0xc0000094 105 | #define STATUS_INTEGER_OVERFLOW 0xc0000095 106 | #define STATUS_PRIVILEGED_INSTRUCTION 0xc0000096 107 | #define STATUS_STACK_OVERFLOW 0xc00000fd 108 | 109 | // Calling convension 110 | #ifndef AVS_HAIKU 111 | #define __stdcall 112 | #define __cdecl 113 | #endif 114 | 115 | // PowerPC OS X is really niche these days, but this painless equivocation 116 | // of the function/macro names used in posix_get_available_memory() 117 | // is all it takes to let it work. The G5 was 64-bit, and if 10.5 Leopard 118 | // can run in native 64-bit, it probably uses the names in that block as-is. 
119 | #ifdef AVS_MACOS 120 | #ifdef PPC32 121 | #define vm_statistics64_data_t vm_statistics_data_t 122 | #define HOST_VM_INFO64_COUNT HOST_VM_INFO_COUNT 123 | #define HOST_VM_INFO64 HOST_VM_INFO 124 | #define host_statistics64 host_statistics 125 | #endif // PPC32 126 | #endif // AVS_MACOS 127 | 128 | #endif // AVSCORE_POSIX_H 129 | #endif // AVS_POSIX 130 | -------------------------------------------------------------------------------- /include/avisynth/avs/config.h: -------------------------------------------------------------------------------- 1 | // Avisynth C Interface Version 0.20 2 | // Copyright 2003 Kevin Atkinson 3 | 4 | // This program is free software; you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation; either version 2 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program; if not, write to the Free Software 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit 17 | // http://www.gnu.org/copyleft/gpl.html . 
18 | // 19 | // As a special exception, I give you permission to link to the 20 | // Avisynth C interface with independent modules that communicate with 21 | // the Avisynth C interface solely through the interfaces defined in 22 | // avisynth_c.h, regardless of the license terms of these independent 23 | // modules, and to copy and distribute the resulting combined work 24 | // under terms of your choice, provided that every copy of the 25 | // combined work is accompanied by a complete copy of the source code 26 | // of the Avisynth C interface and Avisynth itself (with the version 27 | // used to produce the combined work), being distributed under the 28 | // terms of the GNU General Public License plus this exception. An 29 | // independent module is a module which is not derived from or based 30 | // on Avisynth C Interface, such as 3rd-party filters, import and 31 | // export plugins, or graphical user interfaces. 32 | 33 | #ifndef AVS_CONFIG_H 34 | #define AVS_CONFIG_H 35 | 36 | // Undefine this to get cdecl calling convention 37 | #define AVSC_USE_STDCALL 1 38 | 39 | // NOTE TO PLUGIN AUTHORS: 40 | // Because FRAME_ALIGN can be substantially higher than the alignment 41 | // a plugin actually needs, plugins should not use FRAME_ALIGN to check for 42 | // alignment. They should always request the exact alignment value they need. 43 | // This is to make sure that plugins work over the widest range of AviSynth 44 | // builds possible. 
45 | #define FRAME_ALIGN 64 46 | 47 | #if defined(_M_AMD64) || defined(__x86_64) 48 | # define X86_64 49 | #elif defined(_M_IX86) || defined(__i386__) 50 | # define X86_32 51 | // VS2017 introduced _M_ARM64 52 | #elif defined(_M_ARM64) || defined(__aarch64__) 53 | # define ARM64 54 | #elif defined(_M_ARM) || defined(__arm__) 55 | # define ARM32 56 | #elif defined(__PPC64__) 57 | # define PPC64 58 | #elif defined(_M_PPC) || defined(__PPC__) || defined(__POWERPC__) 59 | # define PPC32 60 | #else 61 | # error Unsupported CPU architecture. 62 | #endif 63 | 64 | // VC++ LLVM-Clang-cl MinGW-Gnu 65 | // MSVC x x 66 | // MSVC_PURE x 67 | // CLANG x 68 | // GCC x 69 | 70 | #if defined(__clang__) 71 | // Check clang first. clang-cl also defines __MSC_VER 72 | // We set MSVC because they are mostly compatible 73 | # define CLANG 74 | #if defined(_MSC_VER) 75 | # define MSVC 76 | # define AVS_FORCEINLINE __attribute__((always_inline)) 77 | #else 78 | # define AVS_FORCEINLINE __attribute__((always_inline)) inline 79 | #endif 80 | #elif defined(_MSC_VER) 81 | # define MSVC 82 | # define MSVC_PURE 83 | # define AVS_FORCEINLINE __forceinline 84 | #elif defined(__GNUC__) 85 | # define GCC 86 | # define AVS_FORCEINLINE __attribute__((always_inline)) inline 87 | #else 88 | # error Unsupported compiler. 89 | # define AVS_FORCEINLINE inline 90 | # undef __forceinline 91 | # define __forceinline inline 92 | #endif 93 | 94 | #if defined(_WIN32) 95 | # define AVS_WINDOWS 96 | #elif defined(__linux__) 97 | # define AVS_LINUX 98 | # define AVS_POSIX 99 | #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) 100 | # define AVS_BSD 101 | # define AVS_POSIX 102 | #elif defined(__APPLE__) 103 | # define AVS_MACOS 104 | # define AVS_POSIX 105 | #elif defined(__HAIKU__) 106 | # define AVS_HAIKU 107 | # define AVS_POSIX 108 | #else 109 | # error Operating system unsupported. 
110 | #endif 111 | 112 | // useful warnings disabler macros for supported compilers 113 | 114 | #if defined(_MSC_VER) 115 | #define DISABLE_WARNING_PUSH __pragma(warning( push )) 116 | #define DISABLE_WARNING_POP __pragma(warning( pop )) 117 | #define DISABLE_WARNING(warningNumber) __pragma(warning( disable : warningNumber )) 118 | 119 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE DISABLE_WARNING(4101) 120 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION DISABLE_WARNING(4505) 121 | // other warnings you want to deactivate... 122 | 123 | #elif defined(__GNUC__) || defined(__clang__) 124 | #define DO_PRAGMA(X) _Pragma(#X) 125 | #define DISABLE_WARNING_PUSH DO_PRAGMA(GCC diagnostic push) 126 | #define DISABLE_WARNING_POP DO_PRAGMA(GCC diagnostic pop) 127 | #define DISABLE_WARNING(warningName) DO_PRAGMA(GCC diagnostic ignored #warningName) 128 | 129 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE DISABLE_WARNING(-Wunused-variable) 130 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION DISABLE_WARNING(-Wunused-function) 131 | // other warnings you want to deactivate... 132 | 133 | #else 134 | #define DISABLE_WARNING_PUSH 135 | #define DISABLE_WARNING_POP 136 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE 137 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION 138 | // other warnings you want to deactivate... 139 | 140 | #endif 141 | 142 | #if defined(AVS_POSIX) 143 | #define NEW_AVSVALUE 144 | #else 145 | #define NEW_AVSVALUE 146 | #endif 147 | 148 | #if defined(AVS_WINDOWS) 149 | // Windows XP does not have proper initialization for 150 | // thread local variables. 
151 | // Use workaround instead __declspec(thread) 152 | #define XP_TLS 153 | #endif 154 | 155 | #endif //AVS_CONFIG_H 156 | -------------------------------------------------------------------------------- /include/vapoursynth/VSHelper.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * Copyright (c) 2012-2015 Fredrik Mellbin 3 | * --- Legal stuff --- 4 | * This program is free software. It comes without any warranty, to 5 | * the extent permitted by applicable law. You can redistribute it 6 | * and/or modify it under the terms of the Do What The Fuck You Want 7 | * To Public License, Version 2, as published by Sam Hocevar. See 8 | * http://sam.zoy.org/wtfpl/COPYING for more details. 9 | *****************************************************************************/ 10 | 11 | #ifndef VSHELPER_H 12 | #define VSHELPER_H 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #ifdef _WIN32 21 | #include 22 | #endif 23 | #include "VapourSynth.h" 24 | 25 | /* Visual Studio doesn't recognize inline in c mode */ 26 | #if defined(_MSC_VER) && !defined(__cplusplus) 27 | #define inline _inline 28 | #endif 29 | 30 | /* A kinda portable definition of the C99 restrict keyword (or its inofficial C++ equivalent) */ 31 | #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* Available in C99 */ 32 | #define VS_RESTRICT restrict 33 | #elif defined(__cplusplus) || defined(_MSC_VER) /* Almost all relevant C++ compilers support it so just assume it works */ 34 | #define VS_RESTRICT __restrict 35 | #else /* Not supported */ 36 | #define VS_RESTRICT 37 | #endif 38 | 39 | #ifdef _WIN32 40 | #define VS_ALIGNED_MALLOC(pptr, size, alignment) do { *(pptr) = _aligned_malloc((size), (alignment)); } while (0) 41 | #define VS_ALIGNED_FREE(ptr) do { _aligned_free((ptr)); } while (0) 42 | #else 43 | #define VS_ALIGNED_MALLOC(pptr, 
size, alignment) do { if(posix_memalign((void**)(pptr), (alignment), (size))) *((void**)pptr) = NULL; } while (0) 44 | #define VS_ALIGNED_FREE(ptr) do { free((ptr)); } while (0) 45 | #endif 46 | 47 | #define VSMAX(a,b) ((a) > (b) ? (a) : (b)) 48 | #define VSMIN(a,b) ((a) > (b) ? (b) : (a)) 49 | 50 | #ifdef __cplusplus 51 | /* A nicer templated malloc for all the C++ users out there */ 52 | #if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900) 53 | template 54 | #else 55 | template 56 | #endif 57 | static inline T* vs_aligned_malloc(size_t size, size_t alignment) { 58 | #ifdef _WIN32 59 | return (T*)_aligned_malloc(size, alignment); 60 | #else 61 | void *tmp = NULL; 62 | if (posix_memalign(&tmp, alignment, size)) 63 | tmp = 0; 64 | return (T*)tmp; 65 | #endif 66 | } 67 | 68 | static inline void vs_aligned_free(void *ptr) { 69 | VS_ALIGNED_FREE(ptr); 70 | } 71 | #endif /* __cplusplus */ 72 | 73 | /* convenience function for checking if the format never changes between frames */ 74 | static inline int isConstantFormat(const VSVideoInfo *vi) { 75 | return vi->height > 0 && vi->width > 0 && vi->format; 76 | } 77 | 78 | /* convenience function to check for if two clips have the same format (unknown/changeable will be considered the same too) */ 79 | static inline int isSameFormat(const VSVideoInfo *v1, const VSVideoInfo *v2) { 80 | return v1->height == v2->height && v1->width == v2->width && v1->format == v2->format; 81 | } 82 | 83 | /* multiplies and divides a rational number, such as a frame duration, in place and reduces the result */ 84 | static inline void muldivRational(int64_t *num, int64_t *den, int64_t mul, int64_t div) { 85 | /* do nothing if the rational number is invalid */ 86 | if (!*den) 87 | return; 88 | 89 | /* nobody wants to accidentally divide by zero */ 90 | assert(div); 91 | 92 | int64_t a, b; 93 | *num *= mul; 94 | *den *= div; 95 | a = *num; 96 | b = *den; 97 | while (b != 0) { 98 | int64_t t = a; 99 | a = b; 100 | b = t % b; 101 | 
} 102 | if (a < 0) 103 | a = -a; 104 | *num /= a; 105 | *den /= a; 106 | } 107 | 108 | /* reduces a rational number */ 109 | static inline void vs_normalizeRational(int64_t *num, int64_t *den) { 110 | muldivRational(num, den, 1, 1); 111 | } 112 | 113 | /* add two rational numbers and reduces the result */ 114 | static inline void vs_addRational(int64_t *num, int64_t *den, int64_t addnum, int64_t addden) { 115 | /* do nothing if the rational number is invalid */ 116 | if (!*den) 117 | return; 118 | 119 | /* nobody wants to accidentally add an invalid rational number */ 120 | assert(addden); 121 | 122 | if (*den == addden) { 123 | *num += addnum; 124 | } else { 125 | int64_t temp = addden; 126 | addnum *= *den; 127 | addden *= *den; 128 | *num *= temp; 129 | *den *= temp; 130 | 131 | *num += addnum; 132 | 133 | vs_normalizeRational(num, den); 134 | } 135 | } 136 | 137 | /* converts an int64 to int with saturation, useful to silence warnings when reading int properties among other things */ 138 | static inline int int64ToIntS(int64_t i) { 139 | if (i > INT_MAX) 140 | return INT_MAX; 141 | else if (i < INT_MIN) 142 | return INT_MIN; 143 | else return (int)i; 144 | } 145 | 146 | static inline void vs_bitblt(void *dstp, int dst_stride, const void *srcp, int src_stride, size_t row_size, size_t height) { 147 | if (height) { 148 | if (src_stride == dst_stride && src_stride == (int)row_size) { 149 | memcpy(dstp, srcp, row_size * height); 150 | } else { 151 | const uint8_t *srcp8 = (const uint8_t *)srcp; 152 | uint8_t *dstp8 = (uint8_t *)dstp; 153 | size_t i; 154 | for (i = 0; i < height; i++) { 155 | memcpy(dstp8, srcp8, row_size); 156 | srcp8 += src_stride; 157 | dstp8 += dst_stride; 158 | } 159 | } 160 | } 161 | } 162 | 163 | /* check if the frame dimensions are valid for a given format */ 164 | /* returns non-zero for valid width and height */ 165 | static inline int areValidDimensions(const VSFormat *fi, int width, int height) { 166 | return !(width % (1 << 
fi->subSamplingW) || height % (1 << fi->subSamplingH)); 167 | } 168 | 169 | /* Visual Studio doesn't recognize inline in c mode */ 170 | #if defined(_MSC_VER) && !defined(__cplusplus) 171 | #undef inline 172 | #endif 173 | 174 | #endif 175 | -------------------------------------------------------------------------------- /include/dualsynth/avs_wrapper.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Xinyue Lu 3 | * 4 | * DualSynth wrapper - AviSynth+. 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | namespace Plugin { 11 | extern const char* Description; 12 | } 13 | 14 | namespace AVSInterface 15 | { 16 | struct AVSInDelegator final : InDelegator { 17 | const AVSValue _args; 18 | std::unordered_map _params_index_map; 19 | int NameToIndex(const char* name) { 20 | std::string name_string(name); 21 | if (_params_index_map.find(name_string) == _params_index_map.end()) 22 | throw "Unknown parameter during NameToIndex"; 23 | return _params_index_map[name_string]; 24 | } 25 | void Read(const char* name, int& output) override { 26 | output = _args[NameToIndex(name)].AsInt(output); 27 | } 28 | void Read(const char* name, int64_t& output) override { 29 | output = _args[NameToIndex(name)].AsInt(static_cast(output)); 30 | } 31 | void Read(const char* name, float& output) override { 32 | output = static_cast(_args[NameToIndex(name)].AsFloat(output)); 33 | } 34 | void Read(const char* name, double& output) override { 35 | auto _default = output; 36 | output = _args[NameToIndex(name)].AsFloat(NAN); 37 | if (std::isnan(output)) 38 | output = _default; 39 | } 40 | void Read(const char* name, bool& output) override { 41 | output = _args[NameToIndex(name)].AsBool(output); 42 | } 43 | void Read(const char* name, std::string& output) override { 44 | const char * result = _args[NameToIndex(name)].AsString(output.c_str()); 45 | if (result) 46 | output = result; 47 | } 48 | void Read(const char* name, void*& output) override { 49 
| PClip* clip = new PClip(_args[NameToIndex(name)].AsClip()); 50 | output = (void *)(clip); 51 | } 52 | void Read(const char* name, std::vector& output) override { 53 | auto arg = _args[NameToIndex(name)]; 54 | if (!arg.IsArray()) 55 | throw "Argument is not array"; 56 | auto size = arg.ArraySize(); 57 | output.clear(); 58 | for (int i = 0; i < size; i++) 59 | output.push_back(arg[i].AsInt()); 60 | } 61 | void Read(const char* name, std::vector& output) override { 62 | auto arg = _args[NameToIndex(name)]; 63 | if (!arg.IsArray()) 64 | throw "Argument is not array"; 65 | auto size = arg.ArraySize(); 66 | output.clear(); 67 | for (int i = 0; i < size; i++) 68 | output.push_back(arg[i].AsInt()); 69 | } 70 | void Read(const char* name, std::vector& output) override { 71 | auto arg = _args[NameToIndex(name)]; 72 | if (!arg.IsArray()) 73 | throw "Argument is not array"; 74 | auto size = arg.ArraySize(); 75 | output.clear(); 76 | for (int i = 0; i < size; i++) 77 | output.push_back(static_cast(arg[i].AsFloat())); 78 | } 79 | void Read(const char* name, std::vector& output) override { 80 | auto arg = _args[NameToIndex(name)]; 81 | if (!arg.IsArray()) 82 | throw "Argument is not array"; 83 | auto size = arg.ArraySize(); 84 | output.clear(); 85 | for (int i = 0; i < size; i++) 86 | output.push_back(arg[i].AsFloat()); 87 | } 88 | void Read(const char* name, std::vector& output) override { 89 | auto arg = _args[NameToIndex(name)]; 90 | if (!arg.IsArray()) 91 | throw "Argument is not array"; 92 | auto size = arg.ArraySize(); 93 | output.clear(); 94 | for (int i = 0; i < size; i++) 95 | output.push_back(arg[i].AsBool()); 96 | } 97 | void Free(void*& clip) override { 98 | PClip* c = (PClip *)(clip); 99 | delete c; 100 | clip = nullptr; 101 | } 102 | 103 | AVSInDelegator(const AVSValue args, std::vector params) : _args(args) 104 | { 105 | int idx = 0; 106 | for (auto &¶m : params) 107 | { 108 | if (!param.AVSEnabled) continue; 109 | _params_index_map[param.Name] = idx++; 110 | } 
111 | } 112 | }; 113 | 114 | struct AVSFetchFrameFunctor final : FetchFrameFunctor { 115 | PClip _clip; 116 | VideoInfo _vi; 117 | IScriptEnvironment* _env; 118 | std::mutex fetch_frame_mutex; 119 | AVSFetchFrameFunctor(PClip clip, VideoInfo vi, IScriptEnvironment * env) 120 | : _clip(clip), _vi(vi), _env(env) {} 121 | DSFrame operator()(int n) override { 122 | std::lock_guard guard(fetch_frame_mutex); 123 | auto frame = _clip->GetFrame(n, _env); 124 | return DSFrame(frame, _vi, _env); 125 | } 126 | ~AVSFetchFrameFunctor() override {} 127 | }; 128 | 129 | template 130 | struct AVSWrapper : IClip 131 | { 132 | AVSValue _args; 133 | IScriptEnvironment* _env; 134 | FilterType data; 135 | PClip clip; 136 | VideoInfo vi; 137 | AVSFetchFrameFunctor* functor {nullptr}; 138 | 139 | AVSWrapper(AVSValue args, IScriptEnvironment* env) 140 | : _args(args), _env(env) {} 141 | 142 | void Initialize() 143 | { 144 | auto input_vi = DSVideoInfo(); 145 | if (_args[0].IsClip()) { 146 | clip = _args[0].AsClip(); 147 | input_vi = DSVideoInfo(clip->GetVideoInfo()); 148 | functor = new AVSFetchFrameFunctor(clip, clip->GetVideoInfo(), _env); 149 | } 150 | auto argument = AVSInDelegator(_args, data.Params()); 151 | data.Initialize(&argument, input_vi, functor); 152 | } 153 | 154 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment * env) override { 155 | std::unordered_map in_frames; 156 | if (functor) { 157 | std::vector requests = data.RequestReferenceFrames(n); 158 | for (auto &&i : requests) { 159 | auto frame = clip->GetFrame(i, env); 160 | in_frames[i] = DSFrame(frame, vi, env); 161 | } 162 | } 163 | else 164 | in_frames[n] = DSFrame(env); 165 | 166 | return data.GetFrame(n, in_frames).ToAVSFrame(); 167 | } 168 | 169 | const VideoInfo& __stdcall GetVideoInfo() override { 170 | auto output_vi = data.GetOutputVI(); 171 | vi = output_vi.ToAVSVI(); 172 | return vi; 173 | } 174 | 175 | void __stdcall GetAudio(void* buf, int64_t start, int64_t count, IScriptEnvironment* env) override 
{ if (clip) clip->GetAudio(buf, start, count, env); } 176 | bool __stdcall GetParity(int n) override { return clip ? clip->GetParity(n) : false; } 177 | int __stdcall SetCacheHints(int cachehints, int frame_range) override { return data.SetCacheHints(cachehints, frame_range); } 178 | ~AVSWrapper() { 179 | delete functor; 180 | } 181 | }; 182 | 183 | template 184 | AVSValue __cdecl Create(AVSValue args, void* user_data, IScriptEnvironment* env) 185 | { 186 | auto filter = new AVSWrapper(args, env); 187 | try { 188 | filter->Initialize(); 189 | } 190 | catch (const char *err) { 191 | env->ThrowError("%s: %s", filter->data.AVSName(), err); 192 | } 193 | return filter; 194 | } 195 | 196 | template 197 | void RegisterFilter(IScriptEnvironment* env) { 198 | FilterType filter; 199 | env->AddFunction(filter.AVSName(), filter.AVSParams().c_str(), Create, nullptr); 200 | } 201 | } 202 | 203 | const AVS_Linkage *AVS_linkage = NULL; 204 | 205 | extern "C" __declspec(dllexport) const char* __stdcall AvisynthPluginInit3(IScriptEnvironment* env, AVS_Linkage* linkage) 206 | { 207 | AVS_linkage = linkage; 208 | auto filters = RegisterAVSFilters(); 209 | for (auto &&RegisterFilter : filters) { 210 | RegisterFilter(env); 211 | } 212 | return Plugin::Description; 213 | } 214 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Neo f3kdb (forked from flash3kyuu_deband) 2 | 3 | Neo f3kdb Copyright(C) 2019-2020 Xinyue Lu, and previous developers 4 | 5 | F3kdb is a deband filter. It was originally written for AviUtl by [bunyuchan](https://twitter.com/bunyuchan) and later ported to AviSynth by [SAPikachu](https://github.com/SAPikachu) many years ago. 6 | 7 | Legacy format support was removed and a few options that are no longer useful were also removed. Due to API change, the project has been renamed from f3kdb to Neo_f3kdb to avoid confusion. 
SSE4.1 is now required to run the optimized routines. SSE4.1 has been supported since Intel Penryn (2007) and AMD Bulldozer (2011). An AVX routine didn't show much performance benefit and is not included. 8 | 9 | ## Usage 10 | 11 | ```python 12 | # AviSynth+ 13 | LoadPlugin("neo-f3kdb.dll") 14 | neo_f3kdb(clip, y=64, cb=64, cr=64, grainy=0, grainc=0, ...) 15 | # VapourSynth 16 | core.neo_f3kdb.Deband(clip, y=64, cb=64, cr=64, grainy=0, grainc=0, ...) 17 | ``` 18 | 19 | [Check original usage documents.](https://f3kdb.readthedocs.io/en/stable/usage.html) 20 | 21 | Below are the additional parameters or those that differ from the original ones. 22 | 23 | - *range* (>r9) 24 | 25 | Banding detection range. 26 | 27 | Must be between `0` and `255`. 28 | 29 | Default value - `15`. 30 | 31 | - *sample_mode* 32 | 33 | * 1: Column references. 34 | 35 | + 36 | o 37 | + 38 | 39 | * 2: Square references. 40 | 41 | + + 42 | o 43 | + + 44 | 45 | * 3: Row references. (> r2) 46 | 47 | + o + 48 | 49 | * 4: Average of sample modes 1 and 3. (> r2) 50 | 51 | + 52 | (o) => A 53 | + 54 | 55 | + (o) + => B 56 | 57 | (A + B) / 2 58 | 59 | * 5: (Integer-based) Similar to sample mode 4 but uses multiple thresholds for detail preservation. (>r8)
60 | Speed-optimized version of https://forum.doom9.org/showthread.php?p=1652256#post1652256.
61 | `blur_first` has no effect in this sample mode.
62 | `Y`/`Cb`/`Cr` - for this mode they are used for the `avgDif` check – the difference between the current pixel and the average of all four cross-shaped reference pixels. 63 | 64 | * 6: (Floating-point) Similar to sample mode 4 but uses multiple thresholds for detail preservation. (>r9)
65 | Direct implementation of https://forum.doom9.org/showthread.php?p=1652256#post1652256.
66 | `blur_first` has no effect in this sample mode.
67 | `Y`/`Cb`/`Cr` - for this mode they are used for the `avgDif` check – the difference between the current pixel and the average of all four cross-shaped reference pixels. 68 | 69 | * 7: (Floating-point) An extension of sample_mode=6 that adds a gradient angle check for more intelligent detail preservation. (>r9)
70 | Direct implementation of https://forum.doom9.org/showthread.php?p=1652256#post1652256.
71 | `blur_first` has no effect in this sample mode.
72 | `Y`/`Cb`/`Cr` - for this mode they are used for the `avgDif` check – the difference between the current pixel and the average of all four cross-shaped reference pixels. 73 | 74 | Reference points are randomly picked within the `range`. 75 | 76 | - *input_depth* (removed) 77 | 78 | - *input_mode* (removed) 79 | 80 | - *output_mode* (removed) 81 | 82 | - *opt* 83 | 84 | Sets which CPU optimizations to use. 85 | 86 | `sample_mode=1`, `sample_mode=2`, `sample_mode=3`, and `sample_mode=4` have `C++` and `SSE4.1` code. 87 | 88 | `sample_mode=5`, `sample_mode=6` and `sample_mode=7` have `C++`, `SSE4.1`, `AVX2` and `AVX-512` code. 89 | 90 | - `-1`: Auto-detect. 91 | - `0`: Use C++ code. 92 | - `1`: Use SSE4.1 code. 93 | - `2`: Use AVX2 code. 94 | - `3`: Use AVX-512 code. 95 | 96 | Default: `-1`. 97 | 98 | - *mt* 99 | 100 | Process planes in parallel. Default: true. 101 | 102 | If you notice a deadlock under extreme conditions, try disabling it. 103 | 104 | - *scale* (> r8) 105 | 106 | Whether to use threshold parameters (Y, Cb, Cr...) within the internal bit depth range (0..65535). 107 | 108 | Default: `false`. 109 | 110 | - *Y_1 / Cb_1 / Cr_1 (maxDif)* (> r8) 111 | 112 | Detail protection threshold (max difference) for `sample_mode=5`, `sample_mode=6` and `sample_mode=7`. 113 | 114 | This threshold applies to the `maxDif` check. `maxDif` is the largest absolute difference found between the current pixel and any of its four individual cross-shaped reference pixels. If this `maxDif` is greater than or equal to `Y_1`/`Cb_1`/`Cr_1`, the pixel is considered detail. 115 | 116 | Helps protect sharp edges and fine details from being blurred by the debanding process. 117 | 118 | The valid range is the same as `Y`/`Cb`/`Cr`. 119 | 120 | Default value - they are equal to `Y`/`Cb`/`Cr`. 121 | 122 | - *Y_2 / Cb_2 / Cr_2 (midDifs)* (> r8) 123 | 124 | Gradient/Texture protection threshold (mid-pair difference) for `sample_mode=5`, `sample_mode=6` and `sample_mode=7`.
125 | 126 | This threshold applies to the `midDif` checks. `midDif` measures how much the current pixel deviates from the midpoint of a pair of opposing reference pixels (one check for the vertical pair, one for the horizontal pair). If the current pixel is far from this midpoint (i.e., `midDif` is greater than or equal to `Y_2` / `Cb_2` / `Cr_2`), it might indicate a texture. 127 | 128 | This helps distinguish true banding in gradients from textured areas or complex details. 129 | 130 | The valid range is the same as `Y`/`Cb`/`Cr`. 131 | 132 | Default value - they are equal to `Y`/`Cb`/`Cr`. 133 | 134 | - *angle_boost* (>r9) 135 | 136 | A multiplier used in `sample_mode=7` to increase the debanding strength on consistent gradients. 137 | 138 | When the gradient angle check passes, the `Y`/`Cb`/`Cr`, `Y_1`/`Cb_1`/`Cr_1`, and `Y_2`/`Cb_2`/`Cr_2` thresholds are multiplied by this factor. 139 | 140 | A value greater than `1.0` boosts the strength. A value of `1.0` has no effect. 141 | 142 | Must be a positive number. 143 | 144 | Default value - `1.5`. 145 | 146 | - *max_angle* (>r9) 147 | 148 | The threshold for the gradient angle check in `sample_mode=7`. 149 | 150 | It represents the maximum allowed difference between the gradient angle of the center pixel and its reference pixels for the `angle_boost` to be applied. The gradient angle is normalized to a `[0.0, 1.0]` range. 151 | 152 | A smaller value is stricter and requires a more consistent gradient. A larger value is more lenient. 153 | 154 | The valid range is `0.0` to `1.0`. 155 | 156 | Default value - `0.15`. 157 | 158 | ## Compilation 159 | 160 | ```cmd 161 | cmake -B build\x86 -S . -DCMAKE_GENERATOR_PLATFORM=Win32 -D_DIR=x86 162 | cmake -B build\x64 -S . -DCMAKE_GENERATOR_PLATFORM=x64 -D_DIR=x64 163 | cmake --build build\x86 --config Release 164 | cmake --build build\x64 --config Release 165 | ``` 166 | 167 | ## Compilation (GCC, Windows) 168 | 169 | ```bash 170 | cmake -B build/gcc -S .
-G "MSYS Makefiles" -D_DIR=gcc 171 | cmake --build build/gcc 172 | ``` 173 | 174 | ## Compilation (GCC, Unix-like) 175 | 176 | ```bash 177 | cmake -B build/gcc -S . -G "Unix Makefiles" -D_DIR=gcc 178 | cmake --build build/gcc 179 | ``` 180 | 181 | ## License 182 | 183 | This program is free software: you can redistribute it and/or modify 184 | it under the terms of the GNU General Public License as published by 185 | the Free Software Foundation, either version 3 of the License, or 186 | (at your option) any later version. 187 | 188 | This program is distributed in the hope that it will be useful, 189 | but WITHOUT ANY WARRANTY; without even the implied warranty of 190 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 191 | GNU General Public License for more details. 192 | 193 | You should have received a copy of the GNU General Public License 194 | along with this program. If not, see <https://www.gnu.org/licenses/>. 195 | -------------------------------------------------------------------------------- /include/dualsynth/vs_wrapper.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Xinyue Lu 3 | * 4 | * DualSynth wrapper - VapourSynth.
5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | namespace Plugin { 11 | extern const char* Identifier; 12 | extern const char* Namespace; 13 | extern const char* Description; 14 | } 15 | 16 | namespace VSInterface { 17 | const VSAPI * API; 18 | 19 | struct VSInDelegator final : InDelegator { 20 | const VSMap *_in; 21 | const VSAPI *_vsapi; 22 | int _err; 23 | void Read(const char* name, int& output) override { 24 | auto _default = output; 25 | output = static_cast(_vsapi->propGetInt(_in, name, 0, &_err)); 26 | if (_err) output = _default; 27 | } 28 | void Read(const char* name, int64_t& output) override { 29 | auto _default = output; 30 | output = _vsapi->propGetInt(_in, name, 0, &_err); 31 | if (_err) output = _default; 32 | } 33 | void Read(const char* name, float& output) override { 34 | auto _default = output; 35 | output = static_cast(_vsapi->propGetFloat(_in, name, 0, &_err)); 36 | if (_err) output = _default; 37 | } 38 | void Read(const char* name, double& output) override { 39 | auto _default = output; 40 | output = _vsapi->propGetFloat(_in, name, 0, &_err); 41 | if (_err) output = _default; 42 | } 43 | void Read(const char* name, bool& output) override { 44 | auto output_int = _vsapi->propGetInt(_in, name, 0, &_err); 45 | if (!_err) output = output_int != 0; 46 | } 47 | void Read(const char* name, std::string& output) override { 48 | auto output_str = _vsapi->propGetData(_in, name, 0, &_err); 49 | if (!_err) output = output_str; 50 | } 51 | void Read(const char* name, std::vector& output) override { 52 | auto size = _vsapi->propNumElements(_in, name); 53 | if (size < 0) return; 54 | output.clear(); 55 | for (int i = 0; i < size; i++) 56 | output.push_back(static_cast(_vsapi->propGetInt(_in, name, i, &_err))); 57 | } 58 | void Read(const char* name, std::vector& output) override { 59 | auto size = _vsapi->propNumElements(_in, name); 60 | if (size < 0) return; 61 | output.clear(); 62 | for (int i = 0; i < size; i++) 63 | output.push_back(_vsapi->propGetInt(_in, 
name, i, &_err)); 64 | } 65 | void Read(const char* name, std::vector& output) override { 66 | auto size = _vsapi->propNumElements(_in, name); 67 | if (size < 0) return; 68 | output.clear(); 69 | for (int i = 0; i < size; i++) 70 | output.push_back(static_cast(_vsapi->propGetFloat(_in, name, i, &_err))); 71 | } 72 | void Read(const char* name, std::vector& output) override { 73 | auto size = _vsapi->propNumElements(_in, name); 74 | if (size < 0) return; 75 | output.clear(); 76 | for (int i = 0; i < size; i++) 77 | output.push_back(_vsapi->propGetFloat(_in, name, i, &_err)); 78 | } 79 | void Read(const char* name, std::vector& output) override { 80 | auto size = _vsapi->propNumElements(_in, name); 81 | if (size < 0) return; 82 | output.clear(); 83 | for (int i = 0; i < size; i++) 84 | output.push_back(_vsapi->propGetInt(_in, name, i, &_err)); 85 | } 86 | void Read(const char* name, void*& output) override { 87 | output = reinterpret_cast(_vsapi->propGetNode(_in, name, 0, &_err)); 88 | } 89 | void Free(void*& clip) override { 90 | _vsapi->freeNode(reinterpret_cast(clip)); 91 | clip = nullptr; 92 | } 93 | VSInDelegator(const VSMap *in, const VSAPI *vsapi) : _in(in), _vsapi(vsapi) {} 94 | }; 95 | 96 | struct VSFetchFrameFunctor final : FetchFrameFunctor { 97 | VSNodeRef *_vs_clip; 98 | VSCore *_core; 99 | const VSAPI *_vsapi; 100 | VSFrameContext *_frameCtx; 101 | VSFetchFrameFunctor(VSNodeRef *clip, VSCore *core, const VSAPI *vsapi) 102 | : _vs_clip(clip), _core(core), _vsapi(vsapi) {} 103 | DSFrame operator()(int n) override { 104 | return DSFrame(_vsapi->getFrameFilter(n, _vs_clip, _frameCtx), _core, _vsapi); 105 | } 106 | ~VSFetchFrameFunctor() override { 107 | _vsapi->freeNode(_vs_clip); 108 | } 109 | }; 110 | 111 | template 112 | void VS_CC Initialize(VSMap *in, VSMap *out, void **instanceData, VSNode *node, VSCore *core, const VSAPI *vsapi) { 113 | auto Data = reinterpret_cast(*instanceData); 114 | auto output_vi = Data->GetOutputVI(); 115 | 
vsapi->setVideoInfo(output_vi.ToVSVI(core, vsapi), 1, node); 116 | } 117 | 118 | template 119 | void VS_CC Delete(void *instanceData, VSCore *core, const VSAPI *vsapi) { 120 | auto filter = reinterpret_cast(instanceData); 121 | auto functor = reinterpret_cast(filter->fetch_frame); 122 | delete functor; 123 | delete filter; 124 | } 125 | 126 | template 127 | const VSFrameRef* VS_CC GetFrame(int n, int activationReason, void **instanceData, void **frameData, VSFrameContext *frameCtx, VSCore *core, const VSAPI *vsapi) { 128 | auto filter = reinterpret_cast(*instanceData); 129 | auto functor = reinterpret_cast(filter->fetch_frame); 130 | if (functor) 131 | functor->_frameCtx = frameCtx; 132 | 133 | std::vector ref_frames; 134 | if (activationReason == VSActivationReason::arInitial) { 135 | if (functor) { 136 | ref_frames = filter->RequestReferenceFrames(n); 137 | for (auto &&i : ref_frames) 138 | vsapi->requestFrameFilter(i, functor->_vs_clip, frameCtx); 139 | } 140 | else { 141 | std::unordered_map in_frames; 142 | in_frames[n] = DSFrame(core, vsapi); 143 | auto vs_frame = (filter->GetFrame(n, in_frames).ToVSFrame()); 144 | return vs_frame; 145 | } 146 | } 147 | else if (activationReason == VSActivationReason::arAllFramesReady) { 148 | std::unordered_map in_frames; 149 | if (functor) { 150 | ref_frames = filter->RequestReferenceFrames(n); 151 | for (auto &&i : ref_frames) 152 | in_frames[i] = DSFrame(vsapi->getFrameFilter(i, functor->_vs_clip, frameCtx), core, vsapi); 153 | } 154 | else 155 | in_frames[n] = DSFrame(core, vsapi); 156 | 157 | auto vs_frame = (filter->GetFrame(n, in_frames).ToVSFrame()); 158 | return vs_frame; 159 | } 160 | return nullptr; 161 | } 162 | 163 | template 164 | void VS_CC Create(const VSMap *in, VSMap *out, void *userData, VSCore *core, const VSAPI *vsapi) { 165 | auto filter = new FilterType{}; 166 | auto argument = VSInDelegator(in, vsapi); 167 | try { 168 | void* clip = nullptr; 169 | VSFetchFrameFunctor* functor = nullptr; 170 | 
DSVideoInfo input_vi; 171 | try { 172 | argument.Read("clip", clip); 173 | if (clip) { 174 | auto vs_clip = reinterpret_cast(clip); 175 | functor = new VSFetchFrameFunctor(vs_clip, core, vsapi); 176 | input_vi = DSVideoInfo(vsapi->getVideoInfo(vs_clip)); 177 | } 178 | } 179 | catch(const char *) { /* No clip, source filter */ } 180 | filter->Initialize(&argument, input_vi, functor); 181 | vsapi->createFilter(in, out, filter->VSName(), Initialize, GetFrame, Delete, filter->VSMode(), 0, filter, core); 182 | } 183 | catch(const char *err){ 184 | char msg_buff[256]; 185 | snprintf(msg_buff, 256, "%s: %s", filter->VSName(), err); 186 | vsapi->setError(out, msg_buff); 187 | delete filter; 188 | } 189 | } 190 | 191 | template 192 | void RegisterFilter(VSRegisterFunction registerFunc, VSPlugin* vsplugin) { 193 | FilterType filter; 194 | registerFunc(filter.VSName(), filter.VSParams().c_str(), Create, nullptr, vsplugin); 195 | } 196 | 197 | void RegisterPlugin(VSConfigPlugin configFunc, VSPlugin* vsplugin) { 198 | configFunc(Plugin::Identifier, Plugin::Namespace, Plugin::Description, VAPOURSYNTH_API_VERSION, 1, vsplugin); 199 | } 200 | } 201 | 202 | VS_EXTERNAL_API(void) VapourSynthPluginInit(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin* vsplugin) { 203 | VSInterface::RegisterPlugin(configFunc, vsplugin); 204 | auto filters = RegisterVSFilters(); 205 | for (auto &&RegisterFilter : filters) { 206 | RegisterFilter(registerFunc, vsplugin); 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /include/dualsynth/ds_frame.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Xinyue Lu 3 | * 4 | * DualSynth wrapper - DSFrame. 
5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | struct DSFrame 11 | { 12 | int FrameWidth {0}, FrameHeight {0}; 13 | 14 | const unsigned char ** SrcPointers {nullptr}; 15 | int * StrideBytes {nullptr}; 16 | unsigned char ** DstPointers {nullptr}; 17 | DSFormat Format; 18 | 19 | // VapourSynth Interface 20 | const VSFrameRef* _vssrc {nullptr}; 21 | VSFrameRef* _vsdst {nullptr}; 22 | const VSCore* _vscore {nullptr}; 23 | const VSAPI* _vsapi {nullptr}; 24 | const VSFormat* _vsformat {nullptr}; 25 | 26 | // AviSynth+ Interface 27 | PVideoFrame _avssrc; 28 | VideoInfo _vi; 29 | IScriptEnvironment * _env {nullptr}; 30 | int planes_y[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A }; 31 | int planes_r[4] = { PLANAR_R, PLANAR_G, PLANAR_B, PLANAR_A }; 32 | int *planes {0}; 33 | 34 | DSFrame() {} 35 | DSFrame(const VSCore* vscore, const VSAPI* vsapi) 36 | : _vscore(vscore), _vsapi(vsapi) {} 37 | DSFrame(const VSFrameRef* src, const VSCore* vscore, const VSAPI* vsapi) 38 | : _vssrc(src), _vscore(vscore), _vsapi(vsapi) 39 | , _vsformat(src ? _vsapi->getFrameFormat(src) : nullptr) 40 | { 41 | if (_vssrc) { 42 | Format = DSFormat(_vsformat); 43 | FrameWidth = _vsapi->getFrameWidth(src, 0); 44 | FrameHeight = _vsapi->getFrameHeight(src, 0); 45 | 46 | SrcPointers = new const unsigned char*[Format.Planes]; 47 | StrideBytes = new int[Format.Planes]; 48 | for (int i = 0; i < Format.Planes; i++) { 49 | SrcPointers[i] = _vsapi->getReadPtr(src, i); 50 | StrideBytes[i] = _vsapi->getStride(src, i); 51 | } 52 | } 53 | } 54 | 55 | DSFrame(IScriptEnvironment * env) 56 | : _env(env) {} 57 | DSFrame(PVideoFrame &src, VideoInfo vi, IScriptEnvironment * env) 58 | : _avssrc(src), _vi(vi), _env(env) 59 | { 60 | if (_avssrc) { 61 | Format = DSFormat(_vi.pixel_type); 62 | planes = Format.IsFamilyYUV ? 
planes_y : planes_r; 63 | FrameWidth = _vi.width; 64 | FrameHeight = _vi.height; 65 | 66 | SrcPointers = new const unsigned char*[Format.Planes]; 67 | StrideBytes = new int[Format.Planes]; 68 | for (int i = 0; i < Format.Planes; i++) { 69 | SrcPointers[i] = src->GetReadPtr(planes[i]); 70 | StrideBytes[i] = src->GetPitch(planes[i]); 71 | } 72 | } 73 | } 74 | 75 | DSFrame Create() { return Create(false, false); } 76 | DSFrame Create(bool copy) { return Create(copy, false); } 77 | DSFrame Create(bool copy, bool inplace) 78 | { 79 | if (_vssrc) { 80 | // Create a new VS frame 81 | const VSFrameRef* copy_frames[1] {ToVSFrame()}; 82 | int copy_planes[4] = {0}; 83 | auto vsframe = copy ? 84 | _vsapi->newVideoFrame2(_vsformat, FrameWidth, FrameHeight, copy_frames, copy_planes, _vssrc, const_cast(_vscore)) : 85 | _vsapi->newVideoFrame(_vsformat, FrameWidth, FrameHeight, _vssrc, const_cast(_vscore)); 86 | _vsapi->freeFrame(copy_frames[0]); 87 | 88 | DSFrame new_frame(vsframe, _vscore, _vsapi); 89 | new_frame._vsdst = vsframe; 90 | new_frame.DstPointers = new unsigned char*[Format.Planes]; 91 | for (int i = 0; i < Format.Planes; i++) 92 | new_frame.DstPointers[i] = _vsapi->getWritePtr(vsframe, i); 93 | return new_frame; 94 | } 95 | else if(_avssrc) { 96 | // Create a new AVS frame 97 | return Create(_vi); 98 | } 99 | throw "Unable to create from nothing."; 100 | } 101 | DSFrame Create(DSVideoInfo vi) { 102 | planes = vi.Format.IsFamilyYUV ? 
planes_y : planes_r; 103 | if (_vsapi) { 104 | auto vsframe = _vsapi->newVideoFrame(vi.Format.ToVSFormat(_vscore, _vsapi), vi.Width, vi.Height, _vssrc, const_cast(_vscore)); 105 | DSFrame new_frame(vsframe, _vscore, _vsapi); 106 | new_frame._vsdst = vsframe; 107 | /* Size the write-pointer table by the NEW frame's plane count: vi.Format may differ from this frame's Format (e.g. an empty source-filter frame). */ new_frame.DstPointers = new unsigned char*[new_frame.Format.Planes]; 108 | for (int i = 0; i < new_frame.Format.Planes; i++) 109 | new_frame.DstPointers[i] = _vsapi->getWritePtr(vsframe, i); 110 | return new_frame; 111 | } 112 | else if (_env) { 113 | auto avsvi = vi.ToAVSVI(); 114 | bool has_at_least_v8 = true; 115 | try { _env->CheckVersion(8); } 116 | catch (const AvisynthError&) { has_at_least_v8 = false; } 117 | auto new_avsframe = (has_at_least_v8) ? _env->NewVideoFrameP(avsvi, &_avssrc) : _env->NewVideoFrame(avsvi); 118 | /* Same reasoning as the VapourSynth branch: the requested format decides the plane count, matching the `planes` table selected from vi.Format above. */ auto dstp = new unsigned char*[vi.Format.Planes]; 119 | for (int i = 0; i < vi.Format.Planes; i++) 120 | dstp[i] = new_avsframe->GetWritePtr(planes[i]); 121 | DSFrame new_frame(new_avsframe, avsvi, _env); 122 | new_frame.DstPointers = dstp; 123 | return new_frame; 124 | } 125 | throw "Unable to create from nothing."; 126 | } 127 | 128 | const VSFrameRef* ToVSFrame() 129 | { 130 | return _vsdst ? _vsapi->cloneFrameRef(_vsdst) : 131 | _vssrc ? _vsapi->cloneFrameRef(_vssrc) : 132 | nullptr; 133 | } 134 | PVideoFrame ToAVSFrame() {return _avssrc ?
_avssrc : nullptr;} 135 | 136 | ~DSFrame() 137 | { 138 | if (SrcPointers) 139 | delete[] SrcPointers; 140 | if (DstPointers) 141 | delete[] DstPointers; 142 | if (StrideBytes) 143 | delete[] StrideBytes; 144 | if (_vsdst && _vsdst != _vssrc) 145 | _vsapi->freeFrame(_vsdst); 146 | if (_vssrc) 147 | _vsapi->freeFrame(_vssrc); 148 | } 149 | 150 | DSFrame(const DSFrame & old) 151 | { 152 | _avssrc = old._avssrc; 153 | std::memcpy(this, &old, sizeof(DSFrame)); /* memcpy leaves `planes` aimed at old's member arrays; re-aim it at our own copy */ if (old.planes == old.planes_y) planes = planes_y; else if (old.planes == old.planes_r) planes = planes_r; 154 | if (old.SrcPointers) { 155 | SrcPointers = new const unsigned char*[Format.Planes]; 156 | std::copy_n(old.SrcPointers, Format.Planes, SrcPointers); 157 | } 158 | if (old.DstPointers) { 159 | DstPointers = new unsigned char*[Format.Planes]; 160 | std::copy_n(old.DstPointers, Format.Planes, DstPointers); 161 | } 162 | if (old.StrideBytes) { 163 | StrideBytes = new int[Format.Planes]; 164 | std::copy_n(old.StrideBytes, Format.Planes, StrideBytes); 165 | } 166 | if (_vssrc) 167 | _vssrc = _vsapi->cloneFrameRef(old._vssrc); 168 | if (old._vsdst) /* keep dst aliased to our own src reference when the original aliased them, so the destructor's `_vsdst != _vssrc` test frees every reference exactly once */ 169 | _vsdst = (old._vsdst == old._vssrc) ? const_cast<VSFrameRef*>(_vssrc) : const_cast<VSFrameRef*>(_vsapi->cloneFrameRef(old._vsdst)); 170 | } 171 | DSFrame& operator =(const DSFrame & old) 172 | { 173 | if (&old == this) 174 | return *this; 175 | 176 | if (SrcPointers) 177 | delete[] SrcPointers; 178 | if (DstPointers) 179 | delete[] DstPointers; 180 | if (StrideBytes) 181 | delete[] StrideBytes; 182 | if (_vsdst && _vsdst != _vssrc) 183 | _vsapi->freeFrame(_vsdst); 184 | if (_vssrc) 185 | _vsapi->freeFrame(_vssrc); 186 | 187 | _avssrc = old._avssrc; 188 | std::memcpy(this, &old, sizeof(DSFrame)); /* re-aim `planes` at our own arrays, not old's */ if (old.planes == old.planes_y) planes = planes_y; else if (old.planes == old.planes_r) planes = planes_r; 189 | if (old.SrcPointers) { 190 | SrcPointers = new const unsigned char*[Format.Planes]; 191 | std::copy_n(old.SrcPointers, Format.Planes, SrcPointers); 192 | } 193 | if (old.DstPointers) { 194 | DstPointers = new unsigned char*[Format.Planes]; 195 | std::copy_n(old.DstPointers, Format.Planes, DstPointers); 196 | } 197 | if (old.StrideBytes) { 198 | StrideBytes = new int[Format.Planes]; 199 | std::copy_n(old.StrideBytes,
Format.Planes, StrideBytes); 200 | } 201 | if (_vssrc) 202 | _vssrc = _vsapi->cloneFrameRef(old._vssrc); 203 | if (old._vsdst) /* preserve src/dst aliasing so each reference is released exactly once */ 204 | _vsdst = (old._vsdst == old._vssrc) ? const_cast<VSFrameRef*>(_vssrc) : const_cast<VSFrameRef*>(_vsapi->cloneFrameRef(old._vsdst)); 205 | return *this; 206 | } 207 | DSFrame(DSFrame && old) noexcept 208 | { 209 | _avssrc = old._avssrc; 210 | std::memcpy(this, &old, sizeof(DSFrame)); /* re-aim `planes` at our own arrays; old's storage dies with old */ if (old.planes == old.planes_y) planes = planes_y; else if (old.planes == old.planes_r) planes = planes_r; 211 | old.SrcPointers = nullptr; 212 | old.DstPointers = nullptr; 213 | old.StrideBytes = nullptr; 214 | old._vssrc = nullptr; 215 | old._vsdst = nullptr; 216 | } 217 | DSFrame& operator =(DSFrame && old) noexcept 218 | { 219 | if (&old == this) 220 | return *this; 221 | 222 | if (SrcPointers) 223 | delete[] SrcPointers; 224 | if (DstPointers) 225 | delete[] DstPointers; 226 | if (StrideBytes) 227 | delete[] StrideBytes; 228 | if (_vsdst && _vsdst != _vssrc) 229 | _vsapi->freeFrame(_vsdst); 230 | if (_vssrc) 231 | _vsapi->freeFrame(_vssrc); 232 | 233 | _avssrc = old._avssrc; 234 | std::memcpy(this, &old, sizeof(DSFrame)); /* re-aim `planes` at our own arrays; old's storage dies with old */ if (old.planes == old.planes_y) planes = planes_y; else if (old.planes == old.planes_r) planes = planes_r; 235 | old.SrcPointers = nullptr; 236 | old.DstPointers = nullptr; 237 | old.StrideBytes = nullptr; 238 | old._vssrc = nullptr; 239 | old._vsdst = nullptr; 240 | return *this; 241 | } 242 | }; 243 | -------------------------------------------------------------------------------- /src/dither_high.h: -------------------------------------------------------------------------------- 1 | 2 | #include "impl_dispatch.h" 3 | #include "compiler_compat.h" 4 | 5 | #define FS_DITHER_SKIP_PRE_CLAMP 6 | 7 | #include "pixel_proc_c_high_f_s_dithering.h" 8 | #include "pixel_proc_c_high_ordered_dithering.h" 9 | 10 | #include 11 | #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) 12 | #include 13 | #elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) 14 | #include "sse2neon.h" 15 | #endif 16 | namespace dither_high 17 | { 18 | static __m128i _ordered_dithering_threshold_map[16] [2]; 19 | static __m128i _ordered_dithering_threshold_map_yuy2[16] [8]; 20 | static volatile bool
_threshold_map_initialized = false; 21 | 22 | static __inline void init_ordered_dithering() 23 | { 24 | if (!_threshold_map_initialized) { 25 | __m128i threhold_row; 26 | __m128i zero = _mm_setzero_si128(); 27 | for (int i = 0; i < 16; i++) 28 | { 29 | threhold_row = *(__m128i*)pixel_proc_high_ordered_dithering::THRESHOLD_MAP[i]; 30 | 31 | __m128i part_0 = _mm_unpacklo_epi8(threhold_row, zero); 32 | __m128i part_1 = _mm_unpackhi_epi8(threhold_row, zero); 33 | 34 | if (INTERNAL_BIT_DEPTH < 16) 35 | { 36 | part_0 = _mm_srli_epi16(part_0, 16 - INTERNAL_BIT_DEPTH); 37 | part_1 = _mm_srli_epi16(part_1, 16 - INTERNAL_BIT_DEPTH); 38 | } 39 | _ordered_dithering_threshold_map[i][0] = part_0; 40 | _ordered_dithering_threshold_map[i][1] = part_1; 41 | 42 | __m128i tmp = _mm_unpacklo_epi8(part_0, part_0); 43 | _ordered_dithering_threshold_map_yuy2[i][0] = _mm_unpacklo_epi16(part_0, tmp); 44 | _ordered_dithering_threshold_map_yuy2[i][1] = _mm_unpackhi_epi16(part_0, tmp); 45 | 46 | tmp = _mm_unpackhi_epi8(part_0, part_0); 47 | _ordered_dithering_threshold_map_yuy2[i][2] = _mm_unpacklo_epi16(part_1, tmp); 48 | _ordered_dithering_threshold_map_yuy2[i][3] = _mm_unpackhi_epi16(part_1, tmp); 49 | 50 | tmp = _mm_unpacklo_epi8(part_1, part_1); 51 | _ordered_dithering_threshold_map_yuy2[i][4] = _mm_unpacklo_epi16(part_0, tmp); 52 | _ordered_dithering_threshold_map_yuy2[i][5] = _mm_unpackhi_epi16(part_0, tmp); 53 | 54 | tmp = _mm_unpackhi_epi8(part_1, part_1); 55 | _ordered_dithering_threshold_map_yuy2[i][6] = _mm_unpacklo_epi16(part_1, tmp); 56 | _ordered_dithering_threshold_map_yuy2[i][7] = _mm_unpackhi_epi16(part_1, tmp); 57 | } 58 | _mm_mfence(); 59 | _threshold_map_initialized = true; 60 | } 61 | } 62 | 63 | static void init_ordered_dithering_with_output_depth(char context_buffer[CONTEXT_BUFFER_SIZE], int output_depth) 64 | { 65 | assert(_threshold_map_initialized); 66 | 67 | __m128i shift = _mm_set_epi32(0, 0, 0, output_depth - 8); 68 | 69 | for (int i = 0; i < 16; i++) 70 | { 71 | 
for (int j = 0; j < 2; j++) 72 | { 73 | __m128i item = _ordered_dithering_threshold_map[i][j]; 74 | item = _mm_srl_epi16(item, shift); 75 | _mm_store_si128((__m128i*)(context_buffer + (i * 2 + j) * 16), item); 76 | } 77 | } 78 | } 79 | 80 | template 81 | static __inline void init(char context_buffer[CONTEXT_BUFFER_SIZE], int frame_width, int output_depth) 82 | { 83 | if (dither_algo == DA_HIGH_FLOYD_STEINBERG_DITHERING) 84 | { 85 | pixel_proc_high_f_s_dithering::init_context(context_buffer, frame_width, output_depth); 86 | } else if (dither_algo == DA_HIGH_ORDERED_DITHERING) { 87 | init_ordered_dithering(); 88 | init_ordered_dithering_with_output_depth(context_buffer, output_depth); 89 | } 90 | } 91 | 92 | template 93 | static __inline void complete(void* context) 94 | { 95 | if (dither_algo == DA_HIGH_FLOYD_STEINBERG_DITHERING) 96 | { 97 | pixel_proc_high_f_s_dithering::destroy_context(context); 98 | } 99 | } 100 | 101 | template 102 | static __forceinline __m128i dither(void* context, __m128i pixels, int row, int column) 103 | { 104 | switch (dither_algo) 105 | { 106 | case DA_HIGH_NO_DITHERING: 107 | return pixels; 108 | case DA_HIGH_ORDERED_DITHERING: 109 | { 110 | // row: use lowest 4 bits as index, mask = 0b00001111 = 15 111 | // column: always multiples of 8, so use 8 (bit 4) as selector, mask = 0b00001000 112 | assert((column & 7) == 0); 113 | __m128i threshold = _mm_load_si128((__m128i*)((char*)context + ( ( (row & 15) * 2 ) + ( (column & 8) >> 3 ) ) * 16 ) ); 114 | return _mm_adds_epu16(pixels, threshold); 115 | } 116 | case DA_HIGH_FLOYD_STEINBERG_DITHERING: 117 | // fixme, remove shitty compat 118 | // due to an ICC bug, accessing pixels using union will give us incorrect results 119 | // so we have to use a buffer here 120 | // tested on ICC 12.0.1024.2010 121 | alignas(16) unsigned short buffer[8]; 122 | _mm_store_si128((__m128i*)buffer, pixels); 123 | for (int i = 0; i < 8; i++) 124 | { 125 | buffer[i] = (unsigned 
short)pixel_proc_high_f_s_dithering::dither(context, buffer[i], row, column + i); 126 | pixel_proc_high_f_s_dithering::next_pixel(context); 127 | } 128 | return _mm_load_si128((__m128i*)buffer); 129 | case DA_16BIT_INTERLEAVED: 130 | return _mm_setzero_si128(); 131 | break; 132 | default: 133 | abort(); 134 | return _mm_setzero_si128(); 135 | } 136 | } 137 | 138 | // fixme, remove yuy2? 139 | template 140 | static __forceinline __m128i dither_yuy2(char contexts[3][CONTEXT_BUFFER_SIZE], __m128i pixels, int row, int column) 141 | { 142 | switch (dither_algo) 143 | { 144 | case DA_HIGH_NO_DITHERING: 145 | return pixels; 146 | case DA_HIGH_ORDERED_DITHERING: 147 | // row: use lowest 4 bits as index, mask = 0b00001111 = 15 148 | // column: always multiples of 8, yuy2 threshold map has 8 items, mask = 0b00111000 149 | assert((column & 7) == 0); 150 | return _mm_adds_epu16(pixels, _ordered_dithering_threshold_map_yuy2[row & 15][(column >> 3) & 7]); 151 | case DA_HIGH_FLOYD_STEINBERG_DITHERING: 152 | // fixme, remove shitty compat 153 | // due to an ICC bug, accessing pixels using union will give us incorrect results 154 | // so we have to use a buffer here 155 | // tested on ICC 12.0.1024.2010 156 | alignas(16) 157 | unsigned short buffer[8]; 158 | _mm_store_si128((__m128i*)buffer, pixels); 159 | for (int i = 0; i < 8; i++) 160 | { 161 | int cur_column = column + i; 162 | void *cur_context; 163 | switch (i & 3) 164 | { 165 | case 0: 166 | case 2: 167 | cur_column >>= 1; 168 | cur_context = contexts[0]; 169 | break; 170 | case 1: 171 | cur_column >>= 2; 172 | cur_context = contexts[1]; 173 | break; 174 | case 3: 175 | cur_column >>= 2; 176 | cur_context = contexts[2]; 177 | break; 178 | } 179 | buffer[i] = (unsigned short)pixel_proc_high_f_s_dithering::dither(cur_context, buffer[i], row, cur_column); 180 | pixel_proc_high_f_s_dithering::next_pixel(cur_context); 181 | } 182 | return _mm_load_si128((__m128i*)buffer); 183 | case DA_16BIT_INTERLEAVED: 184 | return 
_mm_setzero_si128(); 185 | break; 186 | default: 187 | abort(); 188 | return _mm_setzero_si128(); 189 | } 190 | } 191 | 192 | template 193 | static __inline void next_row(void* context) 194 | { 195 | if (dither_algo == DA_HIGH_FLOYD_STEINBERG_DITHERING) 196 | { 197 | pixel_proc_high_f_s_dithering::next_row(context); 198 | } 199 | } 200 | }; -------------------------------------------------------------------------------- /VCL2/instrset_detect.cpp: -------------------------------------------------------------------------------- 1 | /************************** instrset_detect.cpp **************************** 2 | * Author: Agner Fog 3 | * Date created: 2012-05-30 4 | * Last modified: 2022-07-20 5 | * Version: 2.02.00 6 | * Project: vector class library 7 | * Description: 8 | * Functions for checking which instruction sets are supported. 9 | * 10 | * (c) Copyright 2012-2022 Agner Fog. 11 | * Apache License version 2.0 or later. 12 | ******************************************************************************/ 13 | 14 | #include "instrset.h" 15 | 16 | #ifdef VCL_NAMESPACE 17 | namespace VCL_NAMESPACE { 18 | #endif 19 | 20 | 21 | // Define interface to xgetbv instruction 22 | static inline uint64_t xgetbv (int ctr) { 23 | #if (defined (_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined (__INTEL_COMPILER) && __INTEL_COMPILER >= 1200) 24 | // Microsoft or Intel compiler supporting _xgetbv intrinsic 25 | 26 | return uint64_t(_xgetbv(ctr)); // intrinsic function for XGETBV 27 | 28 | #elif defined(__GNUC__) || defined (__clang__) // use inline assembly, Gnu/AT&T syntax 29 | 30 | uint32_t a, d; 31 | __asm("xgetbv" : "=a"(a),"=d"(d) : "c"(ctr) : ); 32 | return a | (uint64_t(d) << 32); 33 | 34 | #else // #elif defined (_WIN32) // other compiler. 
try inline assembly with masm/intel/MS syntax 35 | uint32_t a, d; 36 | __asm { 37 | mov ecx, ctr 38 | _emit 0x0f 39 | _emit 0x01 40 | _emit 0xd0 ; // xgetbv 41 | mov a, eax 42 | mov d, edx 43 | } 44 | return a | (uint64_t(d) << 32); 45 | 46 | #endif 47 | } 48 | 49 | /* find supported instruction set 50 | return value: 51 | 0 = 80386 instruction set 52 | 1 or above = SSE (XMM) supported by CPU (not testing for OS support) 53 | 2 or above = SSE2 54 | 3 or above = SSE3 55 | 4 or above = Supplementary SSE3 (SSSE3) 56 | 5 or above = SSE4.1 57 | 6 or above = SSE4.2 58 | 7 or above = AVX supported by CPU and operating system 59 | 8 or above = AVX2 60 | 9 or above = AVX512F 61 | 10 or above = AVX512VL, AVX512BW, AVX512DQ 62 | */ 63 | int instrset_detect(void) { 64 | 65 | static int iset = -1; // remember value for next call 66 | if (iset >= 0) { 67 | return iset; // called before 68 | } 69 | iset = 0; // default value 70 | int abcd[4] = {0,0,0,0}; // cpuid results 71 | cpuid(abcd, 0); // call cpuid function 0 72 | if (abcd[0] == 0) return iset; // no further cpuid function supported 73 | cpuid(abcd, 1); // call cpuid function 1 for feature flags 74 | if ((abcd[3] & (1 << 0)) == 0) return iset; // no floating point 75 | if ((abcd[3] & (1 << 23)) == 0) return iset; // no MMX 76 | if ((abcd[3] & (1 << 15)) == 0) return iset; // no conditional move 77 | if ((abcd[3] & (1 << 24)) == 0) return iset; // no FXSAVE 78 | if ((abcd[3] & (1 << 25)) == 0) return iset; // no SSE 79 | iset = 1; // 1: SSE supported 80 | if ((abcd[3] & (1 << 26)) == 0) return iset; // no SSE2 81 | iset = 2; // 2: SSE2 supported 82 | if ((abcd[2] & (1 << 0)) == 0) return iset; // no SSE3 83 | iset = 3; // 3: SSE3 supported 84 | if ((abcd[2] & (1 << 9)) == 0) return iset; // no SSSE3 85 | iset = 4; // 4: SSSE3 supported 86 | if ((abcd[2] & (1 << 19)) == 0) return iset; // no SSE4.1 87 | iset = 5; // 5: SSE4.1 supported 88 | if ((abcd[2] & (1 << 23)) == 0) return iset; // no POPCNT 89 | if ((abcd[2] & (1 << 
20)) == 0) return iset; // no SSE4.2 90 | iset = 6; // 6: SSE4.2 supported 91 | if ((abcd[2] & (1 << 27)) == 0) return iset; // no OSXSAVE 92 | if ((xgetbv(0) & 6) != 6) return iset; // AVX not enabled in O.S. 93 | if ((abcd[2] & (1 << 28)) == 0) return iset; // no AVX 94 | iset = 7; // 7: AVX supported 95 | cpuid(abcd, 7); // call cpuid leaf 7 for feature flags 96 | if ((abcd[1] & (1 << 5)) == 0) return iset; // no AVX2 97 | iset = 8; 98 | if ((abcd[1] & (1 << 16)) == 0) return iset; // no AVX512 99 | cpuid(abcd, 0xD); // call cpuid leaf 0xD for feature flags 100 | if ((abcd[0] & 0x60) != 0x60) return iset; // no AVX512 101 | iset = 9; 102 | cpuid(abcd, 7); // call cpuid leaf 7 for feature flags 103 | if ((abcd[1] & (1 << 31)) == 0) return iset; // no AVX512VL 104 | if ((abcd[1] & 0x40020000) != 0x40020000) return iset; // no AVX512BW, AVX512DQ 105 | iset = 10; 106 | return iset; 107 | } 108 | 109 | // detect if CPU supports the FMA3 instruction set 110 | bool hasFMA3(void) { 111 | if (instrset_detect() < 7) return false; // must have AVX 112 | int abcd[4]; // cpuid results 113 | cpuid(abcd, 1); // call cpuid function 1 114 | return ((abcd[2] & (1 << 12)) != 0); // ecx bit 12 indicates FMA3 115 | } 116 | 117 | // detect if CPU supports the FMA4 instruction set 118 | bool hasFMA4(void) { 119 | if (instrset_detect() < 7) return false; // must have AVX 120 | int abcd[4]; // cpuid results 121 | cpuid(abcd, 0x80000001); // call cpuid function 0x80000001 122 | return ((abcd[2] & (1 << 16)) != 0); // ecx bit 16 indicates FMA4 123 | } 124 | 125 | // detect if CPU supports the XOP instruction set 126 | bool hasXOP(void) { 127 | if (instrset_detect() < 7) return false; // must have AVX 128 | int abcd[4]; // cpuid results 129 | cpuid(abcd, 0x80000001); // call cpuid function 0x80000001 130 | return ((abcd[2] & (1 << 11)) != 0); // ecx bit 11 indicates XOP 131 | } 132 | 133 | // detect if CPU supports the AVX512ER instruction set 134 | bool hasAVX512ER(void) { 135 | if 
(instrset_detect() < 9) return false; // must have AVX512F 136 | int abcd[4]; // cpuid results 137 | cpuid(abcd, 7); // call cpuid function 7 138 | return ((abcd[1] & (1 << 27)) != 0); // ebx bit 27 indicates AVX512ER 139 | } 140 | 141 | // detect if CPU supports the AVX512VBMI instruction set 142 | bool hasAVX512VBMI(void) { 143 | if (instrset_detect() < 10) return false; // must have AVX512BW 144 | int abcd[4]; // cpuid results 145 | cpuid(abcd, 7); // call cpuid function 7 146 | return ((abcd[2] & (1 << 1)) != 0); // ecx bit 1 indicates AVX512VBMI 147 | } 148 | 149 | // detect if CPU supports the AVX512VBMI2 instruction set 150 | bool hasAVX512VBMI2(void) { 151 | if (instrset_detect() < 10) return false; // must have AVX512BW 152 | int abcd[4]; // cpuid results 153 | cpuid(abcd, 7); // call cpuid function 7 154 | return ((abcd[2] & (1 << 6)) != 0); // ecx bit 6 indicates AVX512VBMI2 155 | } 156 | 157 | // detect if CPU supports the F16C instruction set 158 | bool hasF16C(void) { 159 | if (instrset_detect() < 7) return false; // must have AVX 160 | int abcd[4]; // cpuid results 161 | cpuid(abcd, 1); // call cpuid function 1 162 | return ((abcd[2] & (1 << 29)) != 0); // ecx bit 29 indicates F16C 163 | } 164 | 165 | // detect if CPU supports the AVX512_FP16 instruction set 166 | bool hasAVX512FP16(void) { 167 | if (instrset_detect() < 10) return false; // must have AVX512 168 | int abcd[4]; // cpuid results 169 | cpuid(abcd, 7); // call cpuid function 7 170 | return ((abcd[3] & (1 << 23)) != 0); // edx bit 23 indicates AVX512_FP16 171 | } 172 | 173 | 174 | #ifdef VCL_NAMESPACE 175 | } 176 | #endif 177 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.15) 2 | project(neo_f3kdb VERSION 10.0.0 LANGUAGES CXX) 3 | 4 | set(LIBRARY_NAME "neo-f3kdb") 5 | add_library(${LIBRARY_NAME} SHARED) 6 | 7 |
target_sources(${LIBRARY_NAME} PRIVATE 8 | "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp" 9 | "${CMAKE_CURRENT_SOURCE_DIR}/src/bit_utils.h" 10 | "${CMAKE_CURRENT_SOURCE_DIR}/src/compiler_compat.h" 11 | "${CMAKE_CURRENT_SOURCE_DIR}/src/constants.h" 12 | "${CMAKE_CURRENT_SOURCE_DIR}/src/core.cpp" 13 | "${CMAKE_CURRENT_SOURCE_DIR}/src/core.h" 14 | "${CMAKE_CURRENT_SOURCE_DIR}/src/cpuid.cpp" 15 | "${CMAKE_CURRENT_SOURCE_DIR}/src/dither_high.h" 16 | "${CMAKE_CURRENT_SOURCE_DIR}/src/f3kdb.h" 17 | "${CMAKE_CURRENT_SOURCE_DIR}/src/f3kdb.hpp" 18 | "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_c.cpp" 19 | "${CMAKE_CURRENT_SOURCE_DIR}/src/impl_dispatch.cpp" 20 | "${CMAKE_CURRENT_SOURCE_DIR}/src/impl_dispatch.h" 21 | "${CMAKE_CURRENT_SOURCE_DIR}/src/impl_dispatch_decl.h" 22 | "${CMAKE_CURRENT_SOURCE_DIR}/src/neo_f3kdb.hpp" 23 | "${CMAKE_CURRENT_SOURCE_DIR}/src/pixel_proc_c.h" 24 | "${CMAKE_CURRENT_SOURCE_DIR}/src/pixel_proc_c_16bit.h" 25 | "${CMAKE_CURRENT_SOURCE_DIR}/src/pixel_proc_c_high_bit_depth_common.h" 26 | "${CMAKE_CURRENT_SOURCE_DIR}/src/pixel_proc_c_high_f_s_dithering.h" 27 | "${CMAKE_CURRENT_SOURCE_DIR}/src/pixel_proc_c_high_no_dithering.h" 28 | "${CMAKE_CURRENT_SOURCE_DIR}/src/pixel_proc_c_high_ordered_dithering.h" 29 | "${CMAKE_CURRENT_SOURCE_DIR}/src/process_plane_context.cpp" 30 | "${CMAKE_CURRENT_SOURCE_DIR}/src/process_plane_context.h" 31 | "${CMAKE_CURRENT_SOURCE_DIR}/src/random.cpp" 32 | "${CMAKE_CURRENT_SOURCE_DIR}/src/random.h" 33 | "${CMAKE_CURRENT_SOURCE_DIR}/src/sse2neon.h" 34 | "${CMAKE_CURRENT_SOURCE_DIR}/src/sse_utils.h" 35 | "${CMAKE_CURRENT_SOURCE_DIR}/src/utils.h" 36 | ) 37 | 38 | target_compile_features(${LIBRARY_NAME} PRIVATE cxx_std_17) 39 | option(ENABLE_PAR "Enable C++17 Parallel Execution" ON) 40 | 41 | if (NOT MSVC) 42 | find_package(PkgConfig REQUIRED) 43 | 44 | pkg_check_modules(AVISYNTH avisynth) 45 | if(AVISYNTH_FOUND) 46 | target_include_directories(${LIBRARY_NAME} PRIVATE ${AVISYNTH_INCLUDE_DIRS}) 47 | else() 48 | #[[ Fallback taken when pkg-config did not find AviSynth: use the headers bundled under include/avisynth. The VapourSynth lookup that follows uses the same find-or-fall-back pattern, and MSVC builds always use the bundled copies. ]]
target_include_directories(${LIBRARY_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include/avisynth") 49 | endif() 50 | 51 | pkg_check_modules(VAPOURSYNTH vapoursynth) 52 | if(VAPOURSYNTH_FOUND) 53 | target_include_directories(${LIBRARY_NAME} PRIVATE ${VAPOURSYNTH_INCLUDE_DIRS}) 54 | else() 55 | target_include_directories(${LIBRARY_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include/vapoursynth") 56 | endif() 57 | else() 58 | target_include_directories(${LIBRARY_NAME} PRIVATE 59 | "${CMAKE_CURRENT_SOURCE_DIR}/include/avisynth" 60 | "${CMAKE_CURRENT_SOURCE_DIR}/include/vapoursynth" 61 | ) 62 | endif() 63 | 64 | target_include_directories(${LIBRARY_NAME} PRIVATE 65 | "${CMAKE_CURRENT_SOURCE_DIR}" 66 | "${CMAKE_CURRENT_SOURCE_DIR}/include/dualsynth" 67 | "${CMAKE_CURRENT_BINARY_DIR}" 68 | ) 69 | 70 | # We apply aggressive flags ONLY to compilers with a GNU-style frontend. #[[ NOTE(review): the flags are not conditioned on the build type, so -O3 is also passed in Debug builds. ]] 71 | if(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "GNU") 72 | target_compile_options(${LIBRARY_NAME} PRIVATE -O3 -funroll-loops) 73 | endif() 74 | 75 | # Only apply SIMD flags if we are on a capable architecture (x86/x86_64). 76 | if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64|i.86") 77 | message(STATUS "x86/x86_64 architecture detected. 
Configuring SIMD instruction sets.") 78 | 79 | target_sources(${LIBRARY_NAME} PRIVATE 80 | "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_avx2_base.h" 81 | "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_avx512_base.h" 82 | "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_avx2.cpp" 83 | "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_avx512.cpp" 84 | "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_sse4.cpp" 85 | "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_sse_base.h" 86 | ) 87 | 88 | target_include_directories(${LIBRARY_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/VCL2") 89 | 90 | set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_sse4.cpp" PROPERTIES COMPILE_OPTIONS 91 | "$<$:/arch:SSE2>$<$,$>:/arch:SSE4.1>$<$,$,$>>>:-msse4.1>" 92 | ) 93 | 94 | if(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC") 95 | set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_avx2.cpp" PROPERTIES COMPILE_OPTIONS "/arch:AVX2") 96 | elseif(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "GNU") 97 | set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_avx2.cpp" PROPERTIES COMPILE_OPTIONS "-mavx2;-mfma") 98 | endif() 99 | 100 | if(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC") 101 | set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_avx512.cpp" PROPERTIES COMPILE_OPTIONS "/arch:AVX512") 102 | elseif(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "GNU") 103 | set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_avx512.cpp" PROPERTIES COMPILE_OPTIONS "-mavx512f;-mavx512bw;-mavx512dq;-mavx512vl;-mavx512cd;-mfma") 104 | endif() 105 | else() 106 | message(STATUS "Non-x86 architecture detected (${CMAKE_SYSTEM_PROCESSOR}). 
Skipping SIMD-specific source files.") 107 | endif() 108 | 109 | target_link_libraries(${LIBRARY_NAME} PRIVATE "$<$,$>:libmmds>") 110 | 111 | # Handle legacy Windows XP support if the specific toolset is used #[[ NOTE(review): the generator expressions in this chunk (the libmmds link line above, the /Zc:threadSafeInit- option below and the SSE4.1 COMPILE_OPTIONS value earlier) appear truncated in this copy; compare with the upstream CMakeLists.txt before relying on them. ]] 112 | if(CMAKE_GENERATOR_TOOLSET MATCHES "v[0-9]+_xp") 113 | message(STATUS "Windows XP toolset detected. Applying compatibility settings.") 114 | target_compile_definitions(${LIBRARY_NAME} PRIVATE WINVER=0x502 _WIN32_WINNT=0x502) 115 | target_compile_options(${LIBRARY_NAME} PRIVATE "$<$:/Zc:threadSafeInit->") 116 | endif() 117 | 118 | #[[ Version parsing: the leading run of digits and dots in PROJECT_VERSION becomes CORE_VERSION_STRING, and a "-" anywhere in PROJECT_VERSION marks a pre-release. ]] string(REGEX MATCH "^([0-9.]+)" CORE_VERSION_STRING "${PROJECT_VERSION}") 119 | if (CORE_VERSION_STRING) 120 | if(PROJECT_VERSION MATCHES "-") 121 | set(IS_PRERELEASE 1) 122 | else() 123 | set(IS_PRERELEASE 0) 124 | endif() 125 | else() 126 | message(WARNING "Could not parse core version from '${PROJECT_VERSION}'. Defaulting to 0.0.0.") 127 | set(CORE_VERSION_STRING "0.0.0") 128 | set(IS_PRERELEASE 0) 129 | endif() 130 | 131 | #[[ Split the numeric version on "." into up to four components; components that are missing keep the default 0. ]] string(REPLACE "." ";" VERSION_NUMERIC_PARTS_LIST "${CORE_VERSION_STRING}") 132 | list(LENGTH VERSION_NUMERIC_PARTS_LIST NUM_PARTS) 133 | 134 | set(VERSION_MAJOR 0) 135 | set(VERSION_MINOR 0) 136 | set(VERSION_PATCH 0) 137 | set(VERSION_BUILD 0) 138 | 139 | if(NUM_PARTS GREATER_EQUAL 1) 140 | list(GET VERSION_NUMERIC_PARTS_LIST 0 VERSION_MAJOR) 141 | endif() 142 | 143 | if(NUM_PARTS GREATER_EQUAL 2) 144 | list(GET VERSION_NUMERIC_PARTS_LIST 1 VERSION_MINOR) 145 | endif() 146 | 147 | if(NUM_PARTS GREATER_EQUAL 3) 148 | list(GET VERSION_NUMERIC_PARTS_LIST 2 VERSION_PATCH) 149 | endif() 150 | 151 | if(NUM_PARTS GREATER_EQUAL 4) 152 | list(GET VERSION_NUMERIC_PARTS_LIST 3 VERSION_BUILD) 153 | endif() 154 | 155 | #[[ Only the major component is used for the string substituted into version.hpp. ]] set(PROJECT_VERSION_STRING_FULL "r${VERSION_MAJOR}") 156 | 157 | configure_file( 158 | "${CMAKE_CURRENT_SOURCE_DIR}/src/version.hpp.in" 159 | "${CMAKE_CURRENT_BINARY_DIR}/version.hpp" 160 | ) 161 | 162 | #[[ Windows only: generate version.rc. When CMAKE_CONFIGURATION_TYPES is empty the FILEFLAGS line is chosen here from CMAKE_BUILD_TYPE and IS_PRERELEASE; otherwise a block of preprocessor conditionals on _DEBUG and IS_PRERELEASE_BUILD is substituted instead. ]] if (WIN32) 163 | set(FILE_DESCRIPTION "Neo-F3KDB") 164 | set(INTERNAL_NAME "Neo-F3KDB") 165 | 
set(ORIGINAL_FILENAME "${LIBRARY_NAME}.dll") 166 | set(PRODUCT_NAME "Neo-F3KDB") 167 | 168 | if(NOT CMAKE_CONFIGURATION_TYPES) 169 | set(IS_DEBUG_BUILD 0) 170 | if(CMAKE_BUILD_TYPE MATCHES "^Debug$") 171 | set(IS_DEBUG_BUILD 1) 172 | endif() 173 | 174 | if(IS_DEBUG_BUILD AND IS_PRERELEASE) 175 | set(RC_FILEFLAGS_LINE "FILEFLAGS VS_FF_DEBUG | VS_FF_PRERELEASE") 176 | elseif(IS_DEBUG_BUILD) 177 | set(RC_FILEFLAGS_LINE "FILEFLAGS VS_FF_DEBUG") 178 | elseif(IS_PRERELEASE) 179 | set(RC_FILEFLAGS_LINE "FILEFLAGS VS_FF_PRERELEASE") 180 | else() 181 | set(RC_FILEFLAGS_LINE "FILEFLAGS 0x0L") 182 | endif() 183 | else() 184 | string(CONCAT RC_FILEFLAGS_LINE 185 | "#if defined(_DEBUG) && defined(IS_PRERELEASE_BUILD)\n" 186 | " FILEFLAGS VS_FF_DEBUG | VS_FF_PRERELEASE\n" 187 | "#elif defined(_DEBUG)\n" 188 | " FILEFLAGS VS_FF_DEBUG\n" 189 | "#elif defined(IS_PRERELEASE_BUILD)\n" 190 | " FILEFLAGS VS_FF_PRERELEASE\n" 191 | "#else\n" 192 | " FILEFLAGS 0x0L\n" 193 | "#endif" 194 | ) 195 | if(IS_PRERELEASE) 196 | target_compile_definitions(${LIBRARY_NAME} PRIVATE IS_PRERELEASE_BUILD=1) 197 | endif() 198 | endif() 199 | 200 | set(RC_FILE_OUT "${CMAKE_CURRENT_BINARY_DIR}/version.rc") # Output to build dir 201 | 202 | configure_file( 203 | "${CMAKE_CURRENT_SOURCE_DIR}/src/version.rc.in" 204 | "${RC_FILE_OUT}" 205 | @ONLY 206 | ) 207 | 208 | target_sources(${LIBRARY_NAME} PRIVATE "${RC_FILE_OUT}") 209 | 210 | if(MSVC) 211 | set_source_files_properties("${RC_FILE_OUT}" PROPERTIES 212 | VS_RESOURCE_GENERATOR "RC") 213 | endif() 214 | endif() 215 | 216 | #[[ HAS_EXECUTION is defined when the execution header can be included. ENABLE_PAR is defined only when both the ENABLE_PAR option and that header are available, and tbb is linked when the compiler ID is GNU. ]] include(CheckIncludeFileCXX) 217 | CHECK_INCLUDE_FILE_CXX(execution HAS_EXECUTION) 218 | if(HAS_EXECUTION) 219 | target_compile_definitions(${LIBRARY_NAME} PRIVATE HAS_EXECUTION) 220 | endif() 221 | if(ENABLE_PAR AND HAS_EXECUTION) 222 | target_compile_definitions(${LIBRARY_NAME} PRIVATE ENABLE_PAR) 223 | 224 | if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") 225 | target_link_libraries(${LIBRARY_NAME} PRIVATE tbb) 226 | endif() 227 | endif() 228
| 229 | add_custom_command( 230 | TARGET ${LIBRARY_NAME} POST_BUILD 231 | COMMAND ${CMAKE_COMMAND} -E copy $ "../Release_${PROJECT_VERSION_STRING_FULL}/${_DIR}/$" 232 | ) 233 | -------------------------------------------------------------------------------- /src/f3kdb.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Xinyue Lu 3 | * 4 | * Temporal Median - filter. 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | #ifdef HAS_EXECUTION 13 | #include 14 | #endif 15 | 16 | #ifndef __cpp_lib_execution 17 | #undef ENABLE_PAR 18 | #endif 19 | 20 | #ifdef ENABLE_PAR 21 | #define PAR_POLICY std::execution::par 22 | #else 23 | #define PAR_POLICY nullptr 24 | #endif 25 | 26 | #include "compiler_compat.h" 27 | #include "core.h" 28 | #include "constants.h" 29 | #include "impl_dispatch.h" 30 | 31 | int GetCPUFlags(); 32 | /** F3KDB: filter object that reads the deband arguments into ep, validates them, builds the processing engine and runs it on every plane of a requested frame. NOTE(review): template argument lists and include targets look stripped in this copy (for example static_cast without a type, std::unique_ptr without an element type); restore them from the upstream header before compiling. */ 33 | struct F3KDB final : Filter { 34 | f3kdb_params_t ep; 35 | std::unique_ptr engine; /* processing core, created at the end of Initialize() */ 36 | char error_msg[1024]; /* backing storage for the message thrown by CHECK_PARAM */ 37 | DSVideoInfo out_vi; /* input video info with the sample size replaced by output_depth */ 38 | bool mt {true}; /* when true and ENABLE_PAR is defined, GetFrame dispatches planes through std::for_each_n */ 39 | 40 | const char* VSName() const override { return "Deband"; } 41 | const char* AVSName() const override { return "neo_f3kdb"; } 42 | const MtMode AVSMode() const override { return MT_NICE_FILTER; } 43 | const VSFilterMode VSMode() const override { return fmParallel; } 44 | /* Argument table: name and type of every argument the filter accepts. */ const std::vector Params() const override { 45 | return std::vector { 46 | Param {"clip", Clip, false, true, true, false}, 47 | Param {"range", Integer}, 48 | Param {"y", Integer}, 49 | Param {"cb", Integer}, 50 | Param {"cr", Integer}, 51 | Param {"grainy", Integer}, 52 | Param {"grainc", Integer}, 53 | Param {"sample_mode", Integer}, 54 | Param {"seed", Integer}, 55 | Param {"blur_first", Boolean}, 56 | Param {"dynamic_grain", Boolean}, 57 | Param {"opt", Integer}, 58 | Param {"mt", Boolean}, 59 | Param {"dither_algo", Integer}, 60 | Param {"keep_tv_range", Boolean}, 61 | Param {"output_depth", Integer}, 62 | Param {"random_algo_ref", Integer}, 63 | 
Param {"random_algo_grain", Integer}, 64 | Param {"random_param_ref", Float}, 65 | Param {"random_param_grain", Float}, 66 | Param {"preset", String}, 67 | Param{ "y_1", Integer}, 68 | Param{ "cb_1", Integer}, 69 | Param{ "cr_1", Integer}, 70 | Param{ "y_2", Integer }, 71 | Param{ "cb_2", Integer }, 72 | Param{ "cr_2", Integer }, 73 | Param{ "scale", Boolean }, 74 | Param{ "angle_boost", Float }, 75 | Param{ "max_angle", Float }, 76 | }; 77 | } /** Initialize: applies the preset keywords, reads the individual arguments, picks the implementation (opt), validates the input format and the parameters, derives out_vi and constructs engine. Failures are reported by throwing a C string (a literal or error_msg). */ 78 | void Initialize(InDelegator* in, DSVideoInfo in_vi, FetchFrameFunctor* fetch_frame) override 79 | { 80 | Filter::Initialize(in, in_vi, fetch_frame); 81 | std::string preset; 82 | in->Read("preset", preset); 83 | std::istringstream piss(preset); 84 | 85 | bool scale = false; 86 | in->Read("scale", scale); 87 | /* preset is a list of keywords separated by a slash; each recognised keyword assigns a group of ep fields, anything else is ignored. The strength keywords pick the larger constant when scale is true. */ 88 | while(!piss.eof()) { 89 | std::string piss1; 90 | std::getline(piss, piss1, '/'); 91 | if (piss1 == "depth") 92 | ep.Y = ep.Cb = ep.Cr = ep.grainY = ep.grainC = ep.Y_1 = ep.Cb_1 = ep.Cr_1 = ep.Y_2 = ep.Cb_2 = ep.Cr_2 = 0; 93 | else if (piss1 == "low") 94 | ep.Y = ep.Cb = ep.Cr = ep.grainY = ep.grainC = ep.Y_1 = ep.Cb_1 = ep.Cr_1 = ep.Y_2 = ep.Cb_2 = ep.Cr_2 = (scale) ? 128 : 32; 95 | else if (piss1 == "medium") 96 | ep.Y = ep.Cb = ep.Cr = ep.grainY = ep.grainC = ep.Y_1 = ep.Cb_1 = ep.Cr_1 = ep.Y_2 = ep.Cb_2 = ep.Cr_2 = (scale) ? 192 : 48; 97 | else if (piss1 == "high") 98 | ep.Y = ep.Cb = ep.Cr = ep.grainY = ep.grainC = ep.Y_1 = ep.Cb_1 = ep.Cr_1 = ep.Y_2 = ep.Cb_2 = ep.Cr_2 = (scale) ? 256 : 64; 99 | else if (piss1 == "veryhigh") 100 | ep.Y = ep.Cb = ep.Cr = ep.grainY = ep.grainC = ep.Y_1 = ep.Cb_1 = ep.Cr_1 = ep.Y_2 = ep.Cb_2 = ep.Cr_2 = (scale) ? 
320 : 80; 101 | else if (piss1 == "nograin") 102 | ep.grainY = ep.grainC = 0; 103 | else if (piss1 == "luma") 104 | ep.Cb = ep.Cr = ep.grainC = 0; 105 | else if (piss1 == "chroma") 106 | ep.Y = ep.grainY = 0; 107 | } /* Individual arguments are read after the preset. Presumably Read() leaves its target untouched when the argument was not supplied, so explicit arguments override preset values - TODO confirm against InDelegator. The enum-typed fields are round-tripped through the int tmp. */ 108 | int tmp; 109 | in->Read("range", ep.range); 110 | in->Read("y", ep.Y); 111 | in->Read("cb", ep.Cb); 112 | in->Read("cr", ep.Cr); 113 | in->Read("grainy", ep.grainY); 114 | in->Read("grainc", ep.grainC); 115 | in->Read("sample_mode", ep.sample_mode); 116 | in->Read("seed", ep.seed); 117 | in->Read("blur_first", ep.blur_first); 118 | in->Read("dynamic_grain", ep.dynamic_grain); 119 | tmp = static_cast(ep.dither_algo); 120 | in->Read("dither_algo", tmp); 121 | ep.dither_algo = static_cast(tmp); 122 | in->Read("keep_tv_range", ep.keep_tv_range); 123 | in->Read("output_depth", ep.output_depth); 124 | tmp = static_cast(ep.random_algo_ref); 125 | in->Read("random_algo_ref", tmp); 126 | ep.random_algo_ref = static_cast(tmp); 127 | tmp = static_cast(ep.random_algo_grain); 128 | in->Read("random_algo_grain", tmp); 129 | ep.random_algo_grain = static_cast(tmp); 130 | in->Read("random_param_ref", ep.random_param_ref); 131 | in->Read("random_param_grain", ep.random_param_grain); 132 | in->Read("y_1", ep.Y_1); 133 | in->Read("cb_1", ep.Cb_1); 134 | in->Read("cr_1", ep.Cr_1); 135 | in->Read("y_2", ep.Y_2); 136 | in->Read("cb_2", ep.Cb_2); 137 | in->Read("cr_2", ep.Cr_2); 138 | in->Read("angle_boost", ep.angle_boost); 139 | in->Read("max_angle", ep.max_angle); 140 | /* A value of -1 in a _1 or _2 threshold means: inherit the base threshold of the same plane. */ 141 | ep.Y_1 = ep.Y_1 == -1 ? ep.Y : ep.Y_1; 142 | ep.Cb_1 = ep.Cb_1 == -1 ? ep.Cb : ep.Cb_1; 143 | ep.Cr_1 = ep.Cr_1 == -1 ? ep.Cr : ep.Cr_1; 144 | ep.Y_2 = ep.Y_2 == -1 ? ep.Y : ep.Y_2; 145 | ep.Cb_2 = ep.Cb_2 == -1 ? ep.Cb : ep.Cb_2; 146 | ep.Cr_2 = ep.Cr_2 == -1 ? 
ep.Cr : ep.Cr_2; 147 | 148 | int opt_in = -1; 149 | in->Read("opt", opt_in); 150 | in->Read("mt", mt); 151 | /* Implementation choice: AVX-512 and AVX2 are considered only for sample_mode 5 to 7 (opt 3 and 2 respectively, or any negative opt for auto-detection); SSE4.1 is used for every non-zero opt when the CPU reports it; opt 0, or a CPU without SSE4.1, selects the C path. */ 152 | OPTIMIZATION_MODE opt = [&]() { 153 | const int CPUFlags = GetCPUFlags(); 154 | 155 | if (ep.sample_mode >= 5 && ep.sample_mode <= 7) { 156 | const int AVX512_REQUIRED_FLAGS = CPUF_AVX512F | CPUF_AVX512BW | CPUF_AVX512DQ | CPUF_AVX512VL | CPUF_AVX512CD; 157 | 158 | if (((CPUFlags & AVX512_REQUIRED_FLAGS) == AVX512_REQUIRED_FLAGS) && (opt_in == 3 || opt_in < 0)) 159 | return IMPL_AVX512; 160 | 161 | if ((CPUFlags & CPUF_AVX2) && (opt_in == 2 || opt_in < 0)) 162 | return IMPL_AVX2; 163 | } 164 | 165 | if ((CPUFlags & CPUF_SSE4_1) && (opt_in > 0 || opt_in < 0)) 166 | return IMPL_SSE4; 167 | 168 | return IMPL_C; 169 | }(); 170 | 171 | #define INVALID_PARAM_IF(cond) \ 172 | do { if (cond) { throw("Invalid parameter condition: " #cond); } } while (0) 173 | 174 | INVALID_PARAM_IF(in_vi.Format.IsFamilyYUV != true); 175 | INVALID_PARAM_IF(in_vi.Width < 16); 176 | INVALID_PARAM_IF(in_vi.Height < 16); 177 | INVALID_PARAM_IF(in_vi.Format.SSW < 0 || in_vi.Format.SSW > 4); 178 | INVALID_PARAM_IF(in_vi.Format.SSH < 0 || in_vi.Format.SSH > 4); 179 | INVALID_PARAM_IF(in_vi.Frames <= 0); 180 | INVALID_PARAM_IF(in_vi.Format.BitsPerSample < 8 || in_vi.Format.BitsPerSample > INTERNAL_BIT_DEPTH); 181 | INVALID_PARAM_IF(in_vi.Format.IsInteger != true); 182 | /* A negative output_depth means: same depth as the input. NOTE(review): output_depth itself is not range-checked anywhere in this function. */ 183 | if (ep.output_depth < 0) 184 | ep.output_depth = in_vi.Format.BitsPerSample; 185 | if (ep.output_depth == 16) 186 | // set to appropriate precision mode 187 | ep.dither_algo = DA_16BIT_INTERLEAVED; 188 | 189 | const int y_threshold_upper_limit = scale ? 65535 : 511; 190 | const int cb_threshold_upper_limit = scale ? 65535 : 511; 191 | const int cr_threshold_upper_limit = scale ? 
65535 : 511; 192 | constexpr int dither_upper_limit = 4096; 193 | /* CHECK_PARAM formats its message into the member buffer error_msg and throws a pointer to that buffer, so the message lives as long as this object. */ 194 | #define CHECK_PARAM(value, lower_bound, upper_bound) \ 195 | do { if ((int)value < (int)lower_bound || (int)value > (int)upper_bound) { snprintf(error_msg, sizeof(error_msg), "Invalid parameter %s, must be between %d and %d", #value, lower_bound, upper_bound); throw error_msg; } } while(0) 196 | 197 | CHECK_PARAM(ep.range, 0, 255); 198 | CHECK_PARAM(ep.Y, 0, y_threshold_upper_limit); 199 | CHECK_PARAM(ep.Cb, 0, cb_threshold_upper_limit); 200 | CHECK_PARAM(ep.Cr, 0, cr_threshold_upper_limit); 201 | CHECK_PARAM(ep.grainY, 0, dither_upper_limit); 202 | CHECK_PARAM(ep.grainC, 0, dither_upper_limit); 203 | CHECK_PARAM(ep.sample_mode, 1, 7); 204 | CHECK_PARAM(ep.dither_algo, DA_HIGH_NO_DITHERING, (DA_COUNT - 1) ); 205 | CHECK_PARAM(ep.random_algo_ref, 0, (RANDOM_ALGORITHM_COUNT - 1) ); 206 | CHECK_PARAM(ep.random_algo_grain, 0, (RANDOM_ALGORITHM_COUNT - 1) ); 207 | CHECK_PARAM(ep.Y_1, 0, y_threshold_upper_limit); 208 | CHECK_PARAM(ep.Cb_1, 0, cb_threshold_upper_limit); 209 | CHECK_PARAM(ep.Cr_1, 0, cr_threshold_upper_limit); 210 | CHECK_PARAM(ep.Y_2, 0, y_threshold_upper_limit); 211 | CHECK_PARAM(ep.Cb_2, 0, cb_threshold_upper_limit); 212 | CHECK_PARAM(ep.Cr_2, 0, cr_threshold_upper_limit); 213 | 214 | if (ep.angle_boost < 0.0f) 215 | throw "invalid parameter angle_boost, must be positive value"; 216 | 217 | if (ep.max_angle < 0.0f || ep.max_angle > 1.0f) 218 | throw "invalid parameter max_angle, must be between 0.0 and 1.0"; 219 | 220 | // now the internal bit depth is 16, 221 | // scale parameters to be consistent with 14bit range in previous versions 222 | ep.Y = scale ? ep.Y : ep.Y << 2; 223 | ep.Cb = scale ? ep.Cb : ep.Cb << 2; 224 | ep.Cr = scale ? ep.Cr : ep.Cr << 2; 225 | ep.Y_1 = scale ? ep.Y_1 : ep.Y_1 << 2; 226 | ep.Cb_1 = scale ? ep.Cb_1 : ep.Cb_1 << 2; 227 | ep.Cr_1 = scale ? ep.Cr_1 : ep.Cr_1 << 2; 228 | ep.Y_2 = scale ? ep.Y_2 : ep.Y_2 << 2; 229 | ep.Cb_2 = scale ? 
ep.Cb_2 : ep.Cb_2 << 2; 230 | ep.Cr_2 = scale ? ep.Cr_2 : ep.Cr_2 << 2; /* unlike the thresholds, the grain strengths are shifted regardless of scale */ 231 | ep.grainY <<= 2; 232 | ep.grainC <<= 2; 233 | 234 | out_vi = in_vi; 235 | out_vi.Format.BitsPerSample = ep.output_depth; 236 | out_vi.Format.BytesPerSample = ep.output_depth == 8 ? 1 : 2; 237 | /* std::bad_alloc from constructing the engine is translated into the string exception used by the rest of this function */ 238 | try 239 | { 240 | engine = std::make_unique(in_vi, ep, opt); 241 | } catch (std::bad_alloc&) { 242 | throw "Memory allocation failed"; 243 | } 244 | } 245 | /** GetFrame: creates the output frame with src.Create(out_vi) and calls engine->process_plane once for each of in_vi.Format.Planes planes, through std::for_each_n with PAR_POLICY when ENABLE_PAR is defined and mt is true, otherwise in a plain loop. */ 246 | DSFrame GetFrame(int n, std::unordered_map in_frames) override 247 | { 248 | auto src = in_frames[n]; 249 | auto dst = src.Create(out_vi); 250 | auto core = [&](char&idx) { /* The plane index travels as the address of idx: the iteration starts from a char pointer with value 0, so the address equals the plane number; idx is never read. NOTE(review): forming references from such pointers is questionable - consider iterating over a real index array. */ 251 | int p = static_cast(reinterpret_cast(&idx)); 252 | auto src_stride = src.StrideBytes[p]; 253 | auto src_ptr = src.SrcPointers[p]; 254 | auto dst_stride = dst.StrideBytes[p]; 255 | auto dst_ptr = dst.DstPointers[p]; 256 | 257 | engine->process_plane(n, p, dst_ptr, dst_stride, src_ptr, src_stride); 258 | }; 259 | 260 | #ifdef ENABLE_PAR 261 | if(mt) 262 | std::for_each_n(PAR_POLICY, reinterpret_cast(0), in_vi.Format.Planes, core); 263 | else 264 | #endif 265 | for (intptr_t i = 0; i < in_vi.Format.Planes; i++) 266 | core(*reinterpret_cast(i)); 267 | 268 | return dst; 269 | } 270 | 271 | DSVideoInfo GetOutputVI() override 272 | { 273 | return out_vi; 274 | } 275 | 276 | ~F3KDB() = default; 277 | }; 278 | -------------------------------------------------------------------------------- /VCL2/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | 179 | Copyright 2012-2019 Agner Fog. 180 | 181 | Licensed under the Apache License, Version 2.0 (the "License"); 182 | you may not use this file except in compliance with the License. 
183 | You may obtain a copy of the License at 184 | 185 | http://www.apache.org/licenses/LICENSE-2.0 186 | 187 | Unless required by applicable law or agreed to in writing, software 188 | distributed under the License is distributed on an "AS IS" BASIS, 189 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 190 | See the License for the specific language governing permissions and 191 | limitations under the License. 192 | -------------------------------------------------------------------------------- /src/core.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "core.h" 7 | #include "constants.h" 8 | #include "random.h" 9 | #include "impl_dispatch.h" 10 | 11 | #ifdef _WIN32 12 | #include 13 | #endif 14 | 15 | void f3kdb_core_t::destroy_frame_luts(void) 16 | { 17 | _aligned_free(_y_info); 18 | _aligned_free(_cb_info); 19 | _aligned_free(_cr_info); 20 | 21 | _y_info = NULL; 22 | _cb_info = NULL; 23 | _cr_info = NULL; 24 | 25 | _aligned_free(_grain_buffer_y); 26 | _aligned_free(_grain_buffer_c); 27 | 28 | _grain_buffer_y = NULL; 29 | _grain_buffer_c = NULL; 30 | 31 | free(_grain_buffer_offsets); 32 | _grain_buffer_offsets = NULL; 33 | 34 | // contexts are likely to be dependent on lut, so they must also be destroyed 35 | destroy_context(&_y_context); 36 | destroy_context(&_cb_context); 37 | destroy_context(&_cr_context); 38 | } 39 | 40 | static int inline min_multi( int first, ... 
) 41 | { 42 | int ret = first, i = first; 43 | va_list marker; 44 | 45 | va_start( marker, first ); 46 | while( i >= 0 ) 47 | { 48 | if (i < ret) 49 | { 50 | ret = i; 51 | } 52 | i = va_arg( marker, int); 53 | } 54 | va_end( marker ); 55 | return ret; 56 | } 57 | 58 | static int get_frame_lut_stride(int width_in_pixels) 59 | { 60 | // whole multiples of alignment, so SSE codes don't need to check boundaries 61 | int width = width_in_pixels; 62 | return (((width - 1) | (FRAME_LUT_ALIGNMENT - 1)) + 1); 63 | } 64 | 65 | static short* generate_grain_buffer(size_t item_count, RANDOM_ALGORITHM algo, int& seed, double param, int range) 66 | { 67 | short* buffer = (short*)_aligned_malloc(item_count * sizeof(short), FRAME_LUT_ALIGNMENT); 68 | for (size_t i = 0; i < item_count; i++) 69 | { 70 | *(buffer + i) = random(algo, seed, range, param); 71 | } 72 | return buffer; 73 | } 74 | 75 | void f3kdb_core_t::init_frame_luts(void) 76 | { 77 | destroy_frame_luts(); 78 | 79 | int seed = 0x92D68CA2 - _params.seed; 80 | 81 | seed ^= (_video_info.Width << 16) ^ _video_info.Height; 82 | seed ^= (_video_info.Frames << 16) ^ _video_info.Frames; 83 | 84 | int height_in_pixels = _video_info.Height; 85 | int width_in_pixels = _video_info.Width; 86 | 87 | int y_stride; 88 | y_stride = get_frame_lut_stride(width_in_pixels); 89 | 90 | int y_size = sizeof(pixel_dither_info) * y_stride * height_in_pixels; 91 | _y_info = (pixel_dither_info*)_aligned_malloc(y_size, FRAME_LUT_ALIGNMENT); 92 | 93 | // ensure unused items are also initialized 94 | memset(_y_info, 0, y_size); 95 | 96 | int c_stride; 97 | c_stride = get_frame_lut_stride(width_in_pixels >> _video_info.Format.SSW); 98 | int c_size = sizeof(pixel_dither_info) * c_stride * (height_in_pixels >> _video_info.Format.SSH); 99 | _cb_info = (pixel_dither_info*)_aligned_malloc(c_size, FRAME_LUT_ALIGNMENT); 100 | _cr_info = (pixel_dither_info*)_aligned_malloc(c_size, FRAME_LUT_ALIGNMENT); 101 | 102 | memset(_cb_info, 0, c_size); 103 | 
memset(_cr_info, 0, c_size); 104 | 105 | pixel_dither_info *y_info_ptr, *cb_info_ptr, *cr_info_ptr; 106 | 107 | int width_subsamp = _video_info.Format.SSW; 108 | int height_subsamp = _video_info.Format.SSH; 109 | 110 | for (int y = 0; y < height_in_pixels; y++) 111 | { 112 | y_info_ptr = _y_info + y * y_stride; 113 | cb_info_ptr = _cb_info + (y >> height_subsamp) * c_stride; 114 | cr_info_ptr = _cr_info + (y >> height_subsamp) * c_stride; 115 | 116 | for (int x = 0; x < width_in_pixels; x++) 117 | { 118 | pixel_dither_info info_y = {0, 0, 0}; 119 | info_y.change = random(_params.random_algo_grain, seed, _params.grainY, _params.random_param_grain); 120 | 121 | int x_range = min_multi(_params.range, x, width_in_pixels - x - 1, -1); 122 | int y_range = min_multi(_params.range, y, height_in_pixels - y - 1, -1); 123 | int cur_range = [&]() { 124 | switch (_params.sample_mode) 125 | { 126 | case 1: 127 | return y_range; 128 | 129 | case 3: 130 | return x_range; 131 | 132 | case 2: 133 | case 4: 134 | case 5: 135 | case 6: 136 | case 7: 137 | return min_multi(x_range, y_range, -1); 138 | 139 | default: // unlikely 140 | return 0; 141 | } 142 | }(); 143 | 144 | if (cur_range > 0) { 145 | info_y.ref1 = (signed char)random(_params.random_algo_ref, seed, cur_range, _params.random_param_ref); 146 | if (_params.sample_mode == 2) 147 | { 148 | info_y.ref2 = (signed char)random(_params.random_algo_ref, seed, cur_range, _params.random_param_ref); 149 | } 150 | if (_params.sample_mode > 0) 151 | { 152 | info_y.ref1 = abs(info_y.ref1); 153 | info_y.ref2 = abs(info_y.ref2); 154 | } 155 | } 156 | 157 | *y_info_ptr = info_y; 158 | 159 | bool should_set_c = false; 160 | should_set_c = ((x & ( ( 1 << width_subsamp ) - 1)) == 0 && 161 | (y & ( ( 1 << height_subsamp ) - 1)) == 0); 162 | 163 | if (should_set_c) { 164 | pixel_dither_info info_cb = info_y; 165 | pixel_dither_info info_cr = info_cb; 166 | 167 | // don't shift ref values here, since subsampling of width and height may be 
different 168 | // shift them in actual processing 169 | 170 | info_cb.change = random(_params.random_algo_grain, seed, _params.grainC, _params.random_param_grain); 171 | info_cr.change = random(_params.random_algo_grain, seed, _params.grainC, _params.random_param_grain); 172 | 173 | *cb_info_ptr = info_cb; 174 | *cr_info_ptr = info_cr; 175 | cb_info_ptr++; 176 | cr_info_ptr++; 177 | } 178 | y_info_ptr++; 179 | } 180 | } 181 | 182 | int multiplier = _params.dynamic_grain ? 3 : 1; 183 | int item_count = width_in_pixels; 184 | 185 | // add some safety margin and align it 186 | item_count += 255; 187 | item_count &= 0xffffff80; 188 | 189 | item_count *= height_in_pixels; 190 | 191 | _grain_buffer_y = generate_grain_buffer( 192 | item_count * multiplier, 193 | _params.random_algo_grain, 194 | seed, 195 | _params.random_param_grain, 196 | _params.grainY); 197 | 198 | // we always generate a full-sized buffer to simplify offset calculation 199 | _grain_buffer_c = generate_grain_buffer( 200 | item_count * multiplier, 201 | _params.random_algo_grain, 202 | seed, 203 | _params.random_param_grain, 204 | _params.grainC); 205 | 206 | if (_params.dynamic_grain) 207 | { 208 | // Pre-generate offset here so that result is deterministic even if we request frame in different order 209 | _grain_buffer_offsets = (int*)malloc(sizeof(int) * _video_info.Frames); 210 | for (int i = 0; i < _video_info.Frames; i++) 211 | { 212 | int offset = item_count + random(RANDOM_ALGORITHM_UNIFORM, seed, item_count, DEFAULT_RANDOM_PARAM); 213 | offset &= 0xfffffff0; // align to 16-byte for SSE codes 214 | 215 | assert(offset >= 0); 216 | 217 | _grain_buffer_offsets[i] = offset; 218 | } 219 | } 220 | } 221 | 222 | f3kdb_core_t::f3kdb_core_t(DSVideoInfo vi, const f3kdb_params_t params, OPTIMIZATION_MODE opt) : 223 | _process_plane_impl(NULL), 224 | _y_info(NULL), 225 | _cb_info(NULL), 226 | _cr_info(NULL), 227 | _grain_buffer_y(NULL), 228 | _grain_buffer_c(NULL), 229 | _grain_buffer_offsets(NULL), 230 | 
_video_info(vi), 231 | _opt(opt), 232 | _params(params) 233 | { 234 | this->init(); 235 | } 236 | 237 | f3kdb_core_t::~f3kdb_core_t() 238 | { 239 | destroy_frame_luts(); 240 | } 241 | 242 | static __inline int select_impl_index(int sample_mode, bool blur_first) 243 | { 244 | assert(sample_mode != 0); 245 | return sample_mode * 2 + (blur_first ? 0 : 1) - 1; 246 | } 247 | 248 | void f3kdb_core_t::init(void) 249 | { 250 | init_context(&_y_context); 251 | init_context(&_cb_context); 252 | init_context(&_cr_context); 253 | 254 | init_frame_luts(); 255 | 256 | const process_plane_impl_t* impl_table = process_plane_impls[_params.dither_algo][(int)_opt]; 257 | _process_plane_impl = impl_table[select_impl_index(_params.sample_mode, _params.blur_first)]; 258 | } 259 | 260 | void f3kdb_core_t::process_plane(int frame_index, int plane, unsigned char* dst_frame_ptr, int dst_pitch, const unsigned char* src_frame_ptr, int src_pitch) 261 | { 262 | process_plane_params params; 263 | 264 | memset(¶ms, 0, sizeof(process_plane_params)); 265 | 266 | params.src_plane_ptr = src_frame_ptr; 267 | params.src_pitch = src_pitch; 268 | 269 | params.dst_plane_ptr = dst_frame_ptr; 270 | params.dst_pitch = dst_pitch; 271 | 272 | params.input_mode = _video_info.Format.BitsPerSample == 8 ? LOW_BIT_DEPTH : HIGH_BIT_DEPTH_INTERLEAVED; 273 | params.input_depth = _video_info.Format.BitsPerSample; 274 | params.output_mode = _params.output_depth <= 8 ? LOW_BIT_DEPTH : HIGH_BIT_DEPTH_INTERLEAVED; 275 | params.output_depth = _params.output_depth; 276 | params.angle_boost = _params.angle_boost; 277 | params.max_angle = _params.max_angle; 278 | 279 | params.plane = plane; 280 | 281 | params.width_subsampling = plane == 0 ? 0 : _video_info.Format.SSW; 282 | params.height_subsampling = plane == 0 ? 0 : _video_info.Format.SSH; 283 | 284 | params.plane_width_in_pixels = plane == 0 ? _video_info.Width : (_video_info.Width >> _video_info.Format.SSW); 285 | params.plane_height_in_pixels = plane == 0 ? 
_video_info.Height : (_video_info.Height >> _video_info.Format.SSH); 286 | 287 | params.info_stride = get_frame_lut_stride(params.plane_width_in_pixels); 288 | params.grain_buffer_stride = get_frame_lut_stride(params.plane_width_in_pixels); 289 | 290 | process_plane_context* context; 291 | 292 | int grain_setting = 0; 293 | 294 | switch (plane) 295 | { 296 | case 0: 297 | params.info_ptr_base = _y_info; 298 | params.threshold = _params.Y; 299 | params.threshold1 = _params.Y_1; 300 | params.threshold2 = _params.Y_2; 301 | params.pixel_max = _params.keep_tv_range ? TV_RANGE_Y_MAX : FULL_RANGE_Y_MAX; 302 | params.pixel_min = _params.keep_tv_range ? TV_RANGE_Y_MIN : FULL_RANGE_Y_MIN; 303 | params.grain_buffer = _grain_buffer_y; 304 | grain_setting = _params.grainY; 305 | context = &_y_context; 306 | break; 307 | case 1: 308 | params.info_ptr_base = _cb_info; 309 | params.threshold = _params.Cb; 310 | params.threshold1 = _params.Cb_1; 311 | params.threshold2 = _params.Cb_2; 312 | params.pixel_max = _params.keep_tv_range ? TV_RANGE_C_MAX : FULL_RANGE_C_MAX; 313 | params.pixel_min = _params.keep_tv_range ? TV_RANGE_C_MIN : FULL_RANGE_C_MIN; 314 | params.grain_buffer = _grain_buffer_c; 315 | grain_setting = _params.grainC; 316 | context = &_cb_context; 317 | break; 318 | case 2: 319 | params.info_ptr_base = _cr_info; 320 | params.threshold = _params.Cr; 321 | params.threshold1 = _params.Cr_1; 322 | params.threshold2 = _params.Cr_2; 323 | params.pixel_max = _params.keep_tv_range ? TV_RANGE_C_MAX : FULL_RANGE_C_MAX; 324 | params.pixel_min = _params.keep_tv_range ? 
TV_RANGE_C_MIN : FULL_RANGE_C_MIN; 325 | params.grain_buffer = _grain_buffer_c; 326 | grain_setting = _params.grainC; 327 | context = &_cr_context; 328 | break; 329 | default: 330 | abort(); 331 | } 332 | 333 | if (_grain_buffer_offsets) 334 | { 335 | params.grain_buffer += _grain_buffer_offsets[frame_index % _video_info.Frames]; 336 | } 337 | 338 | bool copy_plane = false; 339 | if (_video_info.Format.BitsPerSample == _params.output_depth && 340 | grain_setting == 0 && 341 | params.threshold == 0 && params.threshold1 == 0 && params.threshold2 == 0) 342 | { 343 | copy_plane = true; 344 | } 345 | 346 | if (copy_plane) { 347 | // no need to process 348 | int line_size = params.get_src_width(); 349 | auto src = src_frame_ptr; 350 | auto dst = dst_frame_ptr; 351 | if (line_size == src_pitch && src_pitch == dst_pitch) 352 | { 353 | memcpy(dst, src, line_size * params.get_src_height()); 354 | } else { 355 | for (int row = 0; row < params.get_src_height(); row++) 356 | { 357 | memcpy(dst, src, line_size); 358 | src += src_pitch; 359 | dst += dst_pitch; 360 | } 361 | } 362 | return; 363 | } 364 | 365 | _process_plane_impl(params, context); 366 | } 367 | -------------------------------------------------------------------------------- /VCL2/vectormath_common.h: -------------------------------------------------------------------------------- 1 | /*************************** vectormath_common.h **************************** 2 | * Author: Agner Fog 3 | * Date created: 2014-04-18 4 | * Last modified: 2022-07-20 5 | * Version: 2.02.00 6 | * Project: vector classes 7 | * Description: 8 | * Header file containing common code for inline version of mathematical functions. 9 | * 10 | * For detailed instructions, see VectorClass.pdf 11 | * 12 | * (c) Copyright 2014-2022 Agner Fog. 13 | * Apache License version 2.0 or later. 
14 | ******************************************************************************/ 15 | 16 | #ifndef VECTORMATH_COMMON_H 17 | #define VECTORMATH_COMMON_H 2 18 | 19 | #ifdef VECTORMATH_LIB_H 20 | #error conflicting header files. More than one implementation of mathematical functions included 21 | #endif 22 | 23 | #include 24 | 25 | #ifndef VECTORCLASS_H 26 | #include "vectorclass.h" 27 | #endif 28 | 29 | #if VECTORCLASS_H < 20200 30 | #error Incompatible versions of vector class library mixed 31 | #endif 32 | 33 | 34 | /****************************************************************************** 35 | Define NAN payload values 36 | ******************************************************************************/ 37 | #define NAN_LOG 0x101 // logarithm for x<0 38 | #define NAN_POW 0x102 // negative number raised to non-integer power 39 | #define NAN_HYP 0x104 // acosh for x<1 and atanh for abs(x)>1 40 | 41 | 42 | /****************************************************************************** 43 | Define mathematical constants 44 | ******************************************************************************/ 45 | #define VM_PI 3.14159265358979323846 // pi 46 | #define VM_PI_2 1.57079632679489661923 // pi / 2 47 | #define VM_PI_4 0.785398163397448309616 // pi / 4 48 | #define VM_SQRT2 1.41421356237309504880 // sqrt(2) 49 | #define VM_LOG2E 1.44269504088896340736 // 1/log(2) 50 | #define VM_LOG10E 0.434294481903251827651 // 1/log(10) 51 | #define VM_LOG210 3.321928094887362347808 // log2(10) 52 | #define VM_LN2 0.693147180559945309417 // log(2) 53 | #define VM_LN10 2.30258509299404568402 // log(10) 54 | #define VM_SMALLEST_NORMAL 2.2250738585072014E-308 // smallest normal number, double 55 | #define VM_SMALLEST_NORMALF 1.17549435E-38f // smallest normal number, float 56 | 57 | 58 | #ifdef VCL_NAMESPACE 59 | namespace VCL_NAMESPACE { 60 | #endif 61 | 62 | /****************************************************************************** 63 | templates for producing 
infinite and nan in desired vector type 64 | ******************************************************************************/ 65 | template 66 | static inline VTYPE infinite_vec(); 67 | 68 | template <> 69 | inline Vec2d infinite_vec() { 70 | return infinite2d(); 71 | } 72 | 73 | template <> 74 | inline Vec4f infinite_vec() { 75 | return infinite4f(); 76 | } 77 | 78 | #if MAX_VECTOR_SIZE >= 256 79 | 80 | template <> 81 | inline Vec4d infinite_vec() { 82 | return infinite4d(); 83 | } 84 | 85 | template <> 86 | inline Vec8f infinite_vec() { 87 | return infinite8f(); 88 | } 89 | 90 | #endif // MAX_VECTOR_SIZE >= 256 91 | 92 | #if MAX_VECTOR_SIZE >= 512 93 | 94 | template <> 95 | inline Vec8d infinite_vec() { 96 | return infinite8d(); 97 | } 98 | 99 | template <> 100 | inline Vec16f infinite_vec() { 101 | return infinite16f(); 102 | } 103 | 104 | #endif // MAX_VECTOR_SIZE >= 512 105 | 106 | 107 | 108 | /****************************************************************************** 109 | * Detect NAN codes 110 | * 111 | * These functions return the code hidden in a NAN. The sign bit is ignored 112 | ******************************************************************************/ 113 | 114 | static inline Vec4ui nan_code(Vec4f const x) { 115 | Vec4ui a = Vec4ui(reinterpret_i(x)); 116 | Vec4ui const n = 0x007FFFFF; 117 | return select(Vec4ib(is_nan(x)), a & n, 0); 118 | } 119 | 120 | // This function returns the code hidden in a NAN. The sign bit is ignored 121 | static inline Vec2uq nan_code(Vec2d const x) { 122 | Vec2uq a = Vec2uq(reinterpret_i(x)); 123 | return select(Vec2qb(is_nan(x)), a << 12 >> (12+29), 0); 124 | } 125 | 126 | #if MAX_VECTOR_SIZE >= 256 127 | 128 | // This function returns the code hidden in a NAN. 
The sign bit is ignored 129 | static inline Vec8ui nan_code(Vec8f const x) { 130 | Vec8ui a = Vec8ui(reinterpret_i(x)); 131 | Vec8ui const n = 0x007FFFFF; 132 | return select(Vec8ib(is_nan(x)), a & n, 0); 133 | } 134 | 135 | // This function returns the code hidden in a NAN. The sign bit is ignored 136 | static inline Vec4uq nan_code(Vec4d const x) { 137 | Vec4uq a = Vec4uq(reinterpret_i(x)); 138 | return select(Vec4qb(is_nan(x)), a << 12 >> (12+29), 0); 139 | } 140 | 141 | #endif // MAX_VECTOR_SIZE >= 256 142 | #if MAX_VECTOR_SIZE >= 512 143 | 144 | // This function returns the code hidden in a NAN. The sign bit is ignored 145 | static inline Vec16ui nan_code(Vec16f const x) { 146 | Vec16ui a = Vec16ui(reinterpret_i(x)); 147 | Vec16ui const n = 0x007FFFFF; 148 | return select(Vec16ib(is_nan(x)), a & n, 0); 149 | } 150 | 151 | // This function returns the code hidden in a NAN. The sign bit is ignored 152 | static inline Vec8uq nan_code(Vec8d const x) { 153 | Vec8uq a = Vec8uq(reinterpret_i(x)); 154 | return select(Vec8qb(is_nan(x)), a << 12 >> (12+29), 0); 155 | } 156 | 157 | #endif // MAX_VECTOR_SIZE >= 512 158 | 159 | 160 | /****************************************************************************** 161 | templates for polynomials 162 | Using Estrin's scheme to make shorter dependency chains and use FMA, starting 163 | longest dependency chains first. 
164 | ******************************************************************************/ 165 | 166 | // template 167 | template 168 | static inline VTYPE polynomial_2(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2) { 169 | // calculates polynomial c2*x^2 + c1*x + c0 170 | // VTYPE may be a vector type, CTYPE is a scalar type 171 | VTYPE x2 = x * x; 172 | //return = x2 * c2 + (x * c1 + c0); 173 | return mul_add(x2, c2, mul_add(x, c1, c0)); 174 | } 175 | 176 | template 177 | static inline VTYPE polynomial_3(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3) { 178 | // calculates polynomial c3*x^3 + c2*x^2 + c1*x + c0 179 | // VTYPE may be a vector type, CTYPE is a scalar type 180 | VTYPE x2 = x * x; 181 | //return (c2 + c3*x)*x2 + (c1*x + c0); 182 | return mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0)); 183 | } 184 | 185 | template 186 | static inline VTYPE polynomial_4(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4) { 187 | // calculates polynomial c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0 188 | // VTYPE may be a vector type, CTYPE is a scalar type 189 | VTYPE x2 = x * x; 190 | VTYPE x4 = x2 * x2; 191 | //return (c2+c3*x)*x2 + ((c0+c1*x) + c4*x4); 192 | return mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0) + c4*x4); 193 | } 194 | 195 | template 196 | static inline VTYPE polynomial_4n(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3) { 197 | // calculates polynomial 1*x^4 + c3*x^3 + c2*x^2 + c1*x + c0 198 | // VTYPE may be a vector type, CTYPE is a scalar type 199 | VTYPE x2 = x * x; 200 | VTYPE x4 = x2 * x2; 201 | //return (c2+c3*x)*x2 + ((c0+c1*x) + x4); 202 | return mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0) + x4); 203 | } 204 | 205 | template 206 | static inline VTYPE polynomial_5(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5) { 207 | // calculates polynomial c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0 208 | // VTYPE may be a vector type, CTYPE is a scalar type 209 | VTYPE x2 = x * x; 210 | VTYPE x4 = 
x2 * x2; 211 | //return (c2+c3*x)*x2 + ((c4+c5*x)*x4 + (c0+c1*x)); 212 | return mul_add(mul_add(c3, x, c2), x2, mul_add(mul_add(c5, x, c4), x4, mul_add(c1, x, c0))); 213 | } 214 | 215 | template 216 | static inline VTYPE polynomial_5n(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4) { 217 | // calculates polynomial 1*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0 218 | // VTYPE may be a vector type, CTYPE is a scalar type 219 | VTYPE x2 = x * x; 220 | VTYPE x4 = x2 * x2; 221 | //return (c2+c3*x)*x2 + ((c4+x)*x4 + (c0+c1*x)); 222 | return mul_add(mul_add(c3, x, c2), x2, mul_add(c4 + x, x4, mul_add(c1, x, c0))); 223 | } 224 | 225 | template 226 | static inline VTYPE polynomial_6(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6) { 227 | // calculates polynomial c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0 228 | // VTYPE may be a vector type, CTYPE is a scalar type 229 | VTYPE x2 = x * x; 230 | VTYPE x4 = x2 * x2; 231 | //return (c4+c5*x+c6*x2)*x4 + ((c2+c3*x)*x2 + (c0+c1*x)); 232 | return mul_add(mul_add(c6, x2, mul_add(c5, x, c4)), x4, mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0))); 233 | } 234 | 235 | template 236 | static inline VTYPE polynomial_6n(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5) { 237 | // calculates polynomial 1*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0 238 | // VTYPE may be a vector type, CTYPE is a scalar type 239 | VTYPE x2 = x * x; 240 | VTYPE x4 = x2 * x2; 241 | //return (c4+c5*x+x2)*x4 + ((c2+c3*x)*x2 + (c0+c1*x)); 242 | return mul_add(mul_add(c5, x, c4 + x2), x4, mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0))); 243 | } 244 | 245 | template 246 | static inline VTYPE polynomial_7(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7) { 247 | // calculates polynomial c7*x^7 + c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0 248 | // VTYPE may be a vector type, CTYPE is a scalar type 249 | VTYPE 
x2 = x * x; 250 | VTYPE x4 = x2 * x2; 251 | //return ((c6+c7*x)*x2 + (c4+c5*x))*x4 + ((c2+c3*x)*x2 + (c0+c1*x)); 252 | return mul_add(mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4, mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0))); 253 | } 254 | 255 | template 256 | static inline VTYPE polynomial_8(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8) { 257 | // calculates polynomial c8*x^8 + c7*x^7 + c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0 258 | // VTYPE may be a vector type, CTYPE is a scalar type 259 | VTYPE x2 = x * x; 260 | VTYPE x4 = x2 * x2; 261 | VTYPE x8 = x4 * x4; 262 | //return ((c6+c7*x)*x2 + (c4+c5*x))*x4 + (c8*x8 + (c2+c3*x)*x2 + (c0+c1*x)); 263 | return mul_add(mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4, 264 | mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0) + c8*x8)); 265 | } 266 | 267 | template 268 | static inline VTYPE polynomial_9(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8, CTYPE c9) { 269 | // calculates polynomial c9*x^9 + c8*x^8 + c7*x^7 + c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0 270 | // VTYPE may be a vector type, CTYPE is a scalar type 271 | VTYPE x2 = x * x; 272 | VTYPE x4 = x2 * x2; 273 | VTYPE x8 = x4 * x4; 274 | //return (((c6+c7*x)*x2 + (c4+c5*x))*x4 + (c8+c9*x)*x8) + ((c2+c3*x)*x2 + (c0+c1*x)); 275 | return mul_add(mul_add(c9, x, c8), x8, mul_add( 276 | mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4, 277 | mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0)))); 278 | } 279 | 280 | template 281 | static inline VTYPE polynomial_10(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8, CTYPE c9, CTYPE c10) { 282 | // calculates polynomial c10*x^10 + c9*x^9 + c8*x^8 + c7*x^7 + c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0 283 | // VTYPE may be a vector type, CTYPE is a scalar type 284 | VTYPE x2 = x * x; 
285 | VTYPE x4 = x2 * x2; 286 | VTYPE x8 = x4 * x4; 287 | //return (((c6+c7*x)*x2 + (c4+c5*x))*x4 + (c8+c9*x+c10*x2)*x8) + ((c2+c3*x)*x2 + (c0+c1*x)); 288 | return mul_add(mul_add(x2, c10, mul_add(c9, x, c8)), x8, 289 | mul_add(mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4, 290 | mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0)))); 291 | } 292 | 293 | template 294 | static inline VTYPE polynomial_13(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8, CTYPE c9, CTYPE c10, CTYPE c11, CTYPE c12, CTYPE c13) { 295 | // calculates polynomial c13*x^13 + c12*x^12 + ... + c1*x + c0 296 | // VTYPE may be a vector type, CTYPE is a scalar type 297 | VTYPE x2 = x * x; 298 | VTYPE x4 = x2 * x2; 299 | VTYPE x8 = x4 * x4; 300 | return mul_add( 301 | mul_add( 302 | mul_add(c13, x, c12), x4, 303 | mul_add(mul_add(c11, x, c10), x2, mul_add(c9, x, c8))), x8, 304 | mul_add( 305 | mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4, 306 | mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0)))); 307 | } 308 | 309 | 310 | template 311 | static inline VTYPE polynomial_13m(VTYPE const x, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8, CTYPE c9, CTYPE c10, CTYPE c11, CTYPE c12, CTYPE c13) { 312 | // calculates polynomial c13*x^13 + c12*x^12 + ... + x + 0 313 | // VTYPE may be a vector type, CTYPE is a scalar type 314 | VTYPE x2 = x * x; 315 | VTYPE x4 = x2 * x2; 316 | VTYPE x8 = x4 * x4; 317 | // return ((c8+c9*x) + (c10+c11*x)*x2 + (c12+c13*x)*x4)*x8 + (((c6+c7*x)*x2 + (c4+c5*x))*x4 + ((c2+c3*x)*x2 + x)); 318 | return mul_add( 319 | mul_add(mul_add(c13, x, c12), x4, mul_add(mul_add(c11, x, c10), x2, mul_add(c9, x, c8))), x8, 320 | mul_add(mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4, mul_add(mul_add(c3, x, c2), x2, x))); 321 | } 322 | 323 | #ifdef VCL_NAMESPACE 324 | } 325 | #endif 326 | 327 | #endif 328 | --------------------------------------------------------------------------------