├── .gitignore
├── src
    ├── version.hpp.in
    ├── utils.h
    ├── impl_dispatch.h
    ├── random.h
    ├── flash3kyuu_deband_impl_avx2.cpp
    ├── flash3kyuu_deband_impl_avx512.cpp
    ├── process_plane_context.h
    ├── flash3kyuu_deband_impl_sse4.cpp
    ├── process_plane_context.cpp
    ├── constants.h
    ├── pixel_proc_c_high_no_dithering.h
    ├── neo_f3kdb.hpp
    ├── sse_utils.h
    ├── version.rc.in
    ├── pixel_proc_c_high_bit_depth_common.h
    ├── compiler_compat.h
    ├── pixel_proc_c_16bit.h
    ├── bit_utils.h
    ├── f3kdb.h
    ├── impl_dispatch.cpp
    ├── pixel_proc_c.h
    ├── random.cpp
    ├── pixel_proc_c_high_ordered_dithering.h
    ├── core.h
    ├── impl_dispatch_decl.h
    ├── pixel_proc_c_high_f_s_dithering.h
    ├── cpuid.cpp
    ├── dither_high.h
    ├── f3kdb.hpp
    └── core.cpp
├── debian
    ├── changelog
    ├── rules
    ├── control
    └── copyright
├── main.cpp
├── include
    ├── avisynth
    │   └── avs
    │   │   ├── filesystem.h
    │   │   ├── minmax.h
    │   │   ├── types.h
    │   │   ├── win.h
    │   │   ├── cpuid.h
    │   │   ├── capi.h
    │   │   ├── alignment.h
    │   │   ├── posix.h
    │   │   └── config.h
    ├── dualsynth
    │   ├── ds_videoinfo.hpp
    │   ├── ds_common.hpp
    │   ├── ds_filter.hpp
    │   ├── ds_format.hpp
    │   ├── avs_wrapper.hpp
    │   ├── vs_wrapper.hpp
    │   └── ds_frame.hpp
    └── vapoursynth
    │   ├── VSScript.h
    │   └── VSHelper.h
├── .github
    └── workflows
    │   └── windows.yml
├── VCL2
    ├── vectorclass.h
    ├── instrset_detect.cpp
    ├── LICENSE
    └── vectormath_common.h
├── README.md
└── CMakeLists.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | *.user
2 | *.dyn
3 | *.obj
4 | *.sublime-*
5 | build
6 | *.DS_Store
7 | 


--------------------------------------------------------------------------------
/src/version.hpp.in:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #define PLUGIN_VERSION "@PROJECT_VERSION_STRING_FULL@"


--------------------------------------------------------------------------------
/debian/changelog:
--------------------------------------------------------------------------------
1 | neo-f3kdb (0.07) unstable; urgency=medium
2 | 
3 |   * Initial release
4 | 
5 |  -- Xinyue Lu <i@7086.in>  Fri, 01 Jan 2021 00:00:00 -0500
6 | 


--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
1 | #include <ds_common.hpp>
2 | #include <avs_wrapper.hpp>
3 | #include <vs_wrapper.hpp>
4 | #include <ds_filter.hpp>
5 | #include "src/neo_f3kdb.hpp"
6 | 


--------------------------------------------------------------------------------
/src/utils.h:
--------------------------------------------------------------------------------
 1 | 
 2 | static inline int clamp_pixel(int pixel, int min, int max)
 3 | {
 4 |     if (pixel > max) {
 5 |         pixel = max;
 6 |     } else if (pixel < min) {
 7 |         pixel = min;
 8 |     }
 9 |     return pixel;
10 | }
11 | 
12 | 


--------------------------------------------------------------------------------
/src/impl_dispatch.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "core.h"
 4 | 
 5 | extern const process_plane_impl_t** process_plane_impls[];
 6 | 
 7 | #define DITHER_CONTEXT_BUFFER_SIZE 8192
 8 | 
 9 | #define CONTEXT_BUFFER_SIZE DITHER_CONTEXT_BUFFER_SIZE
10 | 


--------------------------------------------------------------------------------
/src/random.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "constants.h"
 4 | #include "f3kdb.h"
 5 | 
 6 | #define DEFAULT_RANDOM_PARAM 1.0
 7 | 
 8 | // returns a random number in [-range, range]
 9 | int random(RANDOM_ALGORITHM algo, int& seed, int range, double param);
10 | 


--------------------------------------------------------------------------------
/src/flash3kyuu_deband_impl_avx2.cpp:
--------------------------------------------------------------------------------
1 | #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
2 | #include <immintrin.h>
3 | #include "flash3kyuu_deband_avx2_base.h"
4 | 
5 | #define DECLARE_IMPL_AVX2
6 | #include "impl_dispatch_decl.h"
7 | #endif
8 | 


--------------------------------------------------------------------------------
/src/flash3kyuu_deband_impl_avx512.cpp:
--------------------------------------------------------------------------------
1 | #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
2 | #include <immintrin.h>
3 | #include "flash3kyuu_deband_avx512_base.h"
4 | 
5 | #define DECLARE_IMPL_AVX512
6 | #include "impl_dispatch_decl.h"
7 | #endif
8 | 


--------------------------------------------------------------------------------
/src/process_plane_context.h:
--------------------------------------------------------------------------------
 1 | 
 2 | typedef void (*destroy_data_t)(void* data);
 3 | 
 4 | typedef struct _process_plane_context
 5 | {
 6 | 	void* data;
 7 | 	destroy_data_t destroy;
 8 | } process_plane_context;
 9 | 
10 | void destroy_context(process_plane_context* context);
11 | 
12 | void init_context(process_plane_context* context);


--------------------------------------------------------------------------------
/src/flash3kyuu_deband_impl_sse4.cpp:
--------------------------------------------------------------------------------
 1 | #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
 2 | #include <smmintrin.h>
 3 | #elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
 4 | #include "sse2neon.h"
 5 | #endif
 6 | #include "flash3kyuu_deband_sse_base.h"
 7 | 
 8 | #define DECLARE_IMPL_SSE4
 9 | #include "impl_dispatch_decl.h"
10 | 


--------------------------------------------------------------------------------
/include/avisynth/avs/filesystem.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // Snippet copied from filesystem/README.md
 4 | 
 5 | #if defined(__cplusplus) && __cplusplus >= 201703L && defined(__has_include)
 6 | #if __has_include(<filesystem>)
 7 | #define GHC_USE_STD_FS
 8 | #include <filesystem>
 9 | namespace fs = std::filesystem;
10 | #endif
11 | #endif
12 | #ifndef GHC_USE_STD_FS
13 | #include <ghc/filesystem.hpp>
14 | namespace fs = ghc::filesystem;
15 | #endif
16 | 


--------------------------------------------------------------------------------
/src/process_plane_context.cpp:
--------------------------------------------------------------------------------
 1 | #include "process_plane_context.h"
 2 | 
 3 | #include <cstring>
 4 | #include <assert.h>
 5 | 
 6 | void destroy_context(process_plane_context* context)
 7 | {
 8 |     assert(context);
 9 | 
10 |     if (context->data) {
11 |         assert(context->destroy);
12 |         context->destroy(context->data);
13 |         memset(context, 0, sizeof(process_plane_context));
14 |     }
15 | }
16 | 
17 | void init_context(process_plane_context* context)
18 | {
19 |     assert(context);
20 |     memset(context, 0, sizeof(process_plane_context));
21 | }
22 | 


--------------------------------------------------------------------------------
/debian/rules:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/make -f
 2 | 
 3 | include /usr/share/dpkg/architecture.mk
 4 | 
 5 | %:
 6 | 	dh $@
 7 | 
 8 | override_dh_auto_install:
 9 | 	install -m 755 -D -t debian/dualsynth-neo-f3kdb-yuuki/usr/lib/$(DEB_HOST_MULTIARCH)/dualsynth obj-$(DEB_HOST_MULTIARCH)/libneo-f3kdb.so
10 | 	mkdir debian/dualsynth-neo-f3kdb-yuuki/usr/lib/$(DEB_HOST_MULTIARCH)/avisynth
11 | 	mkdir debian/dualsynth-neo-f3kdb-yuuki/usr/lib/$(DEB_HOST_MULTIARCH)/vapoursynth
12 | 	ln -s ../dualsynth/libneo-f3kdb.so debian/dualsynth-neo-f3kdb-yuuki/usr/lib/$(DEB_HOST_MULTIARCH)/avisynth/
13 | 	ln -s ../dualsynth/libneo-f3kdb.so debian/dualsynth-neo-f3kdb-yuuki/usr/lib/$(DEB_HOST_MULTIARCH)/vapoursynth/
14 | 


--------------------------------------------------------------------------------
/src/constants.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // alignment for SSE operations
 4 | #define FRAME_LUT_ALIGNMENT 16
 5 | 
 6 | #define INTERNAL_BIT_DEPTH 16
 7 | 
 8 | // these range values are defined in internal bit depth
 9 | #define TV_RANGE_Y_MIN (16 << (INTERNAL_BIT_DEPTH - 8))
10 | #define TV_RANGE_Y_MAX (235 << (INTERNAL_BIT_DEPTH - 8))
11 | 
12 | #define TV_RANGE_C_MIN TV_RANGE_Y_MIN
13 | #define TV_RANGE_C_MAX (240 << (INTERNAL_BIT_DEPTH - 8))
14 | 
15 | #define FULL_RANGE_Y_MIN 0
16 | #define FULL_RANGE_Y_MAX ((1 << INTERNAL_BIT_DEPTH) - 1)
17 | 
18 | #define FULL_RANGE_C_MIN FULL_RANGE_Y_MIN
19 | #define FULL_RANGE_C_MAX FULL_RANGE_Y_MAX
20 | 
21 | #define VALUE_8BIT(x) ( x >> ( INTERNAL_BIT_DEPTH - 8 ) )
22 | 
23 | 


--------------------------------------------------------------------------------
/src/pixel_proc_c_high_no_dithering.h:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | namespace pixel_proc_high_no_dithering {
 4 | 	
 5 | 	static inline void init_context(char context_buffer[CONTEXT_BUFFER_SIZE], int frame_width, int output_depth)
 6 | 	{
 7 | 		// nothing to do
 8 | 	}
 9 | 
10 | 	static inline void destroy_context(void* context)
11 | 	{
12 | 		// nothing to do
13 | 	}
14 | 
15 | 	static inline void next_pixel(void* context)
16 | 	{
17 | 		// nothing to do
18 | 	}
19 | 
20 | 	static inline void next_row(void* context)
21 | 	{
22 | 		// nothing to do
23 | 	}
24 | 
25 | 	static inline int dither(void* context, int pixel, int row, int column)
26 | 	{
27 | 		return pixel;
28 | 	}
29 | 
30 | 	#include "pixel_proc_c_high_bit_depth_common.h"
31 | 
32 | };


--------------------------------------------------------------------------------
/src/neo_f3kdb.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2020 Xinyue Lu
 3 |  *
 4 |  * DualSynth bridge - plugin.
 5 |  *
 6 |  */
 7 | 
 8 | #pragma once
 9 | 
10 | #include "version.hpp"
11 | #include "f3kdb.hpp"
12 | 
13 | namespace Plugin {
14 |   const char* Identifier = "in.7086.neo_f3kdb";
15 |   const char* Namespace = "neo_f3kdb";
16 |   const char* Description = "Neo F3KDB Deband Filter " PLUGIN_VERSION;
17 | }
18 | 
19 | std::vector<register_vsfilter_proc> RegisterVSFilters()
20 | {
21 |   return std::vector<register_vsfilter_proc> { VSInterface::RegisterFilter<F3KDB> };
22 | }
23 | 
24 | std::vector<register_avsfilter_proc> RegisterAVSFilters()
25 | {
26 |   return std::vector<register_avsfilter_proc> { AVSInterface::RegisterFilter<F3KDB> };
27 | }
28 | 


--------------------------------------------------------------------------------
/src/sse_utils.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // See Intel Optimization Guide: Ch. 5.6.6.2 Clipping to an Arbitrary Unsigned Range [High, Low]
 4 | // high_add = 0xffff - high
 5 | // high_sub = 0xffff - high + low
 6 | static __m128i __forceinline high_bit_depth_pixels_clamp(__m128i pixels, __m128i high_add, __m128i high_sub, const __m128i& low)
 7 | {
 8 |     pixels = _mm_adds_epu16(pixels, high_add);
 9 |     pixels = _mm_subs_epu16(pixels, high_sub);
10 |     pixels = _mm_add_epi16(pixels, low);
11 | 
12 |     return pixels;
13 | }
14 | 
15 | 
16 | // like high_bit_depth_pixels_clamp, but all values are 8bit
17 | static __m128i __forceinline low_bit_depth_pixels_clamp(__m128i pixels, __m128i high_add, __m128i high_sub, const __m128i& low)
18 | {
19 |     pixels = _mm_adds_epu8(pixels, high_add);
20 |     pixels = _mm_subs_epu8(pixels, high_sub);
21 |     pixels = _mm_add_epi8(pixels, low);
22 | 
23 |     return pixels;
24 | }
25 | 


--------------------------------------------------------------------------------
/debian/control:
--------------------------------------------------------------------------------
 1 | Source: neo-f3kdb
 2 | Section: libs
 3 | Priority: optional
 4 | Maintainer: Xinyue Lu <i@7086.in>
 5 | Bugs: https://github.com/HomeOfAviSynthPlusEvolution/neo_f3kdb/issues
 6 | Homepage: https://github.com/HomeOfAviSynthPlusEvolution/neo_f3kdb
 7 | Standards-Version: 4.5.0
 8 | Build-Depends: debhelper-compat (= 12),
 9 |  avisynthplus-yuuki-dev,
10 |  liblsmash-yuuki-dev
11 | 
12 | # -yuuki tag is used for yuuki's own builds.
13 | # Replace it with your own tag for your own builds.
14 | 
15 | Package: dualsynth-neo-f3kdb-yuuki
16 | Architecture: any
17 | Multi-Arch: same
18 | Depends: ${shlibs:Depends}, ${misc:Depends}
19 | Provides: dualsynth-neo-f3kdb, neo-f3kdb
20 | Conflicts: dualsynth-neo-f3kdb, neo-f3kdb
21 | Description: Plugin for AviSynthPlus and VapourSynth: neo-f3kdb
22 |  F3KDB is a deband filter. It was originally written for AviUtl by bunyuchan and later ported to AviSynth by SAPikachu many years ago.
23 | 


--------------------------------------------------------------------------------
/src/version.rc.in:
--------------------------------------------------------------------------------
 1 | #include <winver.h>
 2 | 
 3 | VS_VERSION_INFO VERSIONINFO
 4 | FILEVERSION     @VERSION_MAJOR@, @VERSION_MINOR@, @VERSION_PATCH@, @VERSION_BUILD@
 5 | PRODUCTVERSION  @VERSION_MAJOR@, @VERSION_MINOR@, @VERSION_PATCH@, @VERSION_BUILD@
 6 | FILEFLAGSMASK   VS_FFI_FILEFLAGSMASK
 7 | @RC_FILEFLAGS_LINE@
 8 | FILEOS          VOS_NT_WINDOWS32
 9 | FILETYPE        VFT_DLL
10 | FILESUBTYPE     VFT2_UNKNOWN
11 | BEGIN
12 |     BLOCK "StringFileInfo"
13 |     BEGIN
14 |         BLOCK "040904B0"
15 |         BEGIN
16 |             VALUE "FileDescription",  "@FILE_DESCRIPTION@"
17 |             VALUE "FileVersion",      "@PROJECT_VERSION_STRING_FULL@"
18 |             VALUE "InternalName",     "@INTERNAL_NAME@"
19 |             VALUE "OriginalFilename", "@ORIGINAL_FILENAME@"
20 |             VALUE "ProductName",      "@PRODUCT_NAME@"
21 |             VALUE "ProductVersion",   "@PROJECT_VERSION_STRING_FULL@"
22 |         END
23 |     END
24 |     BLOCK "VarFileInfo"
25 |     BEGIN
26 |         VALUE "Translation", 0x0409, 1200
27 |     END
28 | END
29 | 


--------------------------------------------------------------------------------
/debian/copyright:
--------------------------------------------------------------------------------
 1 | Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
 2 | Upstream-Name: neo_f3kdb
 3 | Source: https://github.com/HomeOfAviSynthPlusEvolution/neo_f3kdb
 4 | 
 5 | Files: *
 6 | Copyright: 2019-2020 Xinyue Lu <i@7086.in>, and previous developers
 7 | License: GPL-3+
 8 |  This package is free software: you can redistribute it and/or modify
 9 |  it under the terms of the GNU General Public License as published by
10 |  the Free Software Foundation, either version 3 of the License, or
11 |  (at your option) any later version.
12 |  .
13 |  This package is distributed in the hope that it will be useful,
14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  GNU General Public License for more details.
17 |  .
18 |  You should have received a copy of the GNU General Public License
19 |  along with this program. If not, see <http://www.gnu.org/licenses/>
20 |  .
21 |  On Debian systems, the complete text of the GNU General
22 |  Public License version 3 can be found in "/usr/share/common-licenses/GPL-3".
23 | 


--------------------------------------------------------------------------------
/src/pixel_proc_c_high_bit_depth_common.h:
--------------------------------------------------------------------------------
 1 | #include "utils.h"
 2 | #include "constants.h"
 3 | 
 4 |     static inline int upsample(void* context, unsigned char pixel)
 5 |     {
 6 |         return pixel << (INTERNAL_BIT_DEPTH - 8);
 7 |     }
 8 | 
 9 | #if defined(HAS_DOWNSAMPLE)
10 | #undef HAS_DOWNSAMPLE
11 | #else
12 |     static inline int downsample(void* context, int pixel, int row, int column, int pixel_min, int pixel_max, int output_depth)
13 |     {
14 |         pixel = dither(context, pixel, row, column);
15 |         return clamp_pixel(pixel, pixel_min, pixel_max) >> (INTERNAL_BIT_DEPTH - output_depth);
16 |     }
17 | #endif
18 |     
19 |     static inline int avg_2(void* context, int pixel1, int pixel2)
20 |     {
21 |         return (pixel1 + pixel2 + 1) >> 1;
22 |     }
23 | 
24 |     static inline int avg_4(void* context, int pixel1, int pixel2, int pixel3, int pixel4)
25 |     {
26 |         // consistent with SSE code
27 |         int avg1 = (pixel1 + pixel2 + 1) >> 1;
28 |         int avg2 = (pixel3 + pixel4 + 1) >> 1;
29 |         if (avg1 > 0)
30 |         {
31 |             avg1 -= 1;
32 |         }
33 |         return (avg1 + avg2 + 1) >> 1;
34 |     }
35 | 
36 | 


--------------------------------------------------------------------------------
/src/compiler_compat.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #ifndef _MSC_VER
 4 | #include <cstring>
 5 | #include <stdio.h>
 6 | #define _stricmp strcasecmp
 7 | #define _strnicmp strncasecmp
 8 | #endif
 9 | 
10 | #ifndef _WIN32
11 | #include <stdlib.h>
12 | #define __forceinline inline
13 | #ifndef __cdecl
14 | #define __cdecl
15 | #endif
16 | #define _InterlockedCompareExchangePointer(a,b,c) __sync_val_compare_and_swap(a,c,b)
17 | 
// Stand-in for MSVC's _aligned_malloc on non-Windows targets, built on
// posix_memalign. Note the argument order: size first, alignment second.
// Returns a null pointer when the allocation fails.
static inline void* _aligned_malloc(size_t size, size_t alignment)
{
    void *block = 0;
    const int status = posix_memalign(&block, alignment, size);
    return (status == 0) ? block : 0;
}
27 | #define _aligned_free free
28 | #else
29 | #include <intrin.h>
30 |     // ICL complains about unresolved external symbol
31 |     #if __INTEL_COMPILER && !_WIN64
32 |     __forceinline void* _InterlockedCompareExchangePointer(
33 |         void* volatile *Destination, void* Exchange, void* Comperand) {
34 |     return (void*) _InterlockedCompareExchange((long volatile *) Destination, (long) Exchange, (long) Comperand);
35 |     }
36 |     #endif
37 | #endif
38 | 
39 | 
40 | #define ALIGNED_ARRAY(type, decl, alignment) alignas(alignment) type decl
41 | 


--------------------------------------------------------------------------------
/src/pixel_proc_c_16bit.h:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | 
 3 | namespace pixel_proc_16bit {
 4 |     
 5 |     static inline void init_context(char context_buffer[CONTEXT_BUFFER_SIZE], int frame_width, int output_depth)
 6 |     {
 7 |         // sanity check only
 8 |         assert(output_depth == 16);
 9 |     }
10 | 
11 |     static inline void destroy_context(void* context)
12 |     {
13 |         // nothing to do
14 |     }
15 | 
16 |     static inline void next_pixel(void* context)
17 |     {
18 |         // nothing to do
19 |     }
20 | 
21 |     static inline void next_row(void* context)
22 |     {
23 |         // nothing to do
24 |     }
25 | 
26 |     static inline int dither(void* context, int pixel, int row, int column)
27 |     {
28 |         return pixel;
29 |     }
30 | 
31 |     #define HAS_DOWNSAMPLE
32 | 
33 |     #include "pixel_proc_c_high_bit_depth_common.h"
34 | 
35 |     static inline int downsample(void* context, int pixel, int row, int column, int pixel_min, int pixel_max, int output_depth)
36 |     {
37 |         assert(output_depth == 16);
38 |         // I know the method name is totally wrong...
39 |         return clamp_pixel(pixel, pixel_min, pixel_max) << (output_depth - INTERNAL_BIT_DEPTH);
40 |     }
41 | 
42 | 
43 | };


--------------------------------------------------------------------------------
/src/bit_utils.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2017 Google Inc.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //    http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_
16 | #define CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_
17 | 
18 | #include <assert.h>
19 | #include <stdbool.h>
20 | #include <stdint.h>
21 | #include "cpu_features_macros.h"
22 | 
23 | CPU_FEATURES_START_CPP_NAMESPACE
24 | 
// Returns true when bit number `bit` (0 = least significant) of `reg` is 1.
inline static bool IsBitSet(uint32_t reg, uint32_t bit) {
  const uint32_t shifted = reg >> bit;
  return (shifted & 0x1) != 0;
}
28 | 
// Returns bits [msb:lsb] of `reg` (both ends inclusive), shifted down so the
// field starts at bit 0. Requires msb >= lsb. The mask is computed in 64 bits
// so that a full 32-bit field (msb = 31, lsb = 0) does not overflow the shift.
inline static uint32_t ExtractBitRange(uint32_t reg, uint32_t msb,
                                       uint32_t lsb) {
  const uint64_t width = msb - lsb + 1ULL;
  const uint64_t field_mask = (1ULL << width) - 1ULL;
  assert(msb >= lsb);
  const uint64_t shifted = reg >> lsb;
  return (uint32_t)(shifted & field_mask);
}
36 | 
37 | CPU_FEATURES_END_CPP_NAMESPACE
38 | 
39 | #endif  // CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_
40 | 


--------------------------------------------------------------------------------
/.github/workflows/windows.yml:
--------------------------------------------------------------------------------
 1 | name: Build (Windows)
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [created]
 6 |   push:
 7 |   pull_request:
 8 |   workflow_dispatch:
 9 |     inputs:
10 |       tag:
11 |         description: 'which tag to create and release?'
12 |         required: true
13 |         default: 'v100'
14 | 
15 | jobs:
16 |   build-windows:
17 |     runs-on: windows-2022
18 | 
19 |     defaults:
20 |       run:
21 |         shell: cmd
22 | 
23 |     steps:
24 |     - name: Checkout repo
25 |       uses: actions/checkout@v4
26 |       with:
27 |         fetch-depth: 0
28 | 
29 |     - name: Setup MSVC
30 |       uses: ilammy/msvc-dev-cmd@v1
31 | 
32 |     - name: Configure
33 |       run: cmake -S . -B build -G "Visual Studio 17 2022" -A x64 \
34 |         -D CMAKE_MSVC_RUNTIME_LIBRARY="MultiThreaded"
35 | 
36 |     - name: Build
37 |       run: cmake --build build --config Release --parallel %NUMBER_OF_PROCESSORS% --verbose
38 | 
39 |     - name: Package Release
40 |       shell: bash
41 |       run: |
42 |         git clone https://github.com/AkarinVS/exe/
43 |         cd build/Release/
44 |         ../../exe/zip.exe -9r ../../Neo_f3kdb-Windows-x64.zip *.dll
45 | 
46 |     - name: Upload
47 |       uses: actions/upload-artifact@v4
48 |       with:
49 |         name: Neo_f3kdb-Windows-x64
50 |         path: build/Release/*.dll
51 | 
52 |     - name: Release
53 |       uses: softprops/action-gh-release@v2
54 |       if: github.event_name == 'release' || github.event_name == 'workflow_dispatch' && github.event.inputs.tag != ''
55 |       with:
56 |         files: "Neo_f3kdb-Windows-x64.zip"
57 |         prerelease: true
58 |       env:
59 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
60 | 


--------------------------------------------------------------------------------
/src/f3kdb.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | typedef enum _PIXEL_MODE : int {
 4 |   DEFAULT_PIXEL_MODE = -1,
 5 |   LOW_BIT_DEPTH = 0,
 6 |   INVALID_OPTION1,
 7 |   HIGH_BIT_DEPTH_INTERLEAVED,
 8 |   PIXEL_MODE_COUNT
 9 | } PIXEL_MODE;
10 | 
// Dithering algorithm selector. Value 0 is left unused (it belonged to the
// deprecated low-precision mode). DA_USER_PARAM_MAX aliases the
// Floyd-Steinberg entry.
typedef enum _DITHER_ALGORITHM : int {
  // _DEPRECATED_DA_LOW = 0,
  DA_HIGH_NO_DITHERING              = 1,
  DA_HIGH_ORDERED_DITHERING         = 2,
  DA_HIGH_FLOYD_STEINBERG_DITHERING = 3,
  DA_16BIT_INTERLEAVED              = 4,

  DA_COUNT                          = 5,
  DA_USER_PARAM_MAX                 = DA_HIGH_FLOYD_STEINBERG_DITHERING
} DITHER_ALGORITHM;
21 | 
// Random number generator selector; RANDOM_ALGORITHM_COUNT is one past the
// last valid algorithm.
typedef enum _RANDOM_ALGORITHM : int {
  RANDOM_ALGORITHM_OLD      = 0,
  RANDOM_ALGORITHM_UNIFORM  = 1,
  RANDOM_ALGORITHM_GAUSSIAN = 2,
  RANDOM_ALGORITHM_COUNT    = 3
} RANDOM_ALGORITHM;
28 | 
// Implementation selector. IMPL_COUNT is one past the last concrete
// implementation; IMPL_AUTO_DETECT is a negative sentinel.
typedef enum _OPTIMIZATION_MODE : int {
  IMPL_AUTO_DETECT = -1,
  IMPL_C           = 0,
  IMPL_SSE2        = 1,
  IMPL_SSSE3       = 2,
  IMPL_SSE4        = 3,
  IMPL_AVX2        = 4,
  IMPL_AVX512      = 5,

  IMPL_COUNT       = 6
} OPTIMIZATION_MODE;
40 | 
41 | typedef struct _f3kdb_params_t {
42 |   int range {15};
43 |   int Y {64};
44 |   int Cb {64};
45 |   int Cr {64};
46 |   int grainY {64};
47 |   int grainC {64};
48 |   int sample_mode {2};
49 |   int seed {0};
50 |   bool blur_first {true};
51 |   bool dynamic_grain {false};
52 |   DITHER_ALGORITHM dither_algo {DA_HIGH_FLOYD_STEINBERG_DITHERING};
53 |   bool keep_tv_range {false};
54 |   int output_depth {-1};
55 |   RANDOM_ALGORITHM random_algo_ref {RANDOM_ALGORITHM_UNIFORM};
56 |   RANDOM_ALGORITHM random_algo_grain {RANDOM_ALGORITHM_UNIFORM};
57 |   double random_param_ref {1.0f};
58 |   double random_param_grain {1.0f};
59 |   int Y_1 {-1};
60 |   int Cb_1 {-1};
61 |   int Cr_1 {-1};
62 |   int Y_2 {-1};
63 |   int Cb_2 {-1};
64 |   int Cr_2 {-1};
65 |   double angle_boost {1.5};
66 |   double max_angle {0.15};
67 | } f3kdb_params_t;
68 | 


--------------------------------------------------------------------------------
/include/dualsynth/ds_videoinfo.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2020 Xinyue Lu
 3 |  *
 4 |  * DualSynth wrapper - DSVideoInfo.
 5 |  *
 6 |  */
 7 | 
 8 | #pragma once
 9 | 
10 | struct DSVideoInfo
11 | {
12 |   DSFormat Format;
13 |   int64_t FPSNum {1}, FPSDenom {1};
14 |   int Width {0}, Height {0};
15 |   int Frames {0};
16 | 
17 |   int Audio_SPS {0};
18 |   int Audio_SType {0};
19 |   int64_t Audio_NSamples {0};
20 |   int Audio_NChannels {0};
21 | 
22 |   int Field {0};
23 | 
24 |   DSVideoInfo() {}
25 |   DSVideoInfo(DSFormat format, int64_t fpsnum, int64_t fpsdenom, int width, int height, int frames)
26 |     : Format(format)
27 |     , FPSNum(fpsnum), FPSDenom(fpsdenom)
28 |     , Width(width), Height(height)
29 |     , Frames(frames)
30 |   { }
31 |   DSVideoInfo(const VSVideoInfo* vsvi)
32 |     : Format(vsvi->format)
33 |     , FPSNum(vsvi->fpsNum), FPSDenom(vsvi->fpsDen)
34 |     , Width(vsvi->width), Height(vsvi->height)
35 |     , Frames(vsvi->numFrames)
36 |   { }
37 |   DSVideoInfo(const VideoInfo avsvi)
38 |     : Format(avsvi.pixel_type)
39 |     , FPSNum(avsvi.fps_numerator), FPSDenom(avsvi.fps_denominator)
40 |     , Width(avsvi.width), Height(avsvi.height)
41 |     , Frames(avsvi.num_frames)
42 |     , Audio_SPS(avsvi.audio_samples_per_second)
43 |     , Audio_SType(avsvi.sample_type)
44 |     , Audio_NSamples(avsvi.num_audio_samples)
45 |     , Audio_NChannels(avsvi.nchannels)
46 |     , Field(avsvi.image_type)
47 |   { }
48 |   const VSVideoInfo* ToVSVI(const VSCore* vscore, const VSAPI* vsapi) {
49 |     return new VSVideoInfo {Format.ToVSFormat(vscore, vsapi), FPSNum, FPSDenom, Width, Height, Frames, 0};
50 |   }
51 |   const VideoInfo ToAVSVI() {
52 |     return VideoInfo{Width, Height, static_cast<unsigned>(FPSNum), static_cast<unsigned>(FPSDenom), Frames, Format.ToAVSFormat(), Audio_SPS, Audio_SType, Audio_NSamples, Audio_NChannels, Field};
53 |   }
54 | };
55 | 


--------------------------------------------------------------------------------
/include/dualsynth/ds_common.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2020 Xinyue Lu
 3 |  *
 4 |  * DualSynth wrapper - Common header+.
 5 |  *
 6 |  */
 7 | #pragma once
 8 | 
 9 | #include <avisynth.h>
10 | #include <VapourSynth.h>
11 | #include <cstring>
12 | #include <cmath>
13 | #include <string>
14 | #include <sstream>
15 | #include <vector>
16 | #include <unordered_map>
17 | #include <algorithm>
18 | #include <mutex>
19 | #include "ds_format.hpp"
20 | #include "ds_videoinfo.hpp"
21 | #include "ds_frame.hpp"
22 | 
23 | typedef void (*register_vsfilter_proc)(VSRegisterFunction, VSPlugin*);
24 | typedef void (*register_avsfilter_proc)(IScriptEnvironment* env);
25 | std::vector<register_vsfilter_proc> RegisterVSFilters();
26 | std::vector<register_avsfilter_proc> RegisterAVSFilters();
27 | 
// Kinds of value a filter parameter can hold.
enum ParamType
{
  Clip,
  Integer,
  Float,
  Boolean,
  String
};
32 | 
33 | struct Param
34 | {
35 |   const char* Name;
36 |   const ParamType Type;
37 |   const bool IsArray {false};
38 |   bool AVSEnabled {true};
39 |   bool VSEnabled {true};
40 |   const bool IsOptional {true};
41 | };
42 | 
// Abstract reader for filter arguments: one Read overload per supported
// value type, looked up by parameter name, plus Free for clip handles.
struct InDelegator
{
  // Scalar values.
  virtual void Read(const char* name, int& output) = 0;
  virtual void Read(const char* name, int64_t& output) = 0;
  virtual void Read(const char* name, float& output) = 0;
  virtual void Read(const char* name, double& output) = 0;
  virtual void Read(const char* name, bool& output) = 0;
  virtual void Read(const char* name, std::string& output) = 0;

  // Array values.
  virtual void Read(const char* name, std::vector<int>& output) = 0;
  virtual void Read(const char* name, std::vector<int64_t>& output) = 0;
  virtual void Read(const char* name, std::vector<float>& output) = 0;
  virtual void Read(const char* name, std::vector<double>& output) = 0;
  virtual void Read(const char* name, std::vector<bool>& output) = 0;

  // Opaque handle (the Free parameter name suggests a clip).
  virtual void Read(const char* name, void*& output) = 0;
  virtual void Free(void*& clip) = 0;

  // Virtual destructor so that deleting an implementation through an
  // InDelegator pointer is well-defined, matching FetchFrameFunctor.
  virtual ~InDelegator() {}
};
59 | 
60 | struct FetchFrameFunctor
61 | {
62 |   virtual DSFrame operator()(int n) = 0;
63 |   virtual ~FetchFrameFunctor() {}
64 | };
65 | 


--------------------------------------------------------------------------------
/src/impl_dispatch.cpp:
--------------------------------------------------------------------------------
 1 | #include "core.h"
 2 | 
 3 | #define IMPL_DISPATCH_IMPORT_DECLARATION
 4 | 
 5 | #include "impl_dispatch_decl.h"
 6 | 
 7 | const process_plane_impl_t* process_plane_impl_high_precision_no_dithering[] = {
 8 |     process_plane_impl_c_high_no_dithering,
 9 |     process_plane_impl_c_high_no_dithering,
10 |     process_plane_impl_c_high_no_dithering,
11 |     process_plane_impl_sse4_high_no_dithering,
12 |     process_plane_impl_avx2_high_no_dithering,
13 |     process_plane_impl_avx512_high_no_dithering
14 | };
15 | 
16 | const process_plane_impl_t* process_plane_impl_high_precision_ordered_dithering[] = {
17 |     process_plane_impl_c_high_ordered_dithering,
18 |     process_plane_impl_c_high_ordered_dithering,
19 |     process_plane_impl_c_high_ordered_dithering,
20 |     process_plane_impl_sse4_high_ordered_dithering,
21 |     process_plane_impl_avx2_high_ordered_dithering,
22 |     process_plane_impl_avx512_high_ordered_dithering
23 | };
24 | 
25 | const process_plane_impl_t* process_plane_impl_high_precision_floyd_steinberg_dithering[] = {
26 |     process_plane_impl_c_high_floyd_steinberg_dithering,
27 |     process_plane_impl_c_high_floyd_steinberg_dithering,
28 |     process_plane_impl_c_high_floyd_steinberg_dithering,
29 |     process_plane_impl_sse4_high_floyd_steinberg_dithering,
30 |     process_plane_impl_avx2_high_floyd_steinberg_dithering,
31 |     process_plane_impl_avx512_high_floyd_steinberg_dithering
32 | };
33 | 
34 | const process_plane_impl_t* process_plane_impl_16bit_interleaved[] = {  // "16-bit interleaved" implementations, one slot per optimization level; NOTE(review): slot order presumably mirrors the OPTIMIZATION_MODE enum - confirm
35 |     process_plane_impl_c_16bit_interleaved,  // slots 0-2 all map to the C implementation
36 |     process_plane_impl_c_16bit_interleaved,
37 |     process_plane_impl_c_16bit_interleaved,
38 |     process_plane_impl_sse4_16bit_interleaved,  // slot 3: SSE4
39 |     process_plane_impl_avx2_16bit_interleaved,  // slot 4: AVX2
40 |     process_plane_impl_avx512_16bit_interleaved  // slot 5: AVX-512
41 | };
42 | 
43 | 
44 | const process_plane_impl_t** process_plane_impls[] = {  // top-level dispatch table: one per-optimization-level table per processing mode; NOTE(review): row order presumably matches the DA_* mode constants - confirm
45 | 	nullptr, // process_plane_impl_low_precision has been removed; the first slot holds no table
46 | 	process_plane_impl_high_precision_no_dithering,
47 | 	process_plane_impl_high_precision_ordered_dithering,
48 | 	process_plane_impl_high_precision_floyd_steinberg_dithering,
49 |     process_plane_impl_16bit_interleaved
50 | };
51 | 


--------------------------------------------------------------------------------
/include/avisynth/avs/minmax.h:
--------------------------------------------------------------------------------
 1 | // This program is free software; you can redistribute it and/or modify
 2 | // it under the terms of the GNU General Public License as published by
 3 | // the Free Software Foundation; either version 2 of the License, or
 4 | // (at your option) any later version.
 5 | //
 6 | // This program is distributed in the hope that it will be useful,
 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception.  An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 | 
32 | #ifndef AVSCORE_MINMAX_H
33 | #define AVSCORE_MINMAX_H
34 | 
35 | template<typename T>
36 | T min(T v1, T v2) {  // smaller of the two arguments; v2 when v1 does not compare less
37 |   if (v1 < v2) return v1;
38 |   return v2;
39 | }
40 | 
41 | template<typename T>
42 | T max(T v1, T v2) {  // larger of the two arguments; v2 when v1 does not compare greater
43 |   if (v1 > v2) return v1;
44 |   return v2;
45 | }
46 | 
47 | template<typename T>
48 | T clamp(T n, T min, T max) {  // limits n to [min, max]; the upper bound is applied first, so min is returned whenever min > max
49 |     if (n > max) n = max;
50 |     if (n < min) return min;
51 |     return n;
52 | }
53 | 
54 | #endif // AVSCORE_MINMAX_H
55 | 


--------------------------------------------------------------------------------
/include/avisynth/avs/types.h:
--------------------------------------------------------------------------------
 1 | // Avisynth C Interface Version 0.20
 2 | // Copyright 2003 Kevin Atkinson
 3 | 
 4 | // This program is free software; you can redistribute it and/or modify
 5 | // it under the terms of the GNU General Public License as published by
 6 | // the Free Software Foundation; either version 2 of the License, or
 7 | // (at your option) any later version.
 8 | //
 9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program; if not, write to the Free Software
16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
17 | // http://www.gnu.org/copyleft/gpl.html .
18 | //
19 | // As a special exception, I give you permission to link to the
20 | // Avisynth C interface with independent modules that communicate with
21 | // the Avisynth C interface solely through the interfaces defined in
22 | // avisynth_c.h, regardless of the license terms of these independent
23 | // modules, and to copy and distribute the resulting combined work
24 | // under terms of your choice, provided that every copy of the
25 | // combined work is accompanied by a complete copy of the source code
26 | // of the Avisynth C interface and Avisynth itself (with the version
27 | // used to produce the combined work), being distributed under the
28 | // terms of the GNU General Public License plus this exception.  An
29 | // independent module is a module which is not derived from or based
30 | // on Avisynth C Interface, such as 3rd-party filters, import and
31 | // export plugins, or graphical user interfaces.
32 | 
33 | #ifndef AVS_TYPES_H
34 | #define AVS_TYPES_H
35 | 
36 | // Define all types necessary for interfacing with avisynth.dll
37 | #include <stdint.h>
38 | #include <stdbool.h>
39 | #ifdef __cplusplus
40 |   #include <cstddef>
41 |   #include <cstdarg>
42 | #else
43 |   #include <stddef.h>
44 |   #include <stdarg.h>
45 | #endif
46 | 
47 | // Raster types used by VirtualDub & Avisynth
48 | typedef uint32_t Pixel32;  // 32-bit unsigned value; NOTE(review): presumably one packed pixel - confirm
49 | typedef uint8_t  BYTE;     // 8-bit unsigned value
50 | 
51 | // Audio Sample information
52 | typedef float SFLOAT;      // single-precision float
53 | 
54 | #endif //AVS_TYPES_H
55 | 


--------------------------------------------------------------------------------
/src/pixel_proc_c.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <stdlib.h>
 4 | 
 5 | #include "impl_dispatch.h"
 6 | 
 7 | #define CALL_IMPL(func, ...) \
 8 | 	( mode == DA_HIGH_NO_DITHERING ? pixel_proc_high_no_dithering::func(__VA_ARGS__) : \
 9 | 	  mode == DA_HIGH_ORDERED_DITHERING ? pixel_proc_high_ordered_dithering::func(__VA_ARGS__) : \
10 | 	  mode == DA_HIGH_FLOYD_STEINBERG_DITHERING ? pixel_proc_high_f_s_dithering::func(__VA_ARGS__) : \
11 | 	  pixel_proc_16bit::func(__VA_ARGS__) )  // CALL_IMPL: forwards func(...) to the namespace selected by `mode` (a name that must be in scope at the call site); any mode other than the three DA_HIGH_* values falls through to pixel_proc_16bit
12 | 
13 | #define CHECK_MODE() if (mode < 0 || mode >= DA_COUNT) abort()
14 | 
15 | #include "pixel_proc_c_high_no_dithering.h"
16 | #include "pixel_proc_c_high_ordered_dithering.h"
17 | #include "pixel_proc_c_high_f_s_dithering.h"
18 | 
19 | #include "pixel_proc_c_16bit.h"
20 | 
21 | template <int mode>  // mode: compile-time selector of the pixel_proc_* namespace (see CALL_IMPL)
22 | static inline void pixel_proc_init_context(char context_buffer[CONTEXT_BUFFER_SIZE], int frame_width, int output_depth)
23 | {
24 | 	CHECK_MODE();  // abort if mode is outside [0, DA_COUNT)
25 | 	CALL_IMPL(init_context, context_buffer, frame_width, output_depth);  // forwards to the selected namespace's init_context
26 | }
27 | 
28 | template <int mode>  // mode: compile-time selector of the pixel_proc_* namespace (see CALL_IMPL)
29 | static inline void pixel_proc_destroy_context(void* context)
30 | {
31 | 	CHECK_MODE();  // abort if mode is outside [0, DA_COUNT)
32 | 	CALL_IMPL(destroy_context, context);  // forwards to the selected namespace's destroy_context
33 | }
34 | 
35 | template <int mode>  // mode: compile-time selector of the pixel_proc_* namespace (see CALL_IMPL)
36 | static inline void pixel_proc_next_pixel(void* context)
37 | {
38 | 	CHECK_MODE();  // abort if mode is outside [0, DA_COUNT)
39 | 	CALL_IMPL(next_pixel, context);  // forwards to the selected namespace's next_pixel
40 | }
41 | 
42 | template <int mode>  // mode: compile-time selector of the pixel_proc_* namespace (see CALL_IMPL)
43 | static inline void pixel_proc_next_row(void* context)
44 | {
45 | 	CHECK_MODE();  // abort if mode is outside [0, DA_COUNT)
46 | 	CALL_IMPL(next_row, context);  // forwards to the selected namespace's next_row
47 | }
48 | 
49 | template <int mode>  // mode: compile-time selector of the pixel_proc_* namespace (see CALL_IMPL)
50 | static inline int pixel_proc_upsample(void* context, unsigned char pixel)
51 | {
52 | 	CHECK_MODE();  // abort if mode is outside [0, DA_COUNT)
53 | 	return CALL_IMPL(upsample, context, pixel);  // returns whatever the selected namespace's upsample returns
54 | }
55 | 
56 | template <int mode>  // mode: compile-time selector of the pixel_proc_* namespace (see CALL_IMPL)
57 | static inline int pixel_proc_downsample(void* context, int pixel, int row, int column, int pixel_min, int pixel_max, int output_depth)
58 | {
59 | 	CHECK_MODE();  // abort if mode is outside [0, DA_COUNT)
60 | 	return CALL_IMPL(downsample, context, pixel, row, column, pixel_min, pixel_max, output_depth);  // returns whatever the selected namespace's downsample returns
61 | }
62 | 
63 | template <int mode>  // mode: compile-time selector of the pixel_proc_* namespace (see CALL_IMPL)
64 | static inline int pixel_proc_avg_2(void* context, int pixel1, int pixel2)
65 | {
66 | 	CHECK_MODE();  // abort if mode is outside [0, DA_COUNT)
67 | 	return CALL_IMPL(avg_2, context, pixel1, pixel2);  // returns whatever the selected namespace's avg_2 returns
68 | }
69 | 
70 | template <int mode>  // mode: compile-time selector of the pixel_proc_* namespace (see CALL_IMPL)
71 | static inline int pixel_proc_avg_4(void* context, int pixel1, int pixel2, int pixel3, int pixel4)
72 | {
73 | 	CHECK_MODE();  // abort if mode is outside [0, DA_COUNT)
74 | 	return CALL_IMPL(avg_4, context, pixel1, pixel2, pixel3, pixel4);  // returns whatever the selected namespace's avg_4 returns
75 | }
76 | 


--------------------------------------------------------------------------------
/include/avisynth/avs/win.h:
--------------------------------------------------------------------------------
 1 | // This program is free software; you can redistribute it and/or modify
 2 | // it under the terms of the GNU General Public License as published by
 3 | // the Free Software Foundation; either version 2 of the License, or
 4 | // (at your option) any later version.
 5 | //
 6 | // This program is distributed in the hope that it will be useful,
 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception.  An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 | 
32 | #ifndef AVSCORE_WIN_H
33 | #define AVSCORE_WIN_H
34 | 
35 | // Whenever you need windows headers, start by including this file, then the rest.
36 | 
37 | // Default to targeting Windows Server 2003 / XP x64 (0x0502) unless the build already selected a Windows version.
38 | #if !defined(NTDDI_VERSION) && !defined(_WIN32_WINNT)
39 |   #define NTDDI_VERSION 0x05020000
40 |   #define _WIN32_WINNT  0x0502
41 | #endif
42 | 
43 | #define WIN32_LEAN_AND_MEAN
44 | #define STRICT
45 | #if !defined(NOMINMAX)
46 |     #define NOMINMAX
47 | #endif
48 | 
49 | #include <windows.h>
50 | 
51 | // Provision for UTF-8 max 4 bytes per code point
52 | #define AVS_MAX_PATH (MAX_PATH*4)  // parenthesized so the expansion stays a single operand inside larger expressions (e.g. x / AVS_MAX_PATH)
53 | 
54 | #endif // AVSCORE_WIN_H
55 | 


--------------------------------------------------------------------------------
/include/dualsynth/ds_filter.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2020 Xinyue Lu
 3 |  *
 4 |  * DualSynth wrapper - Filter parent class.
 5 |  *
 6 |  */
 7 | 
 8 | #pragma once
 9 | 
10 | struct Filter  // Base class for filters: holds the input clip info and frame fetcher and supplies default behaviour for every hook except Params(). NOTE(review): no virtual destructor is declared - confirm instances are never deleted through a Filter*
11 | {
12 |   DSVideoInfo in_vi;  // input video info, stored by Initialize()
13 |   FetchFrameFunctor* fetch_frame {nullptr};  // frame fetcher stored by Initialize(); null (rather than indeterminate) until then
14 |   virtual const char* VSName() const { return "FilterFoo"; }  // placeholder names; NOTE(review): presumably overridden by concrete filters
15 |   virtual const char* AVSName() const { return "FilterFoo"; }
16 |   virtual const MtMode AVSMode() const { return MT_SERIALIZED; }  // default threading mode reported through SetCacheHints()
17 |   virtual const VSFilterMode VSMode() const { return fmSerial; }
18 |   virtual const std::vector<Param> Params() const = 0;  // parameter list; the only hook without a default
19 |   virtual const std::string VSParams() const  // renders the VS-enabled parameters as "name:type" + optional "[]" + optional ":opt" + ";" per parameter
20 |   {
21 |     std::stringstream ss;
22 |     auto params = this->Params();
23 |     for (auto &&p : params)
24 |     {
25 |       if (!p.VSEnabled) continue;
26 |       std::string type_name;  // stays empty if p.Type matches none of the cases below
27 |       switch(p.Type) {
28 |         case Clip: type_name = "clip"; break;
29 |         case Integer: type_name = "int"; break;
30 |         case Float: type_name = "float"; break;
31 |         case Boolean: type_name = "int"; break;  // booleans are emitted as "int"
32 |         case String: type_name = "data"; break;
33 |       }
34 |       ss << p.Name << ':' << type_name;
35 |       if (p.IsArray)
36 |         ss << "[]";
37 |       if (p.IsOptional)
38 |         ss << ":opt";
39 |       ss << ';';
40 |     }
41 |     return ss.str();
42 |   }
43 |   virtual const std::string AVSParams() const  // renders the AVS-enabled parameters as one type character each, preceded by "[name]" when optional
44 |   {
45 |     std::stringstream ss;
46 |     auto params = this->Params();
47 |     for (auto &&p : params)
48 |     {
49 |       if (!p.AVSEnabled) continue;
50 |       char type_name = '.';  // defined fallback ('.' is AviSynth's any-type marker) so an unmatched p.Type never streams an uninitialized char
51 |       switch(p.Type) {
52 |         case Clip: type_name = 'c'; break;
53 |         case Integer: type_name = 'i'; break;
54 |         case Float: type_name = 'f'; break;
55 |         case Boolean: type_name = 'b'; break;
56 |         case String: type_name = 's'; break;
57 |       }
58 |       if (p.IsOptional)
59 |         ss << '[' << p.Name << ']';
60 |       ss << type_name;
61 |     }
62 |     return ss.str();
63 |   }
64 |   virtual void Initialize(InDelegator* in, DSVideoInfo in_vi, FetchFrameFunctor* fetch_frame)  // default: only records in_vi and fetch_frame; `in` is unused here
65 |   {
66 |     this->in_vi = in_vi;
67 |     this->fetch_frame = fetch_frame;
68 |   }
69 |   virtual std::vector<int> RequestReferenceFrames(int n) const  // default: frame n depends only on input frame n
70 |   {
71 |     return std::vector<int>{n};
72 |   }
73 |   virtual DSFrame GetFrame(int n, std::unordered_map<int, DSFrame> in_frames)  // default: returns the map's first entry (unordered, so only well-defined for a single entry) or an empty DSFrame
74 |   {
75 |     return in_frames.size() > 0 ? in_frames.begin()->second : DSFrame();
76 |   }
77 |   virtual DSVideoInfo GetOutputVI()  // default: output format equals the input format
78 |   {
79 |     return in_vi;
80 |   }
81 |   virtual int SetCacheHints(int cachehints, int frame_range)  // answers only CACHE_GET_MTMODE (with AVSMode()); every other hint yields 0
82 |   {
83 |     return cachehints == CACHE_GET_MTMODE ? AVSMode() : 0;
84 |   }
85 | };
86 | 


--------------------------------------------------------------------------------
/src/random.cpp:
--------------------------------------------------------------------------------
 1 | #include "random.h"
 2 | 
 3 | #include <math.h>
 4 | 
 5 | #include <assert.h>
 6 | 
 7 | #include <stdint.h>
 8 | 
 9 | typedef double (*rand_impl_t)(int& seed, double param);  // common generator signature: updates seed in place and returns a double that random() asserts to lie in [-1.0, 1.0]
10 | 
11 | double rand_old(int& seed, double param);
12 | 
13 | double rand_uniform(int& seed, double param);
14 | 
15 | double rand_gaussian(int& seed, double param);
16 | 
17 | static const rand_impl_t rand_algorithms[] = {  // indexed by the RANDOM_ALGORITHM value passed to random(); NOTE(review): order presumably mirrors that enum - confirm
18 |     rand_old,
19 |     rand_uniform,
20 |     rand_gaussian
21 | };
22 | 
23 | inline double round(double r) {  // nearest integer, with halves rounded away from zero
24 |     if (r > 0.0) return floor(r + 0.5); else return ceil(r - 0.5);
25 | }
26 | 
27 | int random(RANDOM_ALGORITHM algo, int& seed, int range, double param)  // draws from generator `algo` (updating seed) and scales the [-1.0, 1.0] sample to an integer in [-range, range]
28 | {
29 |     assert(algo >= 0 && algo < RANDOM_ALGORITHM_COUNT);
30 |     const rand_impl_t generator = rand_algorithms[algo];
31 |     const double unit_sample = generator(seed, param);
32 |     assert(unit_sample >= -1.0 && unit_sample <= 1.0);
33 |     return (int)round(unit_sample * range);
34 | }
35 | 
36 | // most algorithms below are stolen from AddGrainC
37 | 
38 | double rand_to_double(int rand_num)  // maps the 32 bits of rand_num onto a double in [-1.0, 1.0)
39 | {
40 |     // widen the 32-bit number to the 52-bit mantissa: shift it up by 20 bits and copy its high 20 bits into the vacated low bits
41 |     // (otherwise the upper bound will be significantly less than 1.0)
42 |     union  // itemp and result share storage so the bit pattern built below is read back as a double; NOTE(review): relies on the compiler supporting union type punning
43 |     {
44 |         uint64_t itemp;
45 |         double result;
46 |     };
47 |     itemp = ((uint64_t)rand_num) & 0xffffffffULL;  // keep only the low 32 bits, discarding any sign extension
48 |     itemp = itemp << 20 | itemp >> 12;
49 | 
50 |     // set the exponent field to 0x3ff
51 |     itemp |= 0x3ff0000000000000ULL;
52 | 
53 |     // result is now in [1.0, 2.0), convert to [-1.0, 1.0)
54 |     return (result - 1.0) * 2 - 1.0;
55 | }
56 | 
57 | double rand_old(int& seed, double)  // legacy generator: scrambles seed in place with shifts/xors and returns it mapped by rand_to_double; the second argument is ignored
58 | {
59 |     unsigned int seed_tmp = ((((unsigned int)seed << 13) ^ (unsigned int)seed) >> 17) ^ ((unsigned int)seed << 13) ^ (unsigned int)seed;  // done entirely in unsigned: same bit pattern as before, without left-shifting a signed int
60 |     seed = (int)((32u * seed_tmp) ^ seed_tmp);  // unsigned multiply wraps modulo 2^32 instead of overflowing a signed int
61 |     return rand_to_double(seed);
62 | }
63 | 
64 | double rand_uniform(int& seed, double)  // advances seed by one linear congruential step and returns it mapped by rand_to_double; the second argument is ignored
65 | {
66 |     seed = (int)(1664525u * (unsigned int)seed + 1013904223u);  // unsigned arithmetic wraps modulo 2^32; the signed form overflowed (undefined behaviour)
67 |     return rand_to_double(seed);
68 | }
69 | 
70 | // http://www.bearcave.com/misl/misl_tech/wavelets/hurst/random.html
71 | double rand_gaussian(int& seed, double param)  // draws a normally distributed value with sigma = param, redrawing until it lies strictly inside (-1.0, 1.0); seed advances with every draw
72 | {
73 |     double ret;
74 |     double x, y, r2;
75 | 
76 |     do
77 |     {
78 |         do
79 |         {
80 |             /* choose x,y in uniform square (-1,-1) to (+1,+1) */
81 | 
82 |             x = rand_uniform (seed, param);  // rand_uniform ignores its second argument
83 |             y = rand_uniform (seed, param);
84 | 
85 |             /* see if it is in the unit circle */
86 |             r2 = x * x + y * y;
87 |         }
88 |         while (r2 > 1.0 || r2 == 0);  // r2 == 0 is rejected as well because log(r2) / r2 below needs r2 > 0
89 |         /* Box-Muller transform (polar form) */
90 | 
91 |         // sigma = param
92 |         ret = param * y * sqrt (-2.0 * log (r2) / r2);
93 | 
94 |     } while (ret <= -1.0 || ret >= 1.0);  // out-of-range samples are redrawn rather than clamped
95 |     // we need to clip the result because the wrapper accepts [-1.0, 1.0] only
96 | 
97 |     return ret;
98 | }
99 | 


--------------------------------------------------------------------------------
/src/pixel_proc_c_high_ordered_dithering.h:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | namespace pixel_proc_high_ordered_dithering {  // ordered-dithering pixel processor: adds a position-dependent threshold from a 16x16 map to each pixel
 4 | 
 5 |     // bayer dither matrix
 6 |     // align to 16 byte for reading from SSE code
 7 |     alignas(16) static const unsigned char THRESHOLD_MAP[16][16] =
 8 |     {
 9 |         {   0, 128,  32, 160,   8, 136,  40, 168,   2, 130,  34, 162,  10, 138,  42, 170 },
10 |         { 192,  64, 224,  96, 200,  72, 232, 104, 194,  66, 226,  98, 202,  74, 234, 106 },
11 |         {  48, 176,  16, 144,  56, 184,  24, 152,  50, 178,  18, 146,  58, 186,  26, 154 },
12 |         { 240, 112, 208,  80, 248, 120, 216,  88, 242, 114, 210,  82, 250, 122, 218,  90 },
13 |         {  12, 140,  44, 172,   4, 132,  36, 164,  14, 142,  46, 174,   6, 134,  38, 166 },
14 |         { 204,  76, 236, 108, 196,  68, 228, 100, 206,  78, 238, 110, 198,  70, 230, 102 },
15 |         {  60, 188,  28, 156,  52, 180,  20, 148,  62, 190,  30, 158,  54, 182,  22, 150 },
16 |         { 252, 124, 220,  92, 244, 116, 212,  84, 254, 126, 222,  94, 246, 118, 214,  86 },
17 |         {   3, 131,  35, 163,  11, 139,  43, 171,   1, 129,  33, 161,   9, 137,  41, 169 },
18 |         { 195,  67, 227,  99, 203,  75, 235, 107, 193,  65, 225,  97, 201,  73, 233, 105 },
19 |         {  51, 179,  19, 147,  59, 187,  27, 155,  49, 177,  17, 145,  57, 185,  25, 153 },
20 |         { 243, 115, 211,  83, 251, 123, 219,  91, 241, 113, 209,  81, 249, 121, 217,  89 },
21 |         {  15, 143,  47, 175,   7, 135,  39, 167,  13, 141,  45, 173,   5, 133,  37, 165 },
22 |         { 207,  79, 239, 111, 199,  71, 231, 103, 205,  77, 237, 109, 197,  69, 229, 101 },
23 |         {  63, 191,  31, 159,  55, 183,  23, 151,  61, 189,  29, 157,  53, 181,  21, 149 },
24 |         { 255, 127, 223,  95, 247, 119, 215,  87, 253, 125, 221,  93, 245, 117, 213,  85 }
25 |     };
26 | 
27 |     static const int THRESHOLD_MAP_RIGHT_SHIFT_BITS = 16 - INTERNAL_BIT_DEPTH;  // base right shift applied to map entries in dither(); INTERNAL_BIT_DEPTH is defined elsewhere
28 | 
29 | 
30 |     static inline void init_context(char context_buffer[CONTEXT_BUFFER_SIZE], int frame_width, int output_depth)  // the context is just output_depth stored at the start of the buffer; frame_width is unused
31 |     {
32 |         *((int*)context_buffer) = output_depth;
33 |     }
34 | 
35 |     static inline void destroy_context(void* context)
36 |     {
37 |         // nothing to do
38 |     }
39 | 
40 |     static inline void next_pixel(void* context)
41 |     {
42 |         // nothing to do
43 |     }
44 | 
45 |     static inline void next_row(void* context)
46 |     {
47 |         // nothing to do
48 |     }
49 | 
50 |     static inline int dither(void* context, int pixel, int row, int column)  // returns pixel plus the map entry for (row, column), scaled down by a depth-dependent right shift
51 |     {
52 |         int output_depth = *(int*)context;  // value stored by init_context
53 |         pixel += (THRESHOLD_MAP[row & 15][column & 15] >> (THRESHOLD_MAP_RIGHT_SHIFT_BITS + output_depth - 8));  // "& 15" tiles the 16x16 map across the frame; the shift grows by one for each bit of output_depth above 8
54 |         return pixel;
55 |     }
56 | 
57 |     #include "pixel_proc_c_high_bit_depth_common.h"  // textual include inside the namespace: whatever the header defines becomes a member of this namespace
58 | };
59 | 


--------------------------------------------------------------------------------
/src/core.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <ds_common.hpp>
  4 | 
  5 | #include "f3kdb.h"
  6 | #include "process_plane_context.h"
  7 | #include "compiler_compat.h"
  8 | 
  9 | typedef struct _pixel_dither_info {  // per-pixel record that must occupy exactly 4 bytes (enforced by the static_assert below); NOTE(review): field meanings are not visible in this header - confirm against core.cpp
 10 |     alignas(4) signed char ref1;  // alignas(4) gives the whole struct 4-byte alignment
 11 |     signed char ref2;
 12 |     signed short change;
 13 | } pixel_dither_info;
 14 | 
 15 | static_assert(sizeof(pixel_dither_info) == 4, "Something wrong in pixel_dither_info");
 16 | 
 17 | typedef struct _process_plane_params  // argument bundle handed to a process_plane_impl_t: source/destination plane pointers and pitches, dimensions, pixel formats, thresholds and lookup buffers
 18 | {
 19 |     const unsigned char *src_plane_ptr;
 20 |     int src_pitch;
 21 | 
 22 |     unsigned char *dst_plane_ptr;
 23 |     int dst_pitch;
 24 | 
 25 |     int plane_width_in_pixels;
 26 |     int plane_height_in_pixels;
 27 | 
 28 |     PIXEL_MODE input_mode;
 29 |     int input_depth;
 30 |     PIXEL_MODE output_mode;
 31 |     int output_depth;
 32 | 
 33 |     unsigned short threshold;
 34 |     unsigned short threshold1;
 35 |     unsigned short threshold2;
 36 |     float angle_boost;
 37 |     float max_angle;
 38 |     pixel_dither_info *info_ptr_base;
 39 |     int info_stride;
 40 |     
 41 |     short* grain_buffer;
 42 |     int grain_buffer_stride;
 43 | 
 44 |     int plane;
 45 | 
 46 |     unsigned char width_subsampling;
 47 |     unsigned char height_subsampling;
 48 |     
 49 |     int pixel_max;
 50 |     int pixel_min;
 51 |     
 52 |     // Helper functions
 53 |     inline int get_dst_width() const {  // destination width: doubled when output_mode is HIGH_BIT_DEPTH_INTERLEAVED, otherwise the pixel width
 54 |         return output_mode == HIGH_BIT_DEPTH_INTERLEAVED ? plane_width_in_pixels * 2 : plane_width_in_pixels;
 55 |     }
 56 |     inline int get_dst_height() const {  // height is the same for every output mode
 57 |         return plane_height_in_pixels;
 58 |     }
 59 |     inline int get_src_width() const {  // source width: doubled when input_mode is HIGH_BIT_DEPTH_INTERLEAVED, otherwise the pixel width
 60 |         return input_mode == HIGH_BIT_DEPTH_INTERLEAVED ? plane_width_in_pixels * 2 : plane_width_in_pixels;
 61 |     }
 62 |     inline int get_src_height() const {  // height is the same for every input mode
 63 |         return plane_height_in_pixels;
 64 |     }
 65 | } process_plane_params;
 66 | 
 67 | typedef void (*process_plane_impl_t)(const process_plane_params& params, process_plane_context* context);  // signature shared by every plane-processing implementation: read-only parameters plus a mutable per-plane context
 68 | 
 69 | class f3kdb_core_t {  // filter core: holds the selected plane implementation plus per-plane lookup data and exposes process_plane(); copying is blocked by the private copy members below
 70 | private:
 71 |     process_plane_impl_t _process_plane_impl;  // implementation invoked for each plane; NOTE(review): presumably chosen from _params and _opt in init() - confirm in core.cpp
 72 |         
 73 |     pixel_dither_info *_y_info;
 74 |     pixel_dither_info *_cb_info;
 75 |     pixel_dither_info *_cr_info;
 76 |     
 77 |     process_plane_context _y_context;
 78 |     process_plane_context _cb_context;
 79 |     process_plane_context _cr_context;
 80 |     
 81 |     short* _grain_buffer_y;
 82 |     short* _grain_buffer_c;
 83 | 
 84 |     int* _grain_buffer_offsets;
 85 | 
 86 |     DSVideoInfo _video_info;
 87 |     f3kdb_params_t _params;
 88 | 
 89 |     OPTIMIZATION_MODE _opt;
 90 | 
 91 |     void init(void);
 92 |     void init_frame_luts(void);
 93 | 
 94 |     void destroy_frame_luts(void);
 95 | 
 96 |     f3kdb_core_t(const f3kdb_core_t&);  // private copy constructor: instances cannot be copied from outside the class
 97 |     f3kdb_core_t operator=(const f3kdb_core_t&);  // private copy assignment; NOTE(review): returns by value rather than by reference - harmless only if it is never defined, confirm in core.cpp
 98 |     
 99 | public:
100 |     f3kdb_core_t(DSVideoInfo vi, const f3kdb_params_t params, OPTIMIZATION_MODE opt);
101 |     virtual ~f3kdb_core_t();
102 | 
103 |     void process_plane(int frame_index, int plane, unsigned char* dst_frame_ptr, int dst_pitch, const unsigned char* src_frame_ptr, int src_pitch);  // processes one plane of frame frame_index from the source buffer into the destination buffer
104 | };
105 | 


--------------------------------------------------------------------------------
/VCL2/vectorclass.h:
--------------------------------------------------------------------------------
 1 | /****************************  vectorclass.h   ********************************
 2 | * Author:        Agner Fog
 3 | * Date created:  2012-05-30
 4 | * Last modified: 2022-07-20
 5 | * Version:       2.02.00
 6 | * Project:       vector class library
 7 | * Home:          https://github.com/vectorclass
 8 | * Description:
 9 | * Header file defining vector classes as interface to intrinsic functions
10 | * in x86 and x86-64 microprocessors with SSE2 and later instruction sets.
11 | *
12 | * Instructions:
13 | * Use Gnu, Clang, Microsoft, or Intel C++ compiler. Compile for the desired
14 | * instruction set, which must be at least SSE2. Specify the supported
15 | * instruction set by a command line define, e.g. __SSE4_1__ if the
16 | * compiler does not automatically do so.
17 | * For detailed instructions, see vcl_manual.pdf
18 | *
19 | * Each vector object is represented internally in the CPU as a vector
20 | * register with 128, 256 or 512 bits.
21 | *
22 | * This header file includes the appropriate header files depending on the
23 | * selected instruction set.
24 | *
25 | * (c) Copyright 2012-2022 Agner Fog.
26 | * Apache License version 2.0 or later.
27 | ******************************************************************************/
28 | 
29 | #ifndef VECTORCLASS_H
30 | #define VECTORCLASS_H  20200
31 | 
32 | // Maximum vector size, bits. Allowed values are 128, 256, 512
33 | #ifndef MAX_VECTOR_SIZE
34 | #define MAX_VECTOR_SIZE 512
35 | #endif
36 | 
37 | // Determine instruction set, and define platform-dependent functions
38 | #include "instrset.h"        // Select supported instruction set
39 | 
40 | #if INSTRSET < 2             // instruction set SSE2 is the minimum
41 | #error Please compile for the SSE2 instruction set or higher
42 | #else
43 | 
44 | // Select appropriate .h files depending on instruction set
45 | #include "vectori128.h"      // 128-bit integer vectors
46 | #include "vectorf128.h"      // 128-bit floating point vectors
47 | 
48 | #if MAX_VECTOR_SIZE >= 256
49 | #if INSTRSET >= 8
50 | #include "vectori256.h"      // 256-bit integer vectors, requires AVX2 instruction set
51 | #else
52 | #include "vectori256e.h"     // 256-bit integer vectors, emulated
53 | #endif  // INSTRSET >= 8
54 | #if INSTRSET >= 7
55 | #include "vectorf256.h"      // 256-bit floating point vectors, requires AVX instruction set
56 | #else
57 | #include "vectorf256e.h"     // 256-bit floating point vectors, emulated
58 | #endif  //  INSTRSET >= 7
59 | #endif  //  MAX_VECTOR_SIZE >= 256
60 | 
61 | #if MAX_VECTOR_SIZE >= 512
62 | #if INSTRSET >= 9
63 | #include "vectori512.h"      // 512-bit vectors of 32 and 64 bit integers, requires AVX512F instruction set
64 | #include "vectorf512.h"      // 512-bit floating point vectors, requires AVX512F instruction set
65 | #else
66 | #include "vectori512e.h"     // 512-bit integer vectors, emulated
67 | #include "vectorf512e.h"     // 512-bit floating point vectors, emulated
68 | #endif  //  INSTRSET >= 9
69 | #if INSTRSET >= 10
70 | #include "vectori512s.h"     // 512-bit vectors of 8 and 16 bit integers, requires AVX512BW instruction set
71 | #else
72 | #include "vectori512se.h"    // 512-bit vectors of 8 and 16 bit integers, emulated
73 | #endif
74 | #endif  //  MAX_VECTOR_SIZE >= 512
75 | 
76 | #include "vector_convert.h"  // conversion between different vector sizes, and common templates
77 | 
78 | #endif  // INSTRSET >= 2
79 | 
80 | 
81 | #else   // VECTORCLASS_H
82 | 
83 | #if VECTORCLASS_H < 20000
84 | #error Mixed versions of vector class library
85 | #endif
86 | 
87 | #endif  // VECTORCLASS_H
88 | 


--------------------------------------------------------------------------------
/include/avisynth/avs/cpuid.h:
--------------------------------------------------------------------------------
 1 | // This program is free software; you can redistribute it and/or modify
 2 | // it under the terms of the GNU General Public License as published by
 3 | // the Free Software Foundation; either version 2 of the License, or
 4 | // (at your option) any later version.
 5 | //
 6 | // This program is distributed in the hope that it will be useful,
 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception.  An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 | 
32 | #ifndef AVSCORE_CPUID_H
33 | #define AVSCORE_CPUID_H
34 | 
35 | // For GetCPUFlags.  These are backwards-compatible with those in VirtualDub.
36 | // ending with SSE4_2
37 | // For emulation see https://software.intel.com/en-us/articles/intel-software-development-emulator
38 | enum {
39 |                     /* oldest CPU to support extension */
40 |   CPUF_FORCE        =  0x01,   //  N/A
41 |   CPUF_FPU          =  0x02,   //  386/486DX
42 |   CPUF_MMX          =  0x04,   //  P55C, K6, PII
43 |   CPUF_INTEGER_SSE  =  0x08,   //  PIII, Athlon
44 |   CPUF_SSE          =  0x10,   //  PIII, Athlon XP/MP
45 |   CPUF_SSE2         =  0x20,   //  PIV, K8
46 |   CPUF_3DNOW        =  0x40,   //  K6-2
47 |   CPUF_3DNOW_EXT    =  0x80,   //  Athlon
48 |   CPUF_X86_64       =  0xA0,   //  Hammer (note: equiv. to CPUF_3DNOW_EXT | CPUF_SSE2, which
49 |                                //          only Hammer will have anyway)
50 |   CPUF_SSE3         = 0x100,   //  PIV+, K8 Venice
51 |   CPUF_SSSE3        = 0x200,   //  Core 2
52 |   CPUF_SSE4         = 0x400,   //  same bit as CPUF_SSE4_1
53 |   CPUF_SSE4_1       = 0x400,   //  Penryn, Wolfdale, Yorkfield
54 |   CPUF_AVX          = 0x800,   //  Sandy Bridge, Bulldozer
55 |   CPUF_SSE4_2       = 0x1000,  //  Nehalem
56 |   // AVS+
57 |   CPUF_AVX2         = 0x2000,   //  Haswell
58 |   CPUF_FMA3         = 0x4000,
59 |   CPUF_F16C         = 0x8000,
60 |   CPUF_MOVBE        = 0x10000,  // Big Endian move
61 |   CPUF_POPCNT       = 0x20000,
62 |   CPUF_AES          = 0x40000,
63 |   CPUF_FMA4         = 0x80000,
64 | 
65 |   CPUF_AVX512F      = 0x100000,  // AVX-512 Foundation.
66 |   CPUF_AVX512DQ     = 0x200000,  // AVX-512 DQ (Double/Quad granular) Instructions
67 |   CPUF_AVX512PF     = 0x400000,  // AVX-512 Prefetch
68 |   CPUF_AVX512ER     = 0x800000,  // AVX-512 Exponential and Reciprocal
69 |   CPUF_AVX512CD     = 0x1000000, // AVX-512 Conflict Detection
70 |   CPUF_AVX512BW     = 0x2000000, // AVX-512 BW (Byte/Word granular) Instructions
71 |   CPUF_AVX512VL     = 0x4000000, // AVX-512 VL (128/256 Vector Length) Extensions
72 |   CPUF_AVX512IFMA   = 0x8000000, // AVX-512 IFMA integer 52 bit
73 |   CPUF_AVX512VBMI   = 0x10000000,// AVX-512 VBMI
74 | };
75 | 
76 | #ifdef BUILDING_AVSCORE
77 | int GetCPUFlags();
78 | void SetMaxCPU(int new_flags);
79 | #endif
80 | 
81 | #endif // AVSCORE_CPUID_H
82 | 


--------------------------------------------------------------------------------
/include/dualsynth/ds_format.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2020 Xinyue Lu
  3 |  *
  4 |  * DualSynth wrapper - DSFormat.
  5 |  *
  6 |  */
  7 | 
  8 | #pragma once
  9 | 
 10 | struct DSFormat
 11 | {
 12 |   bool IsFamilyYUV {true}, IsFamilyRGB {false}, IsFamilyYCC {false};
 13 |   bool IsInteger {true}, IsFloat {false};
 14 |   int SSW {0}, SSH {0};
 15 |   int BitsPerSample {8}, BytesPerSample {1};
 16 |   int Planes {3};
 17 |   DSFormat() {}
 18 |   DSFormat(const VSFormat* format)
 19 |   {
 20 |     Planes = format->numPlanes;
 21 |     IsFamilyYUV = format->colorFamily == cmYUV || format->colorFamily == cmGray;
 22 |     IsFamilyRGB = format->colorFamily == cmRGB;
 23 |     IsFamilyYCC = format->colorFamily == cmYCoCg;
 24 |     SSW = format->subSamplingW;
 25 |     SSH = format->subSamplingH;
 26 |     BitsPerSample = format->bitsPerSample;
 27 |     BytesPerSample = format->bytesPerSample;
 28 |     IsInteger = format->sampleType == stInteger;
 29 |     IsFloat = format->sampleType == stFloat;
 30 |   }
 31 | 
 32 |   const VSFormat* ToVSFormat(const VSCore* vscore, const VSAPI* vsapi) const
 33 |   {
 34 |     VSColorFamily family = cmYUV;
 35 |     if (IsFamilyYUV)
 36 |       family = Planes == 1 ? cmGray : cmYUV;
 37 |     else if (IsFamilyRGB)
 38 |       family = cmRGB;
 39 |     else if (IsFamilyYCC)
 40 |       family = cmYCoCg;
 41 |     return vsapi->registerFormat(family, IsInteger ? stInteger : stFloat, BitsPerSample, SSW, SSH, const_cast<VSCore*>(vscore));
 42 |   }
 43 | 
 44 |   DSFormat(int format)
 45 |   {
 46 |     const int componentBitSizes[8] = {8,16,32,0,0,10,12,14};
 47 |     if (format == VideoInfo::CS_I420)
 48 |       format = VideoInfo::CS_YV12;
 49 | 
 50 |     auto PYUV = VideoInfo::CS_PLANAR | VideoInfo::CS_YUV;
 51 |     IsFamilyYUV = (format & PYUV) == PYUV;
 52 |     auto PRGB = VideoInfo::CS_PLANAR | VideoInfo::CS_BGR;
 53 |     IsFamilyRGB = (format & PRGB) == PRGB;
 54 |     IsFamilyYCC = false;
 55 |     BitsPerSample = componentBitSizes[(format >> VideoInfo::CS_Shift_Sample_Bits) & 7];
 56 |     BytesPerSample = BitsPerSample == 8 ? 1 : BitsPerSample == 32 ? 4 : 2;
 57 |     IsInteger = BitsPerSample < 32;
 58 |     IsFloat = BitsPerSample == 32;
 59 |     if (IsFamilyYUV && (format & VideoInfo::CS_GENERIC_Y) == VideoInfo::CS_GENERIC_Y)
 60 |       Planes = 1;
 61 |     else if (IsFamilyYUV && (format & VideoInfo::CS_YUVA) == VideoInfo::CS_YUVA)
 62 |       Planes = 4;
 63 |     else if (IsFamilyRGB && (format & VideoInfo::CS_RGBA_TYPE) == VideoInfo::CS_RGBA_TYPE)
 64 |       Planes = 4;
 65 | 
 66 |     if (IsFamilyYUV && Planes > 1) {
 67 |       SSW = ((format >> VideoInfo::CS_Shift_Sub_Width) + 1) & 3;
 68 |       SSH = ((format >> VideoInfo::CS_Shift_Sub_Height) + 1) & 3;
 69 |     }
 70 |   }
 71 | 
 72 |   int ToAVSFormat() const
 73 |   {
 74 |     int pixel_format = VideoInfo::CS_PLANAR | (Planes == 3 ? VideoInfo::CS_YUV : VideoInfo::CS_YUVA) | VideoInfo::CS_VPlaneFirst;
 75 |     if (IsFamilyYUV) {
 76 |       pixel_format = VideoInfo::CS_PLANAR | (Planes == 3 ? VideoInfo::CS_YUV : VideoInfo::CS_YUVA) | VideoInfo::CS_VPlaneFirst;
 77 | 
 78 |       switch(SSW) {
 79 |         case 0: pixel_format |= VideoInfo::CS_Sub_Width_1; break;
 80 |         case 1: pixel_format |= VideoInfo::CS_Sub_Width_2; break;
 81 |         case 2: pixel_format |= VideoInfo::CS_Sub_Width_4; break;
 82 |       }
 83 | 
 84 |       switch(SSH) {
 85 |         case 0: pixel_format |= VideoInfo::CS_Sub_Height_1; break;
 86 |         case 1: pixel_format |= VideoInfo::CS_Sub_Height_2; break;
 87 |         case 2: pixel_format |= VideoInfo::CS_Sub_Height_4; break;
 88 |       }
 89 | 
 90 |       if (Planes == 1)
 91 |         pixel_format = VideoInfo::CS_GENERIC_Y;
 92 |     }
 93 |     else if (IsFamilyRGB || IsFamilyYCC)
 94 |       pixel_format = VideoInfo::CS_PLANAR | VideoInfo::CS_BGR | (Planes == 3 ? VideoInfo::CS_RGB_TYPE : VideoInfo::CS_RGBA_TYPE);
 95 | 
 96 |     switch(BitsPerSample) {
 97 |       case 8: pixel_format |= VideoInfo::CS_Sample_Bits_8; break;
 98 |       case 10: pixel_format |= VideoInfo::CS_Sample_Bits_10; break;
 99 |       case 12: pixel_format |= VideoInfo::CS_Sample_Bits_12; break;
100 |       case 14: pixel_format |= VideoInfo::CS_Sample_Bits_14; break;
101 |       case 16: pixel_format |= VideoInfo::CS_Sample_Bits_16; break;
102 |       case 32: pixel_format |= VideoInfo::CS_Sample_Bits_32; break;
103 |     }
104 | 
105 |     return pixel_format;
106 |   }
107 | };
108 | 


--------------------------------------------------------------------------------
/include/avisynth/avs/capi.h:
--------------------------------------------------------------------------------
  1 | // Avisynth C Interface Version 0.20
  2 | // Copyright 2003 Kevin Atkinson
  3 | 
  4 | // This program is free software; you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License as published by
  6 | // the Free Software Foundation; either version 2 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // This program is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | // GNU General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU General Public License
 15 | // along with this program; if not, write to the Free Software
 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 17 | // http://www.gnu.org/copyleft/gpl.html .
 18 | //
 19 | // As a special exception, I give you permission to link to the
 20 | // Avisynth C interface with independent modules that communicate with
 21 | // the Avisynth C interface solely through the interfaces defined in
 22 | // avisynth_c.h, regardless of the license terms of these independent
 23 | // modules, and to copy and distribute the resulting combined work
 24 | // under terms of your choice, provided that every copy of the
 25 | // combined work is accompanied by a complete copy of the source code
 26 | // of the Avisynth C interface and Avisynth itself (with the version
 27 | // used to produce the combined work), being distributed under the
 28 | // terms of the GNU General Public License plus this exception.  An
 29 | // independent module is a module which is not derived from or based
 30 | // on Avisynth C Interface, such as 3rd-party filters, import and
 31 | // export plugins, or graphical user interfaces.
 32 | 
 33 | #ifndef AVS_CAPI_H
 34 | #define AVS_CAPI_H
 35 | 
 36 | #include "config.h"
 37 | 
// On POSIX targets (except Haiku) make __declspec(...) expand to nothing so
// the export/import macros further down can use it unconditionally.
#ifdef AVS_POSIX
// this is also defined in avs/posix.h
#ifndef AVS_HAIKU
#define __declspec(x)
#endif
#endif

// EXTERN_C gives a declaration C linkage when compiled as C++ and is empty in C.
#ifdef __cplusplus
#  define EXTERN_C extern "C"
#else
#  define EXTERN_C
#endif

// AVSC_CC: calling convention applied to the C interface functions.
//   - Windows: __cdecl by default, __stdcall when AVSC_USE_STDCALL is defined,
//     and empty for 32-bit GCC builds of the core (or for clients that define
//     AVSC_WIN32_GCC32; see the explanatory comment below this block).
//   - Everything else: empty.
#ifdef AVS_WINDOWS
#ifdef BUILDING_AVSCORE
#  if defined(GCC) && defined(X86_32)
#    define AVSC_CC
#  else // MSVC builds and 64-bit GCC
#    ifndef AVSC_USE_STDCALL
#      define AVSC_CC __cdecl
#    else
#      define AVSC_CC __stdcall
#    endif
#  endif
#else // needed for programs that talk to AviSynth+
#  ifndef AVSC_WIN32_GCC32 // see comment below
#    ifndef AVSC_USE_STDCALL
#      define AVSC_CC __cdecl
#    else
#      define AVSC_CC __stdcall
#    endif
#  else
#    define AVSC_CC
#  endif
#endif
#  else
// not AVS_WINDOWS
#    define AVSC_CC
#endif
 76 | 
 77 | // On 64-bit Windows, there's only one calling convention,
 78 | // so there is no difference between MSVC and GCC. On 32-bit,
 79 | // this isn't true. The convention that GCC needs to use to
 80 | // even build AviSynth+ as 32-bit makes anything that uses
 81 | // it incompatible with 32-bit MSVC builds of AviSynth+.
 82 | // The AVSC_WIN32_GCC32 define is meant to provide a user
 83 | // switchable way to make builds of FFmpeg to test 32-bit
 84 | // GCC builds of AviSynth+ without having to screw around
 85 | // with alternate headers, while still default to the usual
 86 | // situation of using 32-bit MSVC builds of AviSynth+.
 87 | 
 88 | // Hopefully, this situation will eventually be resolved
 89 | // and a broadly compatible solution will arise so the
 90 | // same 32-bit FFmpeg build can handle either MSVC or GCC
 91 | // builds of AviSynth+.
 92 | 
// Storage/inline specifier for small helper functions defined in headers.
#define AVSC_INLINE static __inline

// AVSC_EXPORT / AVSC_IMPORT / AVSC_API(ret, name):
//   AVSC_API(ret, name) produces the declaration prefix for an interface
//   function called `name` that returns `ret`.
#ifdef BUILDING_AVSCORE
// Building the core itself: interface functions are exported.
#ifdef AVS_WINDOWS
#  ifndef AVS_STATIC_LIB
#    define AVSC_EXPORT __declspec(dllexport)
#  else
#    define AVSC_EXPORT
#  endif
#  define AVSC_API(ret, name) EXTERN_C AVSC_EXPORT ret AVSC_CC name
#else
#  define AVSC_EXPORT EXTERN_C
#  define AVSC_API(ret, name) EXTERN_C ret AVSC_CC name
#endif
#else
// Building a client/plugin: AVSC_EXPORT marks the client's own exported
// entry points, while AVSC_API declares functions imported from the core.
#  define AVSC_EXPORT EXTERN_C __declspec(dllexport)
#  ifndef AVS_STATIC_LIB
#    define AVSC_IMPORT __declspec(dllimport)
#  else
#    define AVSC_IMPORT
#  endif
#  ifndef AVSC_NO_DECLSPEC
#    define AVSC_API(ret, name) EXTERN_C AVSC_IMPORT ret AVSC_CC name
#  else
// With AVSC_NO_DECLSPEC each AVSC_API line becomes a function-pointer typedef
// named <name>_func instead of a function declaration.
#    define AVSC_API(ret, name) typedef ret (AVSC_CC *name##_func)
#  endif
#endif
120 | 
121 | #endif //AVS_CAPI_H
122 | 


--------------------------------------------------------------------------------
/include/vapoursynth/VSScript.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | * Copyright (c) 2013-2018 Fredrik Mellbin
 3 | *
 4 | * This file is part of VapourSynth.
 5 | *
 6 | * VapourSynth is free software; you can redistribute it and/or
 7 | * modify it under the terms of the GNU Lesser General Public
 8 | * License as published by the Free Software Foundation; either
 9 | * version 2.1 of the License, or (at your option) any later version.
10 | *
11 | * VapourSynth is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 | * Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public
17 | * License along with VapourSynth; if not, write to the Free Software
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 | */
20 | 
21 | #ifndef VSSCRIPT_H
22 | #define VSSCRIPT_H
23 | 
24 | #include "VapourSynth.h"
25 | 
26 | #define VSSCRIPT_API_MAJOR 3
27 | #define VSSCRIPT_API_MINOR 2
28 | #define VSSCRIPT_API_VERSION ((VSSCRIPT_API_MAJOR << 16) | (VSSCRIPT_API_MINOR))
29 | 
30 | /* As of api 3.2 all functions are threadsafe */
31 | 
32 | typedef struct VSScript VSScript;
33 | 
/* Values for the `flags` argument of vsscript_evaluateScript() and vsscript_evaluateFile(). */
typedef enum VSEvalFlags {
    /* Change the current working directory to the path of the script before evaluation. */
    efSetWorkingDir = 1,
} VSEvalFlags;
37 | 
38 | /* Get the api version */
39 | VS_API(int) vsscript_getApiVersion(void); /* api 3.1 */
40 | 
41 | /* Initialize the available scripting runtimes, returns zero on failure */
42 | VS_API(int) vsscript_init(void);
43 | 
44 | /* Free all scripting runtimes */
45 | VS_API(int) vsscript_finalize(void);
46 | 
47 | /*
48 | * Pass a pointer to a null handle to create a new one
49 | * The values returned by the query functions are only valid during the lifetime of the VSScript
50 | * scriptFilename is if the error message should reference a certain file, NULL allowed in vsscript_evaluateScript()
51 | * core is to pass in an already created instance so that mixed environments can be used,
52 | * NULL creates a new core that can be fetched with vsscript_getCore() later OR implicitly uses the one associated with an already existing handle when passed
53 | * If efSetWorkingDir is passed to flags the current working directory will be changed to the path of the script
54 | * note that if scriptFilename is NULL in vsscript_evaluateScript() then __file__ won't be set and the working directory won't be changed
55 | * Set efSetWorkingDir to get the default and recommended behavior
56 | */
57 | VS_API(int) vsscript_evaluateScript(VSScript **handle, const char *script, const char *scriptFilename, int flags);
58 | /* Convenience version of the above function that loads the script from a file */
59 | VS_API(int) vsscript_evaluateFile(VSScript **handle, const char *scriptFilename, int flags);
60 | /* Create an empty environment for use in later invocations, mostly useful to set script variables before execution */
61 | VS_API(int) vsscript_createScript(VSScript **handle);
62 | 
63 | VS_API(void) vsscript_freeScript(VSScript *handle);
64 | VS_API(const char *) vsscript_getError(VSScript *handle);
65 | /* The node returned must be freed using freeNode() before calling vsscript_freeScript() */
66 | VS_API(VSNodeRef *) vsscript_getOutput(VSScript *handle, int index);
67 | /* Both nodes returned must be freed using freeNode() before calling vsscript_freeScript(), the alpha node pointer will only be set if an alpha clip has been set in the script */
68 | VS_API(VSNodeRef *) vsscript_getOutput2(VSScript *handle, int index, VSNodeRef **alpha); /* api 3.1 */
69 | /* Unset an output index */
70 | VS_API(int) vsscript_clearOutput(VSScript *handle, int index);
71 | /* The core is valid as long as the environment exists */
72 | VS_API(VSCore *) vsscript_getCore(VSScript *handle);
73 | /* Convenience function for retrieving a vsapi pointer */
74 | VS_API(const VSAPI *) vsscript_getVSApi(void); /* deprecated as of api 3.2 since it's impossible to tell the api version supported */
75 | VS_API(const VSAPI *) vsscript_getVSApi2(int version); /* api 3.2, generally you should pass VAPOURSYNTH_API_VERSION */
76 | 
77 | /* Variables names that are not set or not of a convertible type will return an error */
78 | VS_API(int) vsscript_getVariable(VSScript *handle, const char *name, VSMap *dst);
79 | VS_API(int) vsscript_setVariable(VSScript *handle, const VSMap *vars);
80 | VS_API(int) vsscript_clearVariable(VSScript *handle, const char *name);
81 | /* Tries to clear everything set in an environment, normally it is better to simply free an environment completely and create a new one */
82 | VS_API(void) vsscript_clearEnvironment(VSScript *handle);
83 | 
84 | #endif /* VSSCRIPT_H */
85 | 


--------------------------------------------------------------------------------
/src/impl_dispatch_decl.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "core.h"
  4 | 
// DEFINE_IMPL(n, slot0, mode1_blur, mode1_noblur, ..., mode7_blur, mode7_noblur)
//
// Declares or defines the 15-entry function table process_plane_impl_<n>.
// Entry order is: the first argument after `n` (always passed as nullptr by
// the callers in this header), then blur/noblur pairs for modes 1 through 7.
//
// The first slot's macro parameter is itself spelled `nullptr`. To the
// preprocessor that is an ordinary parameter name, so the `nullptr` token
// inside the initializer below is replaced by whatever the caller passes.
#ifdef IMPL_DISPATCH_IMPORT_DECLARATION

// Import mode: only an extern declaration is emitted; the function arguments
// are accepted but unused.
#define DEFINE_IMPL(n, \
					nullptr, \
					impl_func_mode1_blur, \
					impl_func_mode1_noblur, \
					impl_func_mode2_blur, \
					impl_func_mode2_noblur, \
					impl_func_mode3_blur, \
					impl_func_mode3_noblur, \
					impl_func_mode4_blur, \
					impl_func_mode4_noblur, \
					impl_func_mode5_blur, \
					impl_func_mode5_noblur, \
					impl_func_mode6_blur, \
					impl_func_mode6_noblur, \
					impl_func_mode7_blur, \
					impl_func_mode7_noblur) \
	extern const process_plane_impl_t process_plane_impl_##n [];

#else

// Definition mode: emits the table with its initializer. `extern` is kept on
// the definition so the const array has external linkage in C++.
#define DEFINE_IMPL(n, \
					nullptr, \
					impl_func_mode1_blur, \
					impl_func_mode1_noblur, \
					impl_func_mode2_blur, \
					impl_func_mode2_noblur, \
					impl_func_mode3_blur, \
					impl_func_mode3_noblur, \
					impl_func_mode4_blur, \
					impl_func_mode4_noblur, \
					impl_func_mode5_blur, \
					impl_func_mode5_noblur, \
					impl_func_mode6_blur, \
				    impl_func_mode6_noblur, \
					impl_func_mode7_blur, \
				    impl_func_mode7_noblur) \
	extern const process_plane_impl_t process_plane_impl_##n [] = { \
					nullptr, \
					impl_func_mode1_blur, \
					impl_func_mode1_noblur, \
					impl_func_mode2_blur, \
					impl_func_mode2_noblur, \
					impl_func_mode3_blur, \
					impl_func_mode3_noblur, \
					impl_func_mode4_blur, \
					impl_func_mode4_noblur, \
					impl_func_mode5_blur, \
					impl_func_mode5_noblur, \
					impl_func_mode6_blur, \
					impl_func_mode6_noblur, \
					impl_func_mode7_blur, \
					impl_func_mode7_noblur};

#endif
 61 | 
 62 | 
// DEFINE_TEMPLATE_IMPL(name, impl_func, extra template args...)
//
// Fills a DEFINE_IMPL table with instantiations of the function template
// `impl_func<mode, blur, extra...>` for mode = 1..7 and blur = true/false.
// Each entry is wrapped in parentheses so that the commas inside the
// template argument list are not treated as macro argument separators.
#define DEFINE_TEMPLATE_IMPL(name, impl_func, ...) \
	DEFINE_IMPL(name, \
				(nullptr), \
				(&impl_func<1, true, __VA_ARGS__>), \
				(&impl_func<1, false, __VA_ARGS__>), \
				(&impl_func<2, true, __VA_ARGS__>), \
				(&impl_func<2, false, __VA_ARGS__>), \
				(&impl_func<3, true, __VA_ARGS__>), \
				(&impl_func<3, false, __VA_ARGS__>), \
				(&impl_func<4, true, __VA_ARGS__>), \
				(&impl_func<4, false, __VA_ARGS__>), \
				(&impl_func<5, true, __VA_ARGS__>), \
				(&impl_func<5, false, __VA_ARGS__>), \
				(&impl_func<6, true, __VA_ARGS__>), \
				(&impl_func<6, false, __VA_ARGS__>), \
				(&impl_func<7, true, __VA_ARGS__>), \
				(&impl_func<7, false, __VA_ARGS__>) );

// Same as DEFINE_TEMPLATE_IMPL with impl_func fixed to process_plane_sse_impl.
#define DEFINE_SSE_IMPL(name, ...) \
	DEFINE_TEMPLATE_IMPL(name, process_plane_sse_impl, __VA_ARGS__);
 83 | 
 84 | 
// Table instantiations. Each group is emitted either when the including file
// only wants declarations (IMPL_DISPATCH_IMPORT_DECLARATION) or when it is
// the translation unit that owns that group (DECLARE_IMPL_C / _SSE4 / _AVX2 /
// _AVX512). Every group provides one table per DA_* constant, which is passed
// as the trailing template argument.

// Plain C++ implementations (process_plane_plainc).
#if defined(IMPL_DISPATCH_IMPORT_DECLARATION) || defined(DECLARE_IMPL_C)
	DEFINE_TEMPLATE_IMPL(c_high_no_dithering, process_plane_plainc, DA_HIGH_NO_DITHERING);
	DEFINE_TEMPLATE_IMPL(c_high_ordered_dithering, process_plane_plainc, DA_HIGH_ORDERED_DITHERING);
	DEFINE_TEMPLATE_IMPL(c_high_floyd_steinberg_dithering, process_plane_plainc, DA_HIGH_FLOYD_STEINBERG_DITHERING);
	DEFINE_TEMPLATE_IMPL(c_16bit_interleaved, process_plane_plainc, DA_16BIT_INTERLEAVED);
#endif


// SSE4 tables (process_plane_sse_impl).
#if defined(IMPL_DISPATCH_IMPORT_DECLARATION) || defined(DECLARE_IMPL_SSE4)
	DEFINE_SSE_IMPL(sse4_high_no_dithering, DA_HIGH_NO_DITHERING);
	DEFINE_SSE_IMPL(sse4_high_ordered_dithering, DA_HIGH_ORDERED_DITHERING);
	DEFINE_SSE_IMPL(sse4_high_floyd_steinberg_dithering, DA_HIGH_FLOYD_STEINBERG_DITHERING);
	DEFINE_SSE_IMPL(sse4_16bit_interleaved, DA_16BIT_INTERLEAVED);
#endif

// AVX2 tables (process_plane_avx2_impl).
#if defined(IMPL_DISPATCH_IMPORT_DECLARATION) || defined(DECLARE_IMPL_AVX2)
#define DEFINE_AVX2_IMPL(name, ...) \
		DEFINE_TEMPLATE_IMPL(name, process_plane_avx2_impl, __VA_ARGS__);
	DEFINE_AVX2_IMPL(avx2_high_no_dithering, DA_HIGH_NO_DITHERING);
	DEFINE_AVX2_IMPL(avx2_high_ordered_dithering, DA_HIGH_ORDERED_DITHERING);
	DEFINE_AVX2_IMPL(avx2_high_floyd_steinberg_dithering, DA_HIGH_FLOYD_STEINBERG_DITHERING);
	DEFINE_AVX2_IMPL(avx2_16bit_interleaved, DA_16BIT_INTERLEAVED);
#endif

// AVX-512 tables (process_plane_avx512_impl).
#if defined(IMPL_DISPATCH_IMPORT_DECLARATION) || defined(DECLARE_IMPL_AVX512)
#define DEFINE_AVX512_IMPL(name, ...) \
		DEFINE_TEMPLATE_IMPL(name, process_plane_avx512_impl, __VA_ARGS__);
	DEFINE_AVX512_IMPL(avx512_high_no_dithering, DA_HIGH_NO_DITHERING);
	DEFINE_AVX512_IMPL(avx512_high_ordered_dithering, DA_HIGH_ORDERED_DITHERING);
	DEFINE_AVX512_IMPL(avx512_high_floyd_steinberg_dithering, DA_HIGH_FLOYD_STEINBERG_DITHERING);
	DEFINE_AVX512_IMPL(avx512_16bit_interleaved, DA_16BIT_INTERLEAVED);
#endif
117 | 


--------------------------------------------------------------------------------
/include/avisynth/avs/alignment.h:
--------------------------------------------------------------------------------
  1 | // Avisynth C Interface Version 0.20
  2 | // Copyright 2003 Kevin Atkinson
  3 | 
  4 | // This program is free software; you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License as published by
  6 | // the Free Software Foundation; either version 2 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // This program is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | // GNU General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU General Public License
 15 | // along with this program; if not, write to the Free Software
 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 17 | // http://www.gnu.org/copyleft/gpl.html .
 18 | //
 19 | // As a special exception, I give you permission to link to the
 20 | // Avisynth C interface with independent modules that communicate with
 21 | // the Avisynth C interface solely through the interfaces defined in
 22 | // avisynth_c.h, regardless of the license terms of these independent
 23 | // modules, and to copy and distribute the resulting combined work
 24 | // under terms of your choice, provided that every copy of the
 25 | // combined work is accompanied by a complete copy of the source code
 26 | // of the Avisynth C interface and Avisynth itself (with the version
 27 | // used to produce the combined work), being distributed under the
 28 | // terms of the GNU General Public License plus this exception.  An
 29 | // independent module is a module which is not derived from or based
 30 | // on Avisynth C Interface, such as 3rd-party filters, import and
 31 | // export plugins, or graphical user interfaces.
 32 | 
 33 | #ifndef AVS_ALIGNMENT_H
 34 | #define AVS_ALIGNMENT_H
 35 | 
 36 | // Functions and macros to help work with alignment requirements.
 37 | 
 38 | // Tells if a number is a power of two.
 39 | #define IS_POWER2(n) ((n) && !((n) & ((n) - 1)))
 40 | 
 41 | // Tells if the pointer "ptr" is aligned to "align" bytes.
 42 | #define IS_PTR_ALIGNED(ptr, align) (((uintptr_t)ptr & ((uintptr_t)(align-1))) == 0)
 43 | 
 44 | // Rounds up the number "n" to the next greater multiple of "align"
 45 | #define ALIGN_NUMBER(n, align) (((n) + (align)-1) & (~((align)-1)))
 46 | 
 47 | // Rounds up the pointer address "ptr" to the next greater multiple of "align"
 48 | #define ALIGN_POINTER(ptr, align) (((uintptr_t)(ptr) + (align)-1) & (~(uintptr_t)((align)-1)))
 49 | 
 50 | #ifdef __cplusplus
 51 | 
 52 | #include <cassert>
 53 | #include <cstdlib>
 54 | #include <cstdint>
 55 | #include "config.h"
 56 | 
 57 | #if defined(MSVC) && _MSC_VER<1400
 58 |     // needed for VS2013, otherwise C++11 'alignas' works
 59 |     #define avs_alignas(x) __declspec(align(x))
 60 | #else
 61 |     // assumes C++11 support
 62 |     #define avs_alignas(x) alignas(x)
 63 | #endif
 64 | 
 65 | template<typename T>
 66 | static bool IsPtrAligned(T* ptr, size_t align)
 67 | {
 68 |   assert(IS_POWER2(align));
 69 |   return (bool)IS_PTR_ALIGNED(ptr, align);
 70 | }
 71 | 
 72 | template<typename T>
 73 | static T AlignNumber(T n, T align)
 74 | {
 75 |   assert(IS_POWER2(align));
 76 |   return ALIGN_NUMBER(n, align);
 77 | }
 78 | 
 79 | template<typename T>
 80 | static T* AlignPointer(T* ptr, size_t align)
 81 | {
 82 |   assert(IS_POWER2(align));
 83 |   return (T*)ALIGN_POINTER(ptr, align);
 84 | }
 85 | 
 86 | extern "C"
 87 | {
 88 | #else
#include <stdint.h>
#include <stdlib.h>
 90 | #endif  // __cplusplus
 91 | 
 92 | // Returns a new buffer that is at least the size "nbytes".
 93 | // The buffer will be aligned to "align" bytes.
 94 | // Returns NULL on error. On successful allocation,
 95 | // the returned buffer must be freed using "avs_free".
 96 | inline void* avs_malloc(size_t nbytes, size_t align)
 97 | {
 98 |   if (!IS_POWER2(align))
 99 |     return NULL;
100 | 
101 |   size_t offset = sizeof(void*) + align - 1;
102 | 
103 |   void *orig = malloc(nbytes + offset);
104 |   if (orig == NULL)
105 |    return NULL;
106 | 
107 |   void **aligned = (void**)(((uintptr_t)orig + (uintptr_t)offset) & (~(uintptr_t)(align-1)));
108 |   aligned[-1] = orig;
109 |   return aligned;
110 | }
111 | 
// Buffers allocated using "avs_malloc" must be freed
// using "avs_free" instead of "free".
//
// Declared "static inline" so that C translation units get a usable
// definition; a plain "inline" definition in C provides no external symbol
// and the call may fail to link when it is not inlined.
static inline void avs_free(void *ptr)
{
  // Mirroring free()'s semantic requires us to accept NULLs
  if (ptr == NULL)
    return;

  // The pointer returned by the underlying malloc is stored in the slot
  // immediately before the aligned buffer.
  free(((void**)ptr)[-1]);
}
122 | 
123 | #ifdef __cplusplus
124 | } // extern "C"
125 | 
126 | // The point of these undef's is to force using the template functions
127 | // if we are in C++ mode. For C, the user can rely only on the macros.
128 | #undef IS_PTR_ALIGNED
129 | #undef ALIGN_NUMBER
130 | #undef ALIGN_POINTER
131 | 
132 | #endif  // __cplusplus
133 | 
134 | #endif  //AVS_ALIGNMENT_H
135 | 


--------------------------------------------------------------------------------
/src/pixel_proc_c_high_f_s_dithering.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <math.h>
  4 | #include <string.h>
  5 | 
  6 | #include "impl_dispatch.h"
  7 | 
// Pixel processor that applies Floyd-Steinberg style error diffusion.
// State lives in a caller-provided context buffer; two rows of accumulated
// error (current row and next row) are kept and swapped on every next_row().
namespace pixel_proc_high_f_s_dithering {
    
// #define DUMP_DATA

    // Storage type of one accumulated error value.
    typedef unsigned short ERROR_TYPE;

    typedef struct _context_t
    {
        // Target bit depth; the bits below it form the diffused error.
        int output_depth;
        // Start of the two-row error buffer.
        ERROR_TYPE* error_buffer;
        // True when error_buffer was heap-allocated and must be freed.
        bool buffer_needs_dealloc;
        // Error slot of the pixel currently being processed.
        ERROR_TYPE* current_px_error;
        // Distance (in elements) from the current row to the other row;
        // its sign flips on every next_row().
        int row_pitch;
        int frame_width;
        // Number of next_pixel() calls since the last next_row().
        int processed_pixels_in_current_line;
#ifdef DUMP_DATA
        FILE* debug_dump_fd[3];
#endif
    } context_t;

    // Initializes the context stored at the start of context_buffer for a
    // plane that is frame_width pixels wide.
    static inline void init_context(char context_buffer[CONTEXT_BUFFER_SIZE], int frame_width, int output_depth)
    {
        context_t* ctx = (context_t*)context_buffer;
        int ctx_size = sizeof(context_t);
        memset(ctx, 0, ctx_size);

        // two rows, each with one extra ERROR_TYPE element of padding at the
        // beginning and one at the end
        int size_needed = (frame_width + 2) * 2 * sizeof(ERROR_TYPE);
        if (CONTEXT_BUFFER_SIZE - ctx_size < size_needed)
        {
            // The rows do not fit behind the context struct: use the heap.
            // NOTE(review): the malloc result is not checked before the
            // memset below.
            ctx->error_buffer = (ERROR_TYPE*)malloc(size_needed);
            ctx->buffer_needs_dealloc = true;
        } else {
            // Place the rows directly after the context struct.
            ctx->error_buffer = (ERROR_TYPE*)(context_buffer + ctx_size);
        }
        memset(ctx->error_buffer, 0, size_needed);
        // Skip the leading padding element of the first row.
        ctx->current_px_error = ctx->error_buffer + 1;
        ctx->row_pitch = frame_width + 2;
        ctx->frame_width = frame_width;
        ctx->output_depth = output_depth;

#ifdef DUMP_DATA
        char file_name[256];
        sprintf(file_name, "fsdither_dump_stage0_%d", frame_width);
        ctx->debug_dump_fd[0] = fopen(file_name, "wb");
        sprintf(file_name, "fsdither_dump_stage1_%d", frame_width);
        ctx->debug_dump_fd[1] = fopen(file_name, "wb");
        sprintf(file_name, "fsdither_dump_stage2_%d", frame_width);
        ctx->debug_dump_fd[2] = fopen(file_name, "wb");
#endif
    }

    // Releases the heap-allocated error buffer (if any) and closes the debug
    // dump files when DUMP_DATA is enabled.
    static inline void destroy_context(void* context)
    {
        context_t* ctx = (context_t*)context;
        if (ctx->buffer_needs_dealloc)
        {
            free(ctx->error_buffer);
            ctx->error_buffer = NULL;
        }
#ifdef DUMP_DATA
        for (int i = 0; i < sizeof(ctx->debug_dump_fd) / sizeof(FILE*); i++)
        {
            if (ctx->debug_dump_fd[i])
            {
                fclose(ctx->debug_dump_fd[i]);
            }
        }
#endif
    }

    // Advances to the error slot of the next pixel in the current row.
    static __forceinline void next_pixel(void* context)
    {
        context_t* ctx = (context_t*)context;
        ctx->current_px_error++;
        ctx->processed_pixels_in_current_line++;
    }

    // Swaps the roles of the two error rows and clears the row that will
    // receive the error for the following line.
    static __forceinline void next_row(void* context)
    {
        context_t* ctx = (context_t*)context;
        ctx->row_pitch = -ctx->row_pitch;
        // (row_pitch >> 31) is -1 for a negative pitch and 0 otherwise
        // (assumes a 32-bit int with arithmetic right shift), so this selects
        // the second row when the pitch is negative and the first row when
        // it is positive.
        ctx->current_px_error = ctx->error_buffer + (ctx->row_pitch >> 31) * ctx->row_pitch;
        // Zero the other row, which now accumulates error for the next line.
        memset(ctx->current_px_error + ctx->row_pitch, 0, abs(ctx->row_pitch) * sizeof(ERROR_TYPE));
        // Skip the leading padding element.
        ctx->current_px_error++;
        ctx->processed_pixels_in_current_line = 0;
    }

    static __forceinline int dither(void* context, int pixel, int row, int column);

    #include "pixel_proc_c_high_bit_depth_common.h"

    static const int PIXEL_MAX = ( ( 1 << (INTERNAL_BIT_DEPTH) ) - 1 );
    static const int PIXEL_MIN = 0;

    // Adds the error accumulated for the current pixel, clamps the result to
    // [PIXEL_MIN, PIXEL_MAX], and spreads the bits below output_depth to the
    // neighbouring slots with weights 7/16 (right), 3/16 (below-left),
    // 5/16 (below) and 1/16 (below-right). Returns the clamped pixel at
    // INTERNAL_BIT_DEPTH precision; "row" and "column" are not used.
    static __forceinline int dither(void* context, int pixel, int row, int column)
    {
        context_t* ctx = (context_t*)context;
        if (ctx->processed_pixels_in_current_line >= ctx->frame_width)
        {
            // outside plane, can occur in SSE code
            return pixel;
        }
#ifndef FS_DITHER_SKIP_PRE_CLAMP
        pixel = clamp_pixel(pixel, PIXEL_MIN, PIXEL_MAX);
#endif
#ifdef DUMP_DATA
        fwrite(&pixel, 4, 1, ctx->debug_dump_fd[0]);
#endif
        pixel += *(ctx->current_px_error);
#ifdef DUMP_DATA
        fwrite(&pixel, 4, 1, ctx->debug_dump_fd[1]);
#endif
        pixel = clamp_pixel(pixel, PIXEL_MIN, PIXEL_MAX);
#ifdef DUMP_DATA
        fwrite(&pixel, 4, 1, ctx->debug_dump_fd[2]);
#endif
        // The low (INTERNAL_BIT_DEPTH - output_depth) bits are the new error.
        int new_error = pixel & ( ( 1 << (INTERNAL_BIT_DEPTH - ctx->output_depth) ) - 1 );
        *(ctx->current_px_error + 1) += (new_error * 7) >> 4;
        *(ctx->current_px_error + ctx->row_pitch - 1) += (new_error * 3) >> 4;
        *(ctx->current_px_error + ctx->row_pitch) += (new_error * 5) >> 4;
        *(ctx->current_px_error + ctx->row_pitch + 1) += (new_error * 1) >> 4;
        return pixel;
    }


};


--------------------------------------------------------------------------------
/src/cpuid.cpp:
--------------------------------------------------------------------------------
  1 | // Avisynth v1.0 beta.  Copyright 2000 Ben Rudiak-Gould.
  2 | // http://www.math.berkeley.edu/~benrg/avisynth.html
  3 | 
  4 | //	VirtualDub - Video processing and capture application
  5 | //	Copyright (C) 1998-2000 Avery Lee
  6 | //
  7 | //	This program is free software; you can redistribute it and/or modify
  8 | //	it under the terms of the GNU General Public License as published by
  9 | //	the Free Software Foundation; either version 2 of the License, or
 10 | //	(at your option) any later version.
 11 | //
 12 | //	This program is distributed in the hope that it will be useful,
 13 | //	but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 | //	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 | //	GNU General Public License for more details.
 16 | //
 17 | //	You should have received a copy of the GNU General Public License
 18 | //	along with this program; if not, write to the Free Software
 19 | //	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 20 | 
 21 | #include <avs/cpuid.h>
 22 | #include <avs/config.h>
 23 | #include <stdint.h>
 24 | 
 25 | #ifdef AVS_WINDOWS
 26 | #include <intrin.h>
 27 | #elif defined(__x86_64__) || defined(__i386__)
 28 | #include <cpuid.h>
 29 | #include <x86intrin.h>
 30 | #undef __cpuid
 31 | 
 32 | static inline void __cpuid(int cpuinfo[4], int leaf) {
 33 |   unsigned int eax, ebx, ecx, edx;
 34 |   __get_cpuid_count(leaf, 0, &eax, &ebx, &ecx, &edx);
 35 |   cpuinfo[0] = eax;
 36 |   cpuinfo[1] = ebx;
 37 |   cpuinfo[2] = ecx;
 38 |   cpuinfo[3] = edx;
 39 | }
 40 | #endif
 41 | 
 42 | #define IS_BIT_SET(bitfield, bit) ((bitfield) & (1<<(bit)) ? true : false)
 43 | 
#if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)
// Returns the low 32 bits of extended control register 0 (XCR0), read with
// the xgetbv instruction. Only compiled for x86 / x86-64 targets.
static uint32_t get_xcr0()
{
    uint32_t xcr0;
#if defined(GCC) || defined(CLANG)
    // ECX = 0 selects XCR0; the low half comes back in EAX and the high half
    // in EDX, which is only declared as clobbered.
    __asm__("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx");
#else
    // Other compilers: use the _xgetbv intrinsic and truncate to 32 bits.
    xcr0 = (uint32_t)_xgetbv(0);
#endif
    return xcr0;
}
#endif
 56 | 
 57 | static int CPUCheckForExtensions() // returns a bitmask of CPUF_* flags describing the running CPU
 58 | {
 59 |   int result = 0;
 60 | 
 61 | #if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)
 62 |   int cpuinfo[4]; // [0] = EAX, [1] = EBX, [2] = ECX, [3] = EDX
 63 |   __cpuid(cpuinfo, 1);
 64 | 
 65 |   if (IS_BIT_SET(cpuinfo[3], 0))
 66 |     result |= CPUF_FPU;
 67 |   if (IS_BIT_SET(cpuinfo[3], 23))
 68 |     result |= CPUF_MMX;
 69 |   if (IS_BIT_SET(cpuinfo[3], 25))
 70 |     result |= CPUF_SSE | CPUF_INTEGER_SSE;
 71 |   if (IS_BIT_SET(cpuinfo[3], 26))
 72 |     result |= CPUF_SSE2;
 73 |   if (IS_BIT_SET(cpuinfo[2], 0))
 74 |     result |= CPUF_SSE3;
 75 |   if (IS_BIT_SET(cpuinfo[2], 9))
 76 |     result |= CPUF_SSSE3;
 77 |   if (IS_BIT_SET(cpuinfo[2], 19))
 78 |     result |= CPUF_SSE4_1;
 79 |   if (IS_BIT_SET(cpuinfo[2], 20))
 80 |     result |= CPUF_SSE4_2;
 81 |   if (IS_BIT_SET(cpuinfo[2], 22))
 82 |     result |= CPUF_MOVBE;
 83 |   if (IS_BIT_SET(cpuinfo[2], 23))
 84 |     result |= CPUF_POPCNT;
 85 |   if (IS_BIT_SET(cpuinfo[2], 25))
 86 |     result |= CPUF_AES;
 87 |   if (IS_BIT_SET(cpuinfo[2], 29))
 88 |     result |= CPUF_F16C;
 89 |   // AVX and wider are only reported when XCR0 shows the matching register state enabled, not on CPUID bits alone.
 90 |   bool xgetbv_supported = IS_BIT_SET(cpuinfo[2], 27);
 91 |   bool avx_supported = IS_BIT_SET(cpuinfo[2], 28);
 92 |   if (xgetbv_supported && avx_supported)
 93 |   {
 94 |     uint32_t xgetbv0_32 = get_xcr0();
 95 |     if ((xgetbv0_32 & 0x6u) == 0x6u) { // XCR0[2:1]: XMM and YMM state
 96 |       result |= CPUF_AVX;
 97 |       if (IS_BIT_SET(cpuinfo[2], 12))
 98 |         result |= CPUF_FMA3;
 99 |       __cpuid(cpuinfo, 7);
100 |       if (IS_BIT_SET(cpuinfo[1], 5))
101 |         result |= CPUF_AVX2;
102 |     }
103 |     if ((xgetbv0_32 & 0xE6u) == 0xE6u) { // all of XCR0[7:5] (opmask, ZMM_Hi256, Hi16_ZMM) and XCR0[2:1] must be set; any-bit tests are not enough
104 |       __cpuid(cpuinfo, 7);
105 |       if (IS_BIT_SET(cpuinfo[1], 16))
106 |         result |= CPUF_AVX512F;
107 |       if (IS_BIT_SET(cpuinfo[1], 17))
108 |         result |= CPUF_AVX512DQ;
109 |       if (IS_BIT_SET(cpuinfo[1], 21))
110 |         result |= CPUF_AVX512IFMA;
111 |       if (IS_BIT_SET(cpuinfo[1], 26))
112 |         result |= CPUF_AVX512PF;
113 |       if (IS_BIT_SET(cpuinfo[1], 27))
114 |         result |= CPUF_AVX512ER;
115 |       if (IS_BIT_SET(cpuinfo[1], 28))
116 |         result |= CPUF_AVX512CD;
117 |       if (IS_BIT_SET(cpuinfo[1], 30))
118 |         result |= CPUF_AVX512BW;
119 |       if (IS_BIT_SET(cpuinfo[1], 31))
120 |         result |= CPUF_AVX512VL;
121 |       if (IS_BIT_SET(cpuinfo[2], 1))
122 |         result |= CPUF_AVX512VBMI;
123 |     }
124 |   }
125 | 
126 |   __cpuid(cpuinfo, 0x80000000); // EAX returns the highest extended leaf
127 |   if ((unsigned int)cpuinfo[0] >= 0x80000001u) // compare as unsigned: the value has bit 31 set
128 |   {
129 |     __cpuid(cpuinfo, 0x80000001);
130 | 
131 |     if (IS_BIT_SET(cpuinfo[3], 31))
132 |       result |= CPUF_3DNOW;
133 | 
134 |     if (IS_BIT_SET(cpuinfo[3], 30))
135 |       result |= CPUF_3DNOW_EXT;
136 | 
137 |     if (IS_BIT_SET(cpuinfo[3], 22))
138 |       result |= CPUF_INTEGER_SSE;
139 | 
140 |     if (result & CPUF_AVX) {
141 |       if (IS_BIT_SET(cpuinfo[2], 16))
142 |         result |= CPUF_FMA4;
143 |     }
144 |   }
145 | #elif defined(__aarch64__) || defined(__arm__)
146 |   result |= CPUF_SSE | CPUF_SSE2 | CPUF_SSE3 | CPUF_SSSE3; // no probing on ARM: these flags are reported unconditionally
147 |   result |= CPUF_SSE4_1 | CPUF_SSE4_2 | CPUF_AES;
148 | #endif
149 | 
150 |   return result;
151 | }
152 | 
153 | int GetCPUFlags() { // detection runs on the first call; later calls return the stored flags
154 |   static const int detected_flags = CPUCheckForExtensions();
155 |   return detected_flags;
156 | }


--------------------------------------------------------------------------------
/include/avisynth/avs/posix.h:
--------------------------------------------------------------------------------
  1 | // This program is free software; you can redistribute it and/or modify
  2 | // it under the terms of the GNU General Public License as published by
  3 | // the Free Software Foundation; either version 2 of the License, or
  4 | // (at your option) any later version.
  5 | //
  6 | // This program is distributed in the hope that it will be useful,
  7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
  8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  9 | // GNU General Public License for more details.
 10 | //
 11 | // You should have received a copy of the GNU General Public License
 12 | // along with this program; if not, write to the Free Software
 13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 14 | // http://www.gnu.org/copyleft/gpl.html .
 15 | //
 16 | // Linking Avisynth statically or dynamically with other modules is making a
 17 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
 18 | // General Public License cover the whole combination.
 19 | //
 20 | // As a special exception, the copyright holders of Avisynth give you
 21 | // permission to link Avisynth with independent modules that communicate with
 22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
 23 | // terms of these independent modules, and to copy and distribute the
 24 | // resulting combined work under terms of your choice, provided that
 25 | // every copy of the combined work is accompanied by a complete copy of
 26 | // the source code of Avisynth (the version of Avisynth used to produce the
 27 | // combined work), being distributed under the terms of the GNU General
 28 | // Public License plus this exception.  An independent module is a module
 29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
 30 | // import and export plugins, or graphical user interfaces.
 31 | 
 32 | #ifdef AVS_POSIX
 33 | #ifndef AVSCORE_POSIX_H
 34 | #define AVSCORE_POSIX_H
 35 | 
 36 | #ifdef __cplusplus
 37 | #include <cstring>
 38 | #endif
 39 | #include <strings.h>
 40 | #include <unistd.h>
 41 | 
 42 | // Define the MSVC extensions used in Avisynth
 43 | #define __single_inheritance
 44 | 
 45 | // These things don't exist in Linux
 46 | #if defined(AVS_HAIKU)
 47 | #undef __declspec
 48 | #endif
 49 | #define __declspec(x)
 50 | #define lstrlen strlen
 51 | #define lstrcmp strcmp
 52 | #define lstrcmpi strcasecmp
 53 | #define _stricmp strcasecmp
 54 | #define _strnicmp strncasecmp
 55 | #define _strdup strdup
 56 | #define SetCurrentDirectory(x) chdir(x)
 57 | #define SetCurrentDirectoryW(x) chdir(x)
 58 | #define GetCurrentDirectoryW(x) getcwd(x)
 59 | #define _putenv putenv
 60 | #define _alloca alloca
 61 | 
 62 | // Borrowing some compatibility macros from AvxSynth, slightly modified
 63 | #define UInt32x32To64(a, b) ((uint64_t)(((uint64_t)((uint32_t)(a))) * ((uint32_t)(b))))
 64 | #define Int64ShrlMod32(a, b) ((uint64_t)((uint64_t)(a) >> (b)))
 65 | #define Int32x32To64(a, b)  ((int64_t)(((int64_t)((long)(a))) * ((long)(b))))
 66 | 
 67 | #define InterlockedIncrement(x) __sync_add_and_fetch((x), 1)
 68 | #define InterlockedDecrement(x) __sync_sub_and_fetch((x), 1)
 69 | #define MulDiv(nNumber, nNumerator, nDenominator)   (int32_t) (((int64_t) (nNumber) * (int64_t) (nNumerator) + (int64_t) ((nDenominator)/2)) / (int64_t) (nDenominator))
 70 | 
 71 | #ifndef TRUE
 72 | #define TRUE  true
 73 | #endif
 74 | 
 75 | #ifndef FALSE
 76 | #define FALSE false
 77 | #endif
 78 | 
 79 | #define S_FALSE       (0x00000001)
 80 | #define E_FAIL        (0x80004005)
 81 | #define FAILED(hr)    ((hr) & 0x80000000)
 82 | #define SUCCEEDED(hr) (!FAILED(hr))
 83 | 
 84 | // Statuses copied from comments in exception.cpp
 85 | #define STATUS_GUARD_PAGE_VIOLATION 0x80000001
 86 | #define STATUS_DATATYPE_MISALIGNMENT 0x80000002
 87 | #define STATUS_BREAKPOINT 0x80000003
 88 | #define STATUS_SINGLE_STEP 0x80000004
 89 | #define STATUS_ACCESS_VIOLATION 0xc0000005
 90 | #define STATUS_IN_PAGE_ERROR 0xc0000006
 91 | #define STATUS_INVALID_HANDLE 0xc0000008
 92 | #define STATUS_NO_MEMORY 0xc0000017
 93 | #define STATUS_ILLEGAL_INSTRUCTION 0xc000001d
 94 | #define STATUS_NONCONTINUABLE_EXCEPTION 0xc0000025
 95 | #define STATUS_INVALID_DISPOSITION 0xc0000026
 96 | #define STATUS_ARRAY_BOUNDS_EXCEEDED 0xc000008c
 97 | #define STATUS_FLOAT_DENORMAL_OPERAND 0xc000008d
 98 | #define STATUS_FLOAT_DIVIDE_BY_ZERO 0xc000008e
 99 | #define STATUS_FLOAT_INEXACT_RESULT 0xc000008f
100 | #define STATUS_FLOAT_INVALID_OPERATION 0xc0000090
101 | #define STATUS_FLOAT_OVERFLOW 0xc0000091
102 | #define STATUS_FLOAT_STACK_CHECK 0xc0000092
103 | #define STATUS_FLOAT_UNDERFLOW 0xc0000093
104 | #define STATUS_INTEGER_DIVIDE_BY_ZERO 0xc0000094
105 | #define STATUS_INTEGER_OVERFLOW 0xc0000095
106 | #define STATUS_PRIVILEGED_INSTRUCTION 0xc0000096
107 | #define STATUS_STACK_OVERFLOW 0xc00000fd
108 | 
109 | // Calling convention
110 | #ifndef AVS_HAIKU
111 | #define __stdcall
112 | #define __cdecl
113 | #endif
114 | 
115 | // PowerPC OS X is really niche these days, but this painless equivocation
116 | // of the function/macro names used in posix_get_available_memory()
117 | // is all it takes to let it work.  The G5 was 64-bit, and if 10.5 Leopard
118 | // can run in native 64-bit, it probably uses the names in that block as-is.
119 | #ifdef AVS_MACOS
120 | #ifdef PPC32
121 | #define vm_statistics64_data_t vm_statistics_data_t
122 | #define HOST_VM_INFO64_COUNT HOST_VM_INFO_COUNT
123 | #define HOST_VM_INFO64 HOST_VM_INFO
124 | #define host_statistics64 host_statistics
125 | #endif // PPC32
126 | #endif // AVS_MACOS
127 | 
128 | #endif // AVSCORE_POSIX_H
129 | #endif // AVS_POSIX
130 | 


--------------------------------------------------------------------------------
/include/avisynth/avs/config.h:
--------------------------------------------------------------------------------
  1 | // Avisynth C Interface Version 0.20
  2 | // Copyright 2003 Kevin Atkinson
  3 | 
  4 | // This program is free software; you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License as published by
  6 | // the Free Software Foundation; either version 2 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // This program is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | // GNU General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU General Public License
 15 | // along with this program; if not, write to the Free Software
 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 17 | // http://www.gnu.org/copyleft/gpl.html .
 18 | //
 19 | // As a special exception, I give you permission to link to the
 20 | // Avisynth C interface with independent modules that communicate with
 21 | // the Avisynth C interface solely through the interfaces defined in
 22 | // avisynth_c.h, regardless of the license terms of these independent
 23 | // modules, and to copy and distribute the resulting combined work
 24 | // under terms of your choice, provided that every copy of the
 25 | // combined work is accompanied by a complete copy of the source code
 26 | // of the Avisynth C interface and Avisynth itself (with the version
 27 | // used to produce the combined work), being distributed under the
 28 | // terms of the GNU General Public License plus this exception.  An
 29 | // independent module is a module which is not derived from or based
 30 | // on Avisynth C Interface, such as 3rd-party filters, import and
 31 | // export plugins, or graphical user interfaces.
 32 | 
 33 | #ifndef AVS_CONFIG_H
 34 | #define AVS_CONFIG_H
 35 | 
 36 | // Undefine this to get cdecl calling convention
 37 | #define AVSC_USE_STDCALL 1
 38 | 
 39 | // NOTE TO PLUGIN AUTHORS:
 40 | // Because FRAME_ALIGN can be substantially higher than the alignment
 41 | // a plugin actually needs, plugins should not use FRAME_ALIGN to check for
 42 | // alignment. They should always request the exact alignment value they need.
 43 | // This is to make sure that plugins work over the widest range of AviSynth
 44 | // builds possible.
 45 | #define FRAME_ALIGN 64
 46 | 
 47 | #if   defined(_M_AMD64) || defined(__x86_64)
 48 | #   define X86_64
 49 | #elif defined(_M_IX86) || defined(__i386__)
 50 | #   define X86_32
 51 | // VS2017 introduced _M_ARM64
 52 | #elif defined(_M_ARM64) || defined(__aarch64__)
 53 | #   define ARM64
 54 | #elif defined(_M_ARM) || defined(__arm__)
 55 | #   define ARM32
 56 | #elif defined(__PPC64__)
 57 | #   define PPC64
 58 | #elif defined(_M_PPC) || defined(__PPC__) || defined(__POWERPC__)
 59 | #   define PPC32
 60 | #else
 61 | #   error Unsupported CPU architecture.
 62 | #endif
 63 | 
 64 | //            VC++  LLVM-Clang-cl   MinGW-Gnu
 65 | // MSVC        x          x
 66 | // MSVC_PURE   x
 67 | // CLANG                  x
 68 | // GCC                                  x
 69 | 
 70 | #if defined(__clang__)
 71 | // Check clang first. clang-cl also defines _MSC_VER
 72 | // We set MSVC because they are mostly compatible
 73 | #   define CLANG
 74 | #if defined(_MSC_VER)
 75 | #   define MSVC
 76 | #   define AVS_FORCEINLINE __attribute__((always_inline))
 77 | #else
 78 | #   define AVS_FORCEINLINE __attribute__((always_inline)) inline
 79 | #endif
 80 | #elif   defined(_MSC_VER)
 81 | #   define MSVC
 82 | #   define MSVC_PURE
 83 | #   define AVS_FORCEINLINE __forceinline
 84 | #elif defined(__GNUC__)
 85 | #   define GCC
 86 | #   define AVS_FORCEINLINE __attribute__((always_inline)) inline
 87 | #else
 88 | #   error Unsupported compiler.
 89 | #   define AVS_FORCEINLINE inline
 90 | #   undef __forceinline
 91 | #   define __forceinline inline
 92 | #endif
 93 | 
 94 | #if defined(_WIN32)
 95 | #   define AVS_WINDOWS
 96 | #elif defined(__linux__)
 97 | #   define AVS_LINUX
 98 | #   define AVS_POSIX
 99 | #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
100 | #   define AVS_BSD
101 | #   define AVS_POSIX
102 | #elif defined(__APPLE__)
103 | #   define AVS_MACOS
104 | #   define AVS_POSIX
105 | #elif defined(__HAIKU__)
106 | #   define AVS_HAIKU
107 | #   define AVS_POSIX
108 | #else
109 | #   error Operating system unsupported.
110 | #endif
111 | 
112 | // useful warnings disabler macros for supported compilers
113 | 
114 | #if defined(_MSC_VER)
115 | #define DISABLE_WARNING_PUSH           __pragma(warning( push ))
116 | #define DISABLE_WARNING_POP            __pragma(warning( pop ))
117 | #define DISABLE_WARNING(warningNumber) __pragma(warning( disable : warningNumber ))
118 | 
119 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE      DISABLE_WARNING(4101)
120 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION            DISABLE_WARNING(4505)
121 | // other warnings you want to deactivate...
122 | 
123 | #elif defined(__GNUC__) || defined(__clang__)
124 | #define DO_PRAGMA(X) _Pragma(#X)
125 | #define DISABLE_WARNING_PUSH           DO_PRAGMA(GCC diagnostic push)
126 | #define DISABLE_WARNING_POP            DO_PRAGMA(GCC diagnostic pop)
127 | #define DISABLE_WARNING(warningName)   DO_PRAGMA(GCC diagnostic ignored #warningName)
128 | 
129 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE      DISABLE_WARNING(-Wunused-variable)
130 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION            DISABLE_WARNING(-Wunused-function)
131 | // other warnings you want to deactivate...
132 | 
133 | #else
134 | #define DISABLE_WARNING_PUSH
135 | #define DISABLE_WARNING_POP
136 | #define DISABLE_WARNING_UNREFERENCED_LOCAL_VARIABLE
137 | #define DISABLE_WARNING_UNREFERENCED_FUNCTION
138 | // other warnings you want to deactivate...
139 | 
140 | #endif
141 | 
142 | #if defined(AVS_POSIX)
143 | #define NEW_AVSVALUE
144 | #else
145 | #define NEW_AVSVALUE
146 | #endif
147 | 
148 | #if defined(AVS_WINDOWS)
149 | // Windows XP does not have proper initialization for
150 | // thread local variables.
151 | // Use a workaround instead of __declspec(thread)
152 | #define XP_TLS
153 | #endif
154 | 
155 | #endif //AVS_CONFIG_H
156 | 


--------------------------------------------------------------------------------
/include/vapoursynth/VSHelper.h:
--------------------------------------------------------------------------------
  1 | /*****************************************************************************
  2 | * Copyright (c) 2012-2015 Fredrik Mellbin
  3 | * --- Legal stuff ---
  4 | * This program is free software. It comes without any warranty, to
  5 | * the extent permitted by applicable law. You can redistribute it
  6 | * and/or modify it under the terms of the Do What The Fuck You Want
  7 | * To Public License, Version 2, as published by Sam Hocevar. See
  8 | * http://sam.zoy.org/wtfpl/COPYING for more details.
  9 | *****************************************************************************/
 10 | 
 11 | #ifndef VSHELPER_H
 12 | #define VSHELPER_H
 13 | 
 14 | #include <limits.h>
 15 | #include <stdint.h>
 16 | #include <stdlib.h>
 17 | #include <string.h>
 18 | #include <assert.h>
 19 | #include <math.h>
 20 | #ifdef _WIN32
 21 | #include <malloc.h>
 22 | #endif
 23 | #include "VapourSynth.h"
 24 | 
 25 | /* Visual Studio doesn't recognize inline in c mode */
 26 | #if defined(_MSC_VER) && !defined(__cplusplus)
 27 | #define inline _inline
 28 | #endif
 29 | 
 30 | /* A kinda portable definition of the C99 restrict keyword (or its unofficial C++ equivalent) */
 31 | #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* Available in C99 */
 32 | #define VS_RESTRICT restrict
 33 | #elif defined(__cplusplus) || defined(_MSC_VER) /* Almost all relevant C++ compilers support it so just assume it works */
 34 | #define VS_RESTRICT __restrict
 35 | #else /* Not supported */
 36 | #define VS_RESTRICT
 37 | #endif
 38 | 
 39 | #ifdef _WIN32
 40 | #define VS_ALIGNED_MALLOC(pptr, size, alignment) do { *(pptr) = _aligned_malloc((size), (alignment)); } while (0)
 41 | #define VS_ALIGNED_FREE(ptr) do { _aligned_free((ptr)); } while (0)
 42 | #else
 43 | #define VS_ALIGNED_MALLOC(pptr, size, alignment) do { if(posix_memalign((void**)(pptr), (alignment), (size))) *((void**)(pptr)) = NULL; } while (0) /* *pptr is set to NULL on failure */
 44 | #define VS_ALIGNED_FREE(ptr) do { free((ptr)); } while (0)
 45 | #endif
 46 | 
 47 | #define VSMAX(a,b) ((a) > (b) ? (a) : (b))
 48 | #define VSMIN(a,b) ((a) > (b) ? (b) : (a))
 49 | 
 50 | #ifdef __cplusplus 
 51 | /* A nicer templated malloc for all the C++ users out there */
 52 | #if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900)
 53 | template<typename T=void>
 54 | #else
 55 | template<typename T>
 56 | #endif
 57 | static inline T* vs_aligned_malloc(size_t size, size_t alignment) {
 58 | #ifdef _WIN32
 59 |     return (T*)_aligned_malloc(size, alignment);
 60 | #else
 61 |     void *block = NULL;
 62 |     /* posix_memalign returns non-zero on failure; that is reported as a null pointer */
 63 |     const int status = posix_memalign(&block, alignment, size);
 64 |     return (T*)(status == 0 ? block : (void *)0);
 65 | #endif
 66 | }
 67 | 
 68 | static inline void vs_aligned_free(void *ptr) { /* function form of the VS_ALIGNED_FREE macro */
 69 |     VS_ALIGNED_FREE(ptr);
 70 | }
 71 | #endif /* __cplusplus */
 72 | 
 73 | /* convenience function for checking if the format never changes between frames */
 74 | static inline int isConstantFormat(const VSVideoInfo *vi) {
 75 |     return vi->format && vi->width > 0 && vi->height > 0; /* non-null format and positive dimensions */
 76 | }
 77 | 
 78 | /* convenience function to check for if two clips have the same format (unknown/changeable will be considered the same too) */
 79 | static inline int isSameFormat(const VSVideoInfo *v1, const VSVideoInfo *v2) {
 80 |     return v1->format == v2->format && v1->width == v2->width && v1->height == v2->height; /* format pointer and both dimensions must match */
 81 | }
 82 | 
 83 | /* multiplies and divides a rational number, such as a frame duration, in place and reduces the result */
 84 | static inline void muldivRational(int64_t *num, int64_t *den, int64_t mul, int64_t div) {
 85 |     int64_t gcd, rem;
 86 | 
 87 |     /* an invalid rational (zero denominator) is left untouched */
 88 |     if (*den == 0)
 89 |         return;
 90 | 
 91 |     /* a zero divisor would produce an invalid rational */
 92 |     assert(div);
 93 |     *num *= mul;
 94 |     *den *= div;
 95 |     gcd = *num;
 96 |     for (rem = *den; rem != 0; ) { /* Euclid's algorithm on the scaled pair */
 97 |         int64_t next = gcd % rem;
 98 |         gcd = rem;
 99 |         rem = next;
100 |     }
101 |     /* divide by a non-negative value so num and den keep their signs */
102 |     if (gcd < 0)
103 |         gcd = -gcd;
104 |     *num /= gcd;
105 |     *den /= gcd;
106 | }
107 | 
108 | /* reduces a rational number */
109 | static inline void vs_normalizeRational(int64_t *num, int64_t *den) {
110 |     muldivRational(num, den, 1, 1); /* multiplying and dividing by 1 leaves only the reduction step */
111 | }
112 | 
113 | /* add two rational numbers and reduces the result */
114 | static inline void vs_addRational(int64_t *num, int64_t *den, int64_t addnum, int64_t addden) {
115 |     /* do nothing if the rational number is invalid */
116 |     if (!*den)
117 |         return;
118 | 
119 |     /* nobody wants to accidentally add an invalid rational number */
120 |     assert(addden);
121 | 
122 |     if (*den == addden) {
123 |         *num += addnum;
124 |     } else {
125 |         /* bring both fractions onto the common denominator den * addden */
126 |         int64_t temp = addden;
127 |         addnum *= *den;
128 |         *num *= temp;
129 |         *den *= temp;
130 |         *num += addnum;
131 |     }
132 | 
133 |     /* reduce on every path: equal denominators can still give e.g. 1/4 + 1/4 = 2/4 */
134 |     vs_normalizeRational(num, den);
135 | }
136 | 
137 | /* converts an int64 to int with saturation, useful to silence warnings when reading int properties among other things */
138 | static inline int int64ToIntS(int64_t i) {
139 |     if (i < INT_MIN) /* clamp below */
140 |         return INT_MIN;
141 |     if (i > INT_MAX) /* clamp above */
142 |         return INT_MAX;
143 |     return (int)i;
144 | }
145 | 
146 | static inline void vs_bitblt(void *dstp, int dst_stride, const void *srcp, int src_stride, size_t row_size, size_t height) {
147 |     const uint8_t *src_row = (const uint8_t *)srcp;
148 |     uint8_t *dst_row = (uint8_t *)dstp;
149 |     size_t rows_left = height;
150 |     if (!height)
151 |         return;
152 |     /* both strides equal the row size: the rows are contiguous, so one copy covers everything */
153 |     if (src_stride == dst_stride && src_stride == (int)row_size) {
154 |         memcpy(dstp, srcp, row_size * height);
155 |         return;
156 |     }
157 |     /* otherwise copy row by row, advancing each side by its own stride */
158 |     for (; rows_left > 0; rows_left--, src_row += src_stride, dst_row += dst_stride) {
159 |         memcpy(dst_row, src_row, row_size);
160 |     }
161 | }
162 | 
163 | /* check if the frame dimensions are valid for a given format */
164 | /* returns non-zero for valid width and height */
165 | static inline int areValidDimensions(const VSFormat *fi, int width, int height) {
166 |     return width % (1 << fi->subSamplingW) == 0 && height % (1 << fi->subSamplingH) == 0; /* each dimension must be a multiple of 2^subsampling */
167 | }
168 | 
169 | /* Visual Studio doesn't recognize inline in c mode */
170 | #if defined(_MSC_VER) && !defined(__cplusplus)
171 | #undef inline
172 | #endif
173 | 
174 | #endif
175 | 


--------------------------------------------------------------------------------
/include/dualsynth/avs_wrapper.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2020 Xinyue Lu
  3 |  *
  4 |  * DualSynth wrapper - AviSynth+.
  5 |  *
  6 |  */
  7 | 
  8 | #pragma once
  9 | 
 10 | namespace Plugin {
 11 |   extern const char* Description;
 12 | }
 13 | 
 14 | namespace AVSInterface
 15 | {
 16 |   struct AVSInDelegator final : InDelegator { // reads filter parameters by name out of an AviSynth argument list
 17 |     const AVSValue _args;
 18 |     std::unordered_map<std::string, int> _params_index_map; // parameter name -> index into _args
 19 |     int NameToIndex(const char* name) { // throws a C-string message when the name was never registered
 20 |       std::string name_string(name);
 21 |       if (_params_index_map.find(name_string) == _params_index_map.end())
 22 |         throw "Unknown parameter during NameToIndex";
 23 |       return _params_index_map[name_string];
 24 |     }
 25 |     void Read(const char* name, int& output) override { // the current value of output is handed to AsInt (presumably as the fallback)
 26 |       output = _args[NameToIndex(name)].AsInt(output);
 27 |     }
 28 |     void Read(const char* name, int64_t& output) override { // goes through AsInt, so the value handed in is narrowed to int
 29 |       output = _args[NameToIndex(name)].AsInt(static_cast<int>(output));
 30 |     }
 31 |     void Read(const char* name, float& output) override {
 32 |       output = static_cast<float>(_args[NameToIndex(name)].AsFloat(output));
 33 |     }
 34 |     void Read(const char* name, double& output) override {
 35 |       auto _default = output;
 36 |       output = _args[NameToIndex(name)].AsFloat(NAN); // NAN is passed so that a NaN result can be detected below
 37 |       if (std::isnan(output))
 38 |         output = _default; // keep the caller's original value
 39 |     }
 40 |     void Read(const char* name, bool& output) override {
 41 |       output = _args[NameToIndex(name)].AsBool(output);
 42 |     }
 43 |     void Read(const char* name, std::string& output) override {
 44 |       const char * result = _args[NameToIndex(name)].AsString(output.c_str());
 45 |       if (result) // a null result leaves output unchanged
 46 |         output = result;
 47 |     }
 48 |     void Read(const char* name, void*& output) override { // stores a heap-allocated PClip in output; Free() deletes it
 49 |       PClip* clip = new PClip(_args[NameToIndex(name)].AsClip());
 50 |       output = (void *)(clip);
 51 |     }
 52 |     void Read(const char* name, std::vector<int>& output) override { // array overloads: replace output's contents, throw if the argument is not an array
 53 |       auto arg = _args[NameToIndex(name)];
 54 |       if (!arg.IsArray())
 55 |         throw "Argument is not array";
 56 |       auto size = arg.ArraySize();
 57 |       output.clear();
 58 |       for (int i = 0; i < size; i++)
 59 |         output.push_back(arg[i].AsInt());
 60 |     }
 61 |     void Read(const char* name, std::vector<int64_t>& output) override {
 62 |       auto arg = _args[NameToIndex(name)];
 63 |       if (!arg.IsArray())
 64 |         throw "Argument is not array";
 65 |       auto size = arg.ArraySize();
 66 |       output.clear();
 67 |       for (int i = 0; i < size; i++)
 68 |         output.push_back(arg[i].AsInt());
 69 |     }
 70 |     void Read(const char* name, std::vector<float>& output) override {
 71 |       auto arg = _args[NameToIndex(name)];
 72 |       if (!arg.IsArray())
 73 |         throw "Argument is not array";
 74 |       auto size = arg.ArraySize();
 75 |       output.clear();
 76 |       for (int i = 0; i < size; i++)
 77 |         output.push_back(static_cast<float>(arg[i].AsFloat()));
 78 |     }
 79 |     void Read(const char* name, std::vector<double>& output) override {
 80 |       auto arg = _args[NameToIndex(name)];
 81 |       if (!arg.IsArray())
 82 |         throw "Argument is not array";
 83 |       auto size = arg.ArraySize();
 84 |       output.clear();
 85 |       for (int i = 0; i < size; i++)
 86 |         output.push_back(arg[i].AsFloat());
 87 |     }
 88 |     void Read(const char* name, std::vector<bool>& output) override {
 89 |       auto arg = _args[NameToIndex(name)];
 90 |       if (!arg.IsArray())
 91 |         throw "Argument is not array";
 92 |       auto size = arg.ArraySize();
 93 |       output.clear();
 94 |       for (int i = 0; i < size; i++)
 95 |         output.push_back(arg[i].AsBool());
 96 |     }
 97 |     void Free(void*& clip) override { // deletes the PClip behind the pointer and nulls the caller's pointer
 98 |       PClip* c = (PClip *)(clip);
 99 |       delete c;
100 |       clip = nullptr;
101 |     }
102 | 
103 |     AVSInDelegator(const AVSValue args, std::vector<Param> params) : _args(args)
104 |     {
105 |       int idx = 0;
106 |       for (auto &&param : params)
107 |       {
108 |         if (!param.AVSEnabled) continue; // parameters not enabled for AviSynth do not consume an index
109 |         _params_index_map[param.Name] = idx++;
110 |       }
111 |     }
112 |   };
113 | 
114 |   struct AVSFetchFrameFunctor final : FetchFrameFunctor { // callable that fetches frame n from the wrapped clip
115 |     PClip _clip;
116 |     VideoInfo _vi;
117 |     IScriptEnvironment* _env;
118 |     std::mutex fetch_frame_mutex; // held for the whole of operator()
119 |     AVSFetchFrameFunctor(PClip clip, VideoInfo vi, IScriptEnvironment * env)
120 |       : _clip(clip), _vi(vi), _env(env) {}
121 |     DSFrame operator()(int n) override {
122 |       std::lock_guard<std::mutex> guard(fetch_frame_mutex); // one fetch at a time through this functor
123 |       auto frame = _clip->GetFrame(n, _env); // uses the environment captured at construction
124 |       return DSFrame(frame, _vi, _env);
125 |     }
126 |     ~AVSFetchFrameFunctor() override {}
127 |   };
128 | 
129 |   template<typename FilterType>
130 |   struct AVSWrapper : IClip // exposes a FilterType instance through the AviSynth IClip interface
131 |   {
132 |     AVSValue _args;
133 |     IScriptEnvironment* _env;
134 |     FilterType data;
135 |     PClip clip;
136 |     VideoInfo vi; // written by GetVideoInfo(); NOTE(review): GetFrame reads it, so it seems to rely on GetVideoInfo() having run first - confirm
137 |     AVSFetchFrameFunctor* functor {nullptr}; // owned: created in Initialize() when argument 0 is a clip, deleted in the destructor
138 |     
139 |     AVSWrapper(AVSValue args, IScriptEnvironment* env)
140 |       : _args(args), _env(env) {}
141 |     
142 |     void Initialize() // second construction phase; called by Create(), which catches C-string exceptions from it
143 |     {
144 |       auto input_vi = DSVideoInfo();
145 |       if (_args[0].IsClip()) {
146 |         clip = _args[0].AsClip();
147 |         input_vi = DSVideoInfo(clip->GetVideoInfo());
148 |         functor = new AVSFetchFrameFunctor(clip, clip->GetVideoInfo(), _env);
149 |       }
150 |       auto argument = AVSInDelegator(_args, data.Params());
151 |       data.Initialize(&argument, input_vi, functor); // functor stays null when there is no input clip
152 |     }
153 | 
154 |     PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment * env) override {
155 |       std::unordered_map<int, DSFrame> in_frames; // frame number -> input frame handed to the filter
156 |       if (functor) {
157 |         std::vector<int> requests = data.RequestReferenceFrames(n);
158 |         for (auto &&i : requests) {
159 |           auto frame = clip->GetFrame(i, env);
160 |           in_frames[i] = DSFrame(frame, vi, env);
161 |         }
162 |       }
163 |       else
164 |         in_frames[n] = DSFrame(env); // no input clip: the filter gets a frame built from env alone
165 |       
166 |       return data.GetFrame(n, in_frames).ToAVSFrame();
167 |     }
168 | 
169 |     const VideoInfo& __stdcall GetVideoInfo() override { // refreshes vi from the filter's output info on every call
170 |       auto output_vi = data.GetOutputVI();
171 |       vi = output_vi.ToAVSVI();
172 |       return vi;
173 |     }
174 | 
175 |     void __stdcall GetAudio(void* buf, int64_t start, int64_t count, IScriptEnvironment* env) override { if (clip) clip->GetAudio(buf, start, count, env); }
176 |     bool __stdcall GetParity(int n) override { return clip ? clip->GetParity(n) : false; }
177 |     int __stdcall SetCacheHints(int cachehints, int frame_range) override { return data.SetCacheHints(cachehints, frame_range); }
178 |     ~AVSWrapper() {
179 |       delete functor;
180 |     }
181 |   };
182 | 
183 |   // Filter factory passed to AddFunction: builds the wrapper, initializes it, and reports C-string errors through ThrowError.
184 |   template<typename FilterType>
185 |   AVSValue __cdecl Create(AVSValue args, void* user_data, IScriptEnvironment* env)
186 |   {
187 |     auto filter = new AVSWrapper<FilterType>(args, env);
188 |     PClip owner(filter); // ref-counted owner: the wrapper is released instead of leaked when an exception leaves this function
189 |     try { filter->Initialize(); }
190 |     catch (const char *err) {
191 |       env->ThrowError("%s: %s", filter->data.AVSName(), err); // filter is still alive here; owner releases it during unwinding
192 |     }
193 |     return owner;
194 |   }
195 | 
196 |   template<typename FilterType>
197 |   void RegisterFilter(IScriptEnvironment* env) { // registers Create<FilterType> under the filter's AviSynth name and parameter string
198 |     FilterType filter; // default-constructed only to query AVSName() and AVSParams()
199 |     env->AddFunction(filter.AVSName(), filter.AVSParams().c_str(), Create<FilterType>, nullptr);
200 |   }
201 | }
202 | 
203 | const AVS_Linkage *AVS_linkage = NULL;
204 | 
205 | extern "C" __declspec(dllexport) const char* __stdcall AvisynthPluginInit3(IScriptEnvironment* env, AVS_Linkage* linkage)
206 | {
207 |   // Store the linkage pointer passed in by the caller, then run every registration callback.
208 |   AVS_linkage = linkage;
209 |   for (auto &&register_one : RegisterAVSFilters())
210 |     register_one(env);
211 | 
212 |   return Plugin::Description;
213 | }
214 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Neo f3kdb (forked from flash3kyuu_deband)
  2 | 
  3 | Neo f3kdb Copyright(C) 2019-2020 Xinyue Lu, and previous developers
  4 | 
  5 | F3kdb is a deband filter. It was originally written for AviUtl by [bunyuchan](https://twitter.com/bunyuchan) and later ported to AviSynth by [SAPikachu](https://github.com/SAPikachu) many years ago.
  6 | 
  7 | Legacy format support was removed, along with a few options that are no longer useful. Due to API changes, the project has been renamed from f3kdb to Neo_f3kdb to avoid confusion. SSE4.1 is now required to run the optimized routines. SSE4.1 has been supported since Intel Penryn (2007) and AMD Bulldozer (2011). An AVX routine didn't show much performance benefit and is not included.
  8 | 
  9 | ## Usage
 10 | 
 11 | ```python
 12 | # AviSynth+
 13 | LoadPlugin("neo-f3kdb.dll")
 14 | neo_f3kdb(clip, y=64, cb=64, cr=64, grainy=0, grainc=0, ...)
 15 | # VapourSynth
 16 | core.neo_f3kdb.Deband(clip, y=64, cb=64, cr=64, grainy=0, grainc=0, ...)
 17 | ```
 18 | 
 19 | [Check original usage documents.](https://f3kdb.readthedocs.io/en/stable/usage.html)
 20 | 
 21 | Below are the additional parameters and those that differ from the original ones.
 22 | 
 23 | - *range* (>r9)
 24 | 
 25 |     Banding detection range.
 26 | 
 27 |     Must be between `0` and `255`.
 28 | 
 29 |     Default value - `15`.
 30 | 
 31 | - *sample_mode*
 32 | 
 33 |     * 1: Column references.
 34 | 
 35 |             +
 36 |             o
 37 |             +
 38 | 
 39 |     * 2: Square references.
 40 | 
 41 |             + +
 42 |              o
 43 |             + +
 44 | 
 45 |     * 3: Row references. (> r2)
 46 | 
 47 |             + o +
 48 | 
 49 |     * 4: Average of sample mode 1 and 3. (> r2)
 50 | 
 51 |              +
 52 |             (o) => A
 53 |              +
 54 | 
 55 |             + (o) + => B
 56 | 
 57 |             (A + B) / 2
 58 | 
 59 |     * 5: (Integer-based) Similar to sample mode 4 but uses multiple thresholds for detail preservation. (>r8)<br>
 60 |         Speed-optimized version of https://forum.doom9.org/showthread.php?p=1652256#post1652256.<br>
 61 |         `blur_first` has no effect in this sample mode.<br>
 62 |         `Y`/`Cb`/`Cr` - for this mode they are used for the `avgDif` check – the difference between the current pixel and the average of all four cross-shaped reference pixels.
 63 | 
 64 |     * 6: (Floating-point) Similar to sample mode 4 but uses multiple thresholds for detail preservation. (>r9)<br>
 65 |         Direct implementation of https://forum.doom9.org/showthread.php?p=1652256#post1652256.<br>
 66 |         `blur_first` has no effect in this sample mode.<br>
 67 |         `Y`/`Cb`/`Cr` - for this mode they are used for the `avgDif` check – the difference between the current pixel and the average of all four cross-shaped reference pixels.
 68 | 
 69 |     * 7: (Floating-point) An extension of sample_mode=6 that adds a gradient angle check for more intelligent detail preservation. (>r9)<br>
 70 |         Direct implementation of https://forum.doom9.org/showthread.php?p=1652256#post1652256.<br>
 71 |         `blur_first` has no effect in this sample mode.<br>
 72 |         `Y`/`Cb`/`Cr` - for this mode they are used for the `avgDif` check – the difference between the current pixel and the average of all four cross-shaped reference pixels.
 73 | 
 74 |     Reference points are randomly picked within the `range`.
 75 | 
 76 | - *input_depth* (removed)
 77 | 
 78 | - *input_mode* (removed)
 79 | 
 80 | - *output_mode* (removed)
 81 | 
 82 | - *opt*
 83 | 
 84 |     Sets which cpu optimizations to use.
 85 | 
 86 |     `sample_mode=1`, `sample_mode=2`, `sample_mode=3`, and `sample_mode=4` have `C++` and `SSE4.1` code.
 87 | 
 88 |     `sample_mode=5`, `sample_mode=6` and `sample_mode=7` have `C++`, `SSE4.1`, `AVX2` and `AVX-512` code.
 89 | 
 90 |     - `-1`: Auto-detect.
 91 |     - `0`: Use C++ code.
 92 |     - `1`: Use SSE4.1 code.
 93 |     - `2`: Use AVX2 code.
 94 |     - `3`: Use AVX-512 code.
 95 | 
 96 |     Default: `-1`.
 97 | 
 98 | - *mt*
 99 | 
100 |     Process planes in parallel. Default: true.
101 | 
102 |     If you notice a deadlock under extreme conditions, try disabling it.
103 | 
104 | - *scale* (> r8)
105 | 
106 |     Whether to use threshold parameters (Y, Cb, Cr...) within the internal bit depth range (0..65535).
107 | 
108 |     Default: `false`.
109 | 
110 | - *Y_1 / Cb_1 / Cr_1 (maxDif)* (> r8)
111 | 
112 |     Detail protection threshold (max difference) for `sample_mode=5`, `sample_mode=6` and `sample_mode=7`.
113 | 
114 |     This threshold applies to the `maxDif` check. `maxDif` is the largest absolute difference found between the current pixel and any of its four individual cross-shaped reference pixels. If this `maxDif` is greater than or equal to `Y_1`/`Cb_1`/`Cr_1`, the pixel is considered detail.
115 | 
116 |     Helps protect sharp edges and fine details from being blurred by the debanding process.
117 | 
118 |     The valid range is the same as for `Y`/`Cb`/`Cr`.
119 | 
120 |     Default value - they are equal to `Y`/`Cb`/`Cr`.
121 | 
122 | - *Y_2 / Cb_2 / Cr_2 (midDifs)* (> r8)
123 | 
124 |     Gradient/Texture protection threshold (mid-pair difference) for `sample_mode=5`, `sample_mode=6` and `sample_mode=7`.
125 | 
126 |     This threshold applies to the `midDif` checks. `midDif` measures how much the current pixel deviates from the midpoint of a pair of opposing reference pixels (one check for the vertical pair, one for the horizontal pair). If the current pixel is far from this midpoint (i.e., `midDif` is greater than or equal to `Y_2` / `Cb_2` / `Cr_2`), it might indicate a texture.
127 | 
128 |     This helps distinguish true banding in gradients from textured areas or complex details.
129 | 
130 |     The valid range is the same as for `Y`/`Cb`/`Cr`.
131 | 
132 |     Default value - they are equal to `Y`/`Cb`/`Cr`.
133 | 
134 | - *angle_boost* (>r9)
135 | 
136 |     A multiplier used in `sample_mode=7` to increase the debanding strength on consistent gradients.
137 | 
138 |     When the gradient angle check passes, the `Y`/`Cb`/`Cr`, `Y_1`/`Cb_1`/`Cr_1`, and `Y_2`/`Cb_2`/`Cr_2` thresholds are multiplied by this factor.
139 | 
140 |     A value greater than `1.0` boosts the strength. A value of `1.0` has no effect.
141 | 
142 |     Must be a positive number.
143 | 
144 |     Default value - `1.5`.
145 | 
146 | - *max_angle* (>r9)
147 | 
148 |     The threshold for the gradient angle check in `sample_mode=7`.
149 | 
150 |     It represents the maximum allowed difference between the gradient angle of the center pixel and its reference pixels for the `angle_boost` to be applied. The gradient angle is normalized to a `[0.0, 1.0]` range.
151 | 
152 |     A smaller value is stricter and requires a more consistent gradient. A larger value is more lenient.
153 | 
154 |     The valid range is `0.0` to `1.0`.
155 | 
156 |     Default value - `0.15`.
157 | 
158 | ## Compilation
159 | 
160 | ```cmd
161 | cmake -B build\x86 -S . -DCMAKE_GENERATOR_PLATFORM=Win32 -D_DIR=x86
162 | cmake -B build\x64 -S . -DCMAKE_GENERATOR_PLATFORM=x64 -D_DIR=x64
163 | cmake --build build\x86 --config Release
164 | cmake --build build\x64 --config Release
165 | ```
166 | 
167 | ## Compilation (GCC, Windows)
168 | 
169 | ```bash
170 | cmake -B build/gcc -S . -G "MSYS Makefiles" -D_DIR=gcc
171 | cmake --build build/gcc
172 | ```
173 | 
174 | ## Compilation (GCC, Unix-like)
175 | 
176 | ```bash
177 | cmake -B build/gcc -S . -G "Unix Makefiles" -D_DIR=gcc
178 | cmake --build build/gcc
179 | ```
180 | 
181 | ## License
182 | 
183 |     This program is free software: you can redistribute it and/or modify
184 |     it under the terms of the GNU General Public License as published by
185 |     the Free Software Foundation, either version 3 of the License, or
186 |     (at your option) any later version.
187 | 
188 |     This program is distributed in the hope that it will be useful,
189 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
190 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
191 |     GNU General Public License for more details.
192 | 
193 |     You should have received a copy of the GNU General Public License
194 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
195 | 


--------------------------------------------------------------------------------
/include/dualsynth/vs_wrapper.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2020 Xinyue Lu
  3 |  *
  4 |  * DualSynth wrapper - VapourSynth.
  5 |  *
  6 |  */
  7 | 
  8 | #pragma once
  9 | 
 10 | namespace Plugin { // plugin metadata strings; only declared here (extern)
 11 |   extern const char* Identifier; // passed to configFunc in VSInterface::RegisterPlugin
 12 |   extern const char* Namespace; // passed to configFunc in VSInterface::RegisterPlugin
 13 |   extern const char* Description; // passed to configFunc in VSInterface::RegisterPlugin
 14 | }
 15 | 
 16 | namespace VSInterface {
 17 |   const VSAPI * API; // NOTE(review): not referenced anywhere else in this header, and it is a non-const global defined in a header - confirm it is still needed
 18 | 
 19 |   struct VSInDelegator final : InDelegator { // Reads filter arguments by name out of a VapourSynth VSMap.
 20 |     const VSMap *_in; // argument map being read
 21 |     const VSAPI *_vsapi; // API table used for every prop* call below
 22 |     int _err; // error flag written by the prop* calls; not initialized by the constructor
 23 |     void Read(const char* name, int& output) override { // scalar reads keep the caller's value when the lookup reports an error
 24 |       auto _default = output;
 25 |       output = static_cast<int>(_vsapi->propGetInt(_in, name, 0, &_err)); // propGetInt result is narrowed to int
 26 |       if (_err) output = _default;
 27 |     }
 28 |     void Read(const char* name, int64_t& output) override {
 29 |       auto _default = output;
 30 |       output = _vsapi->propGetInt(_in, name, 0, &_err);
 31 |       if (_err) output = _default;
 32 |     }
 33 |     void Read(const char* name, float& output) override {
 34 |       auto _default = output;
 35 |       output = static_cast<float>(_vsapi->propGetFloat(_in, name, 0, &_err)); // propGetFloat result is narrowed to float
 36 |       if (_err) output = _default;
 37 |     }
 38 |     void Read(const char* name, double& output) override {
 39 |       auto _default = output;
 40 |       output = _vsapi->propGetFloat(_in, name, 0, &_err);
 41 |       if (_err) output = _default;
 42 |     }
 43 |     void Read(const char* name, bool& output) override { // booleans are read as integers; any non-zero value is true
 44 |       auto output_int = _vsapi->propGetInt(_in, name, 0, &_err);
 45 |       if (!_err) output = output_int != 0;
 46 |     }
 47 |     void Read(const char* name, std::string& output) override { // output is left untouched when the lookup reports an error
 48 |       auto output_str = _vsapi->propGetData(_in, name, 0, &_err);
 49 |       if (!_err) output = output_str;
 50 |     }
 51 |     void Read(const char* name, std::vector<int>& output) override { // array reads: output is replaced only when propNumElements is non-negative
 52 |       auto size = _vsapi->propNumElements(_in, name);
 53 |       if (size < 0) return; // negative count: leave the caller's vector as it is
 54 |       output.clear();
 55 |       for (int i = 0; i < size; i++)
 56 |         output.push_back(static_cast<int>(_vsapi->propGetInt(_in, name, i, &_err))); // per-element _err is not checked
 57 |     }
 58 |     void Read(const char* name, std::vector<int64_t>& output) override {
 59 |       auto size = _vsapi->propNumElements(_in, name);
 60 |       if (size < 0) return;
 61 |       output.clear();
 62 |       for (int i = 0; i < size; i++)
 63 |         output.push_back(_vsapi->propGetInt(_in, name, i, &_err));
 64 |     }
 65 |     void Read(const char* name, std::vector<float>& output) override {
 66 |       auto size = _vsapi->propNumElements(_in, name);
 67 |       if (size < 0) return;
 68 |       output.clear();
 69 |       for (int i = 0; i < size; i++)
 70 |         output.push_back(static_cast<float>(_vsapi->propGetFloat(_in, name, i, &_err)));
 71 |     }
 72 |     void Read(const char* name, std::vector<double>& output) override {
 73 |       auto size = _vsapi->propNumElements(_in, name);
 74 |       if (size < 0) return;
 75 |       output.clear();
 76 |       for (int i = 0; i < size; i++)
 77 |         output.push_back(_vsapi->propGetFloat(_in, name, i, &_err));
 78 |     }
 79 |     void Read(const char* name, std::vector<bool>& output) override {
 80 |       auto size = _vsapi->propNumElements(_in, name);
 81 |       if (size < 0) return;
 82 |       output.clear();
 83 |       for (int i = 0; i < size; i++)
 84 |         output.push_back(_vsapi->propGetInt(_in, name, i, &_err)); // integer value is converted to bool implicitly
 85 |     }
 86 |     void Read(const char* name, void*& output) override { // clip read: stores the propGetNode result as an opaque pointer; _err is not checked here
 87 |       output = reinterpret_cast<void *>(_vsapi->propGetNode(_in, name, 0, &_err));
 88 |     }
 89 |     void Free(void*& clip) override { // releases a node obtained through Read(name, void*&) and nulls the caller's pointer
 90 |       _vsapi->freeNode(reinterpret_cast<VSNodeRef *>(clip));
 91 |       clip = nullptr;
 92 |     }
 93 |     VSInDelegator(const VSMap *in, const VSAPI *vsapi) : _in(in), _vsapi(vsapi) {} // stores the map and API pointers; _err is left unset
 94 |   };
 95 | 
 96 |   struct VSFetchFrameFunctor final : FetchFrameFunctor { // Fetches frames of the input clip for a filter; frees the clip node on destruction.
 97 |     VSNodeRef *_vs_clip; // input clip node; released with freeNode in the destructor
 98 |     VSCore *_core;
 99 |     const VSAPI *_vsapi;
100 |     VSFrameContext *_frameCtx; // not set by the constructor; GetFrame<FilterType> assigns it on every call
101 |     VSFetchFrameFunctor(VSNodeRef *clip, VSCore *core, const VSAPI *vsapi)
102 |       : _vs_clip(clip), _core(core), _vsapi(vsapi) {}
103 |     DSFrame operator()(int n) override { // wraps frame n of the clip, fetched with the stored frame context, in a DSFrame
104 |       return DSFrame(_vsapi->getFrameFilter(n, _vs_clip, _frameCtx), _core, _vsapi);
105 |     }
106 |     ~VSFetchFrameFunctor() override {
107 |       _vsapi->freeNode(_vs_clip);
108 |     }
109 |   };
110 | 
111 |   template<typename FilterType> // VapourSynth init callback: publishes the filter's output video info on the node.
112 |   void VS_CC Initialize(VSMap *in, VSMap *out, void **instanceData, VSNode *node, VSCore *core, const VSAPI *vsapi) {
113 |     auto Data = reinterpret_cast<FilterType*>(*instanceData); // instance data is the FilterType passed to createFilter
114 |     auto output_vi = Data->GetOutputVI();
115 |     vsapi->setVideoInfo(output_vi.ToVSVI(core, vsapi), 1, node); // a single output is declared
116 |   }
117 | 
118 |   template<typename FilterType> // VapourSynth free callback: destroys the frame-fetch functor and then the filter.
119 |   void VS_CC Delete(void *instanceData, VSCore *core, const VSAPI *vsapi) {
120 |     auto filter = reinterpret_cast<FilterType*>(instanceData);
121 |     auto functor = reinterpret_cast<VSFetchFrameFunctor*>(filter->fetch_frame); // may be null for filters without an input clip
122 |     delete functor; // the functor's destructor frees the clip node
123 |     delete filter;
124 |   }
125 | 
126 |   template<typename FilterType> // VapourSynth getFrame callback: requests the reference frames, then renders frame n.
127 |   const VSFrameRef* VS_CC GetFrame(int n, int activationReason, void **instanceData, void **frameData, VSFrameContext *frameCtx, VSCore *core, const VSAPI *vsapi) {
128 |     auto filter = reinterpret_cast<FilterType*>(*instanceData);
129 |     auto functor = reinterpret_cast<VSFetchFrameFunctor*>(filter->fetch_frame); // null when the filter has no input clip
130 |     if (functor)
131 |       functor->_frameCtx = frameCtx;
132 | 
133 |     const bool initial = activationReason == VSActivationReason::arInitial;
134 |     if (!initial && activationReason != VSActivationReason::arAllFramesReady)
135 |       return nullptr; // any other activation reason produces no frame
136 | 
137 |     // The filter code reports failures by throwing C strings. They must not unwind into
138 |     // VapourSynth's C code, so they are turned into a filter error on this frame context.
139 |     try {
140 |       if (initial && functor) {
141 |         // First activation with an input clip: request the reference frames and return.
142 |         for (auto &&i : filter->RequestReferenceFrames(n))
143 |           vsapi->requestFrameFilter(i, functor->_vs_clip, frameCtx);
144 |         return nullptr;
145 |       }
146 |       std::unordered_map<int, DSFrame> in_frames;
147 |       if (functor) {
148 |         for (auto &&i : filter->RequestReferenceFrames(n))
149 |           in_frames[i] = DSFrame(vsapi->getFrameFilter(i, functor->_vs_clip, frameCtx), core, vsapi);
150 |       }
151 |       else
152 |         in_frames[n] = DSFrame(core, vsapi); // no input clip: placeholder frame carrying only core and API
153 |       return filter->GetFrame(n, in_frames).ToVSFrame();
154 |     }
155 |     catch (const char *err) {
156 |       char msg_buff[256];
157 |       snprintf(msg_buff, sizeof(msg_buff), "%s: %s", filter->VSName(), err);
158 |       vsapi->setFilterError(msg_buff, frameCtx);
159 |     }
160 |     return nullptr;
161 |   }
162 | 
163 |   template<typename FilterType> // VapourSynth create callback: builds a FilterType from the arguments in `in`.
164 |   void VS_CC Create(const VSMap *in, VSMap *out, void *userData, VSCore *core, const VSAPI *vsapi) {
165 |     auto filter = new FilterType{};
166 |     auto argument = VSInDelegator(in, vsapi);
167 |     VSFetchFrameFunctor* functor = nullptr; // declared outside the try block so the error path can release it
168 |     try {
169 |       void* clip = nullptr;
170 |       DSVideoInfo input_vi;
171 |       try {
172 |         argument.Read("clip", clip);
173 |         if (clip) {
174 |           auto vs_clip = reinterpret_cast<VSNodeRef*>(clip);
175 |           functor = new VSFetchFrameFunctor(vs_clip, core, vsapi); // the functor's destructor frees the node
176 |           input_vi = DSVideoInfo(vsapi->getVideoInfo(vs_clip));
177 |         }
178 |       } catch(const char *) { /* No clip, source filter */ }
179 |       filter->Initialize(&argument, input_vi, functor);
180 |       vsapi->createFilter(in, out, filter->VSName(), Initialize<FilterType>, GetFrame<FilterType>, Delete<FilterType>, filter->VSMode(), 0, filter, core);
181 |     }
182 |     catch(const char *err){ // report the failure through the output map as "<filter name>: <reason>"
183 |       char msg_buff[256];
184 |       snprintf(msg_buff, sizeof(msg_buff), "%s: %s", filter->VSName(), err);
185 |       vsapi->setError(out, msg_buff);
186 |       delete functor; // same teardown order as Delete<FilterType>; this also frees the clip node
187 |       delete filter;
188 |     }
189 |   }
190 | 
191 |   template<typename FilterType> // Registers FilterType with VapourSynth under the name and parameter string it reports.
192 |   void RegisterFilter(VSRegisterFunction registerFunc, VSPlugin* vsplugin) {
193 |     FilterType filter; // temporary instance, used only to query VSName() and VSParams()
194 |     registerFunc(filter.VSName(), filter.VSParams().c_str(), Create<FilterType>, nullptr, vsplugin); // Create<FilterType> is the create callback; user data is null
195 |   }
196 | 
197 |   void RegisterPlugin(VSConfigPlugin configFunc, VSPlugin* vsplugin) { // Passes the Plugin:: metadata strings and VAPOURSYNTH_API_VERSION to configFunc.
198 |     configFunc(Plugin::Identifier, Plugin::Namespace, Plugin::Description, VAPOURSYNTH_API_VERSION, 1, vsplugin);
199 |   }
200 | }
201 | 
202 | VS_EXTERNAL_API(void) VapourSynthPluginInit(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin* vsplugin) { // exported plugin entry point
203 |   VSInterface::RegisterPlugin(configFunc, vsplugin); // configure the plugin before registering filters
204 |   auto filters = RegisterVSFilters(); // collection of registration callables; RegisterVSFilters is not defined in this header
205 |   for (auto &&RegisterFilter : filters) {
206 |     RegisterFilter(registerFunc, vsplugin); // each callable is invoked once with the register function and plugin handle
207 |   }
208 | }
209 | 


--------------------------------------------------------------------------------
/include/dualsynth/ds_frame.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2020 Xinyue Lu
  3 |  *
  4 |  * DualSynth wrapper - DSFrame.
  5 |  *
  6 |  */
  7 | 
  8 | #pragma once
  9 | 
 10 | struct DSFrame
 11 | {
 12 |   // Backend-neutral frame handle: wraps either a VapourSynth frame reference or an
 13 |   // AviSynth+ PVideoFrame and exposes per-plane pointers and strides.
 14 |   //
 15 |   // Ownership: the three pointer tables below are heap arrays owned by this object. On the
 16 |   // VapourSynth side the object owns the _vssrc handle and, when it is a different handle,
 17 |   // the _vsdst handle. Frames produced by Create() store one handle in both fields.
 18 | 
 19 |   int FrameWidth {0}, FrameHeight {0};
 20 | 
 21 |   const unsigned char ** SrcPointers {nullptr};  // read pointer per plane
 22 |   int * StrideBytes {nullptr};                    // stride (pitch) per plane
 23 |   unsigned char ** DstPointers {nullptr};         // write pointer per plane, set by Create()
 24 |   DSFormat Format;
 25 | 
 26 |   // VapourSynth Interface
 27 |   const VSFrameRef* _vssrc {nullptr};
 28 |   VSFrameRef* _vsdst {nullptr};
 29 |   const VSCore* _vscore {nullptr};
 30 |   const VSAPI* _vsapi {nullptr};
 31 |   const VSFormat* _vsformat {nullptr};
 32 | 
 33 |   // AviSynth+ Interface
 34 |   PVideoFrame _avssrc;
 35 |   VideoInfo _vi;
 36 |   IScriptEnvironment * _env {nullptr};
 37 |   int planes_y[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A };
 38 |   int planes_r[4] = { PLANAR_R, PLANAR_G, PLANAR_B, PLANAR_A };
 39 |   int *planes {0};  // selects planes_y or planes_r of this object once a format is known
 40 | 
 41 |   // Empty frame with no backend attached.
 42 |   DSFrame() {}
 43 |   // VapourSynth-side frame without a source frame; Create(DSVideoInfo) can allocate from it.
 44 |   DSFrame(const VSCore* vscore, const VSAPI* vsapi)
 45 |     : _vscore(vscore), _vsapi(vsapi) {}
 46 |   // Wraps a VapourSynth frame reference; the handle is released by the destructor.
 47 |   DSFrame(const VSFrameRef* src, const VSCore* vscore, const VSAPI* vsapi)
 48 |     : _vssrc(src), _vscore(vscore), _vsapi(vsapi)
 49 |     , _vsformat(src ? _vsapi->getFrameFormat(src) : nullptr)
 50 |   {
 51 |     if (_vssrc) {
 52 |       Format = DSFormat(_vsformat);
 53 |       FrameWidth = _vsapi->getFrameWidth(src, 0);
 54 |       FrameHeight = _vsapi->getFrameHeight(src, 0);
 55 | 
 56 |       SrcPointers = new const unsigned char*[Format.Planes];
 57 |       StrideBytes = new int[Format.Planes];
 58 |       for (int i = 0; i < Format.Planes; i++) {
 59 |         SrcPointers[i] = _vsapi->getReadPtr(src, i);
 60 |         StrideBytes[i] = _vsapi->getStride(src, i);
 61 |       }
 62 |     }
 63 |   }
 64 | 
 65 |   // AviSynth+-side frame without a source frame; Create(DSVideoInfo) can allocate from it.
 66 |   DSFrame(IScriptEnvironment * env)
 67 |     : _env(env) {}
 68 |   // Wraps an AviSynth+ frame whose layout is described by vi.
 69 |   DSFrame(PVideoFrame &src, VideoInfo vi, IScriptEnvironment * env)
 70 |     : _avssrc(src), _vi(vi), _env(env)
 71 |   {
 72 |     if (_avssrc) {
 73 |       Format = DSFormat(_vi.pixel_type);
 74 |       planes = Format.IsFamilyYUV ? planes_y : planes_r;
 75 |       FrameWidth = _vi.width;
 76 |       FrameHeight = _vi.height;
 77 | 
 78 |       SrcPointers = new const unsigned char*[Format.Planes];
 79 |       StrideBytes = new int[Format.Planes];
 80 |       for (int i = 0; i < Format.Planes; i++) {
 81 |         SrcPointers[i] = src->GetReadPtr(planes[i]);
 82 |         StrideBytes[i] = src->GetPitch(planes[i]);
 83 |       }
 84 |     }
 85 |   }
 86 | 
 87 |   // Convenience overloads of Create(copy, inplace).
 88 |   DSFrame Create() { return Create(false, false); }
 89 |   DSFrame Create(bool copy) { return Create(copy, false); }
 90 |   // Allocates an output frame with this frame's format and dimensions.
 91 |   //   copy    - VapourSynth only: start from a copy of this frame's pixels. The AviSynth+
 92 |   //             path always returns a fresh frame via Create(_vi).
 93 |   //   inplace - not used.
 94 |   // Throws a C string when this object wraps no frame.
 95 |   DSFrame Create(bool copy, bool inplace)
 96 |   {
 97 |     if (_vssrc) {
 98 |       auto core = const_cast<VSCore*>(_vscore);
 99 |       // copyFrame duplicates every plane. newVideoFrame2 would need one source entry per
100 |       // plane, so a single-entry array is read out of bounds for multi-plane formats.
101 |       const VSFrameRef* origin = _vsdst ? _vsdst : _vssrc;
102 |       auto vsframe = copy ?
103 |         _vsapi->copyFrame(origin, core) :
104 |         _vsapi->newVideoFrame(_vsformat, FrameWidth, FrameHeight, _vssrc, core);
105 |       return WrapVSOutput(vsframe);
106 |     }
107 |     else if(_avssrc) {
108 |       // Create a new AVS frame
109 |       return Create(_vi);
110 |     }
111 |     throw "Unable to create from nothing.";
112 |   }
113 |   // Allocates an output frame described by vi. Also re-points `planes` to match vi's family.
114 |   // Throws a C string when neither a VapourSynth API nor an AviSynth+ environment is attached.
115 |   DSFrame Create(DSVideoInfo vi) {
116 |     planes = vi.Format.IsFamilyYUV ? planes_y : planes_r;
117 |     if (_vsapi) {
118 |       auto vsframe = _vsapi->newVideoFrame(vi.Format.ToVSFormat(_vscore, _vsapi), vi.Width, vi.Height, _vssrc, const_cast<VSCore*>(_vscore));
119 |       return WrapVSOutput(vsframe);
120 |     }
121 |     else if (_env) {
122 |       auto avsvi = vi.ToAVSVI();
123 |       bool has_at_least_v8 = true;
124 |       try { _env->CheckVersion(8); }
125 |       catch (const AvisynthError&) { has_at_least_v8 = false; }
126 |       auto new_avsframe = (has_at_least_v8) ? _env->NewVideoFrameP(avsvi, &_avssrc) : _env->NewVideoFrame(avsvi);
127 |       // Size the table by the requested format rather than this frame's own format, which
128 |       // is default-constructed on source-less frames and may differ from vi.
129 |       // Keep this before new_frame is built: new_frame stores another reference to the frame.
130 |       const auto plane_count = vi.Format.Planes;
131 |       auto dstp = new unsigned char*[plane_count];
132 |       for (int i = 0; i < plane_count; i++)
133 |         dstp[i] = new_avsframe->GetWritePtr(planes[i]);
134 |       DSFrame new_frame(new_avsframe, avsvi, _env);
135 |       new_frame.DstPointers = dstp;
136 |       return new_frame;
137 |     }
138 |     throw "Unable to create from nothing.";
139 |   }
140 | 
141 |   // Returns a new reference to the destination frame if set, else to the source frame, else null.
142 |   const VSFrameRef* ToVSFrame()
143 |   {
144 |     return _vsdst ? _vsapi->cloneFrameRef(_vsdst) :
145 |            _vssrc ? _vsapi->cloneFrameRef(_vssrc) :
146 |            nullptr;
147 |   }
148 |   PVideoFrame ToAVSFrame() {return _avssrc ? _avssrc : nullptr;}
149 | 
150 |   ~DSFrame()
151 |   {
152 |     ReleaseOwned();
153 |   }
154 | 
155 |   // Copying duplicates the pointer tables and takes new VapourSynth references.
156 |   DSFrame(const DSFrame & old)
157 |   {
158 |     CopyFrom(old);
159 |   }
160 |   DSFrame& operator =(const DSFrame & old)
161 |   {
162 |     if (&old == this)
163 |       return *this;
164 | 
165 |     ReleaseOwned();
166 |     CopyFrom(old);
167 |     return *this;
168 |   }
169 |   // Moving transfers the pointer tables and VapourSynth handles; `old` keeps its other fields.
170 |   DSFrame(DSFrame && old) noexcept
171 |   {
172 |     StealFrom(old);
173 |   }
174 |   DSFrame& operator =(DSFrame && old) noexcept
175 |   {
176 |     if (&old == this)
177 |       return *this;
178 | 
179 |     ReleaseOwned();
180 |     StealFrom(old);
181 |     return *this;
182 |   }
183 | 
184 |   // ---- internal helpers ----
185 | 
186 |   // Wraps a newly created VapourSynth frame as an output frame and fills its write-pointer
187 |   // table, sized by the new frame's own plane count.
188 |   DSFrame WrapVSOutput(VSFrameRef* vsframe) const
189 |   {
190 |     DSFrame new_frame(vsframe, _vscore, _vsapi);
191 |     new_frame._vsdst = vsframe;
192 |     new_frame.DstPointers = new unsigned char*[new_frame.Format.Planes];
193 |     for (int i = 0; i < new_frame.Format.Planes; i++)
194 |       new_frame.DstPointers[i] = _vsapi->getWritePtr(vsframe, i);
195 |     return new_frame;
196 |   }
197 | 
198 |   // Frees the pointer tables, releases the owned VapourSynth handles and nulls all of them.
199 |   void ReleaseOwned() noexcept
200 |   {
201 |     delete[] SrcPointers;
202 |     delete[] DstPointers;
203 |     delete[] StrideBytes;
204 |     if (_vsdst && _vsdst != _vssrc)
205 |       _vsapi->freeFrame(_vsdst);
206 |     if (_vssrc)
207 |       _vsapi->freeFrame(_vssrc);
208 |     SrcPointers = nullptr;
209 |     DstPointers = nullptr;
210 |     StrideBytes = nullptr;
211 |     _vsdst = nullptr;
212 |     _vssrc = nullptr;
213 |   }
214 | 
215 |   // Copies every member that is not an owned resource. Member-wise assignment is used
216 |   // instead of memcpy because PVideoFrame is not trivially copyable, and `planes` must
217 |   // point at this object's own plane table rather than at the source object's.
218 |   void CopyFields(const DSFrame & old)
219 |   {
220 |     FrameWidth = old.FrameWidth;
221 |     FrameHeight = old.FrameHeight;
222 |     Format = old.Format;
223 |     _vscore = old._vscore;
224 |     _vsapi = old._vsapi;
225 |     _vsformat = old._vsformat;
226 |     _avssrc = old._avssrc;
227 |     _vi = old._vi;
228 |     _env = old._env;
229 |     std::copy_n(old.planes_y, 4, planes_y);
230 |     std::copy_n(old.planes_r, 4, planes_r);
231 |     planes = (old.planes == old.planes_y) ? planes_y
232 |            : (old.planes == old.planes_r) ? planes_r
233 |            : old.planes;
234 |   }
235 | 
236 |   // Deep-copies `old` into this object, which must not own any resources at this point.
237 |   void CopyFrom(const DSFrame & old)
238 |   {
239 |     CopyFields(old);
240 |     if (old.SrcPointers) {
241 |       SrcPointers = new const unsigned char*[Format.Planes];
242 |       std::copy_n(old.SrcPointers, Format.Planes, SrcPointers);
243 |     }
244 |     if (old.DstPointers) {
245 |       DstPointers = new unsigned char*[Format.Planes];
246 |       std::copy_n(old.DstPointers, Format.Planes, DstPointers);
247 |     }
248 |     if (old.StrideBytes) {
249 |       StrideBytes = new int[Format.Planes];
250 |       std::copy_n(old.StrideBytes, Format.Planes, StrideBytes);
251 |     }
252 |     if (old._vssrc)
253 |       _vssrc = _vsapi->cloneFrameRef(old._vssrc);
254 |     // When the source stores one handle in both fields, do the same here with the new
255 |     // handle; keeping old's handle in _vsdst would let both objects free it.
256 |     if (old._vsdst)
257 |       _vsdst = (old._vsdst == old._vssrc)
258 |         ? const_cast<VSFrameRef*>(_vssrc)
259 |         : const_cast<VSFrameRef*>(_vsapi->cloneFrameRef(old._vsdst));
260 |   }
261 | 
262 |   // Takes over old's pointer tables and VapourSynth handles; this object must not own any.
263 |   void StealFrom(DSFrame & old) noexcept
264 |   {
265 |     CopyFields(old);
266 |     SrcPointers = old.SrcPointers;
267 |     DstPointers = old.DstPointers;
268 |     StrideBytes = old.StrideBytes;
269 |     _vssrc = old._vssrc;
270 |     _vsdst = old._vsdst;
271 |     old.SrcPointers = nullptr;
272 |     old.DstPointers = nullptr;
273 |     old.StrideBytes = nullptr;
274 |     old._vssrc = nullptr;
275 |     old._vsdst = nullptr;
276 |   }
277 | };
278 | 


--------------------------------------------------------------------------------
/src/dither_high.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "impl_dispatch.h"
  3 | #include "compiler_compat.h"
  4 | 
  5 | #define FS_DITHER_SKIP_PRE_CLAMP
  6 | 
  7 | #include "pixel_proc_c_high_f_s_dithering.h"
  8 | #include "pixel_proc_c_high_ordered_dithering.h"
  9 | 
 10 | #include <assert.h>
 11 | #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
 12 | #include <emmintrin.h>
 13 | #elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
 14 | #include "sse2neon.h"
 15 |  #endif
 16 | namespace dither_high
 17 | {
 18 |     static __m128i _ordered_dithering_threshold_map[16] [2]; // 16 rows x 2 vectors, filled by init_ordered_dithering()
 19 |     static __m128i _ordered_dithering_threshold_map_yuy2[16] [8]; // 16 rows x 8 vectors, filled by init_ordered_dithering(), read by dither_yuy2()
 20 |     static volatile bool _threshold_map_initialized = false; // set to true after both maps above have been filled
 21 | 
 22 |     static __inline void init_ordered_dithering() // Fills the two static threshold maps on the first call; later calls return immediately.
 23 |     {
 24 |         if (!_threshold_map_initialized) { // NOTE(review): a volatile flag alone does not make concurrent first calls safe - confirm callers serialize this
 25 |             __m128i threhold_row;
 26 |             __m128i zero = _mm_setzero_si128();
 27 |             for (int i = 0; i < 16; i++) 
 28 |             {
 29 |                 threhold_row = *(__m128i*)pixel_proc_high_ordered_dithering::THRESHOLD_MAP[i]; // loads row i as one 128-bit vector; presumably 16 byte entries - confirm against THRESHOLD_MAP
 30 |                     
 31 |                 __m128i part_0 = _mm_unpacklo_epi8(threhold_row, zero); // low 8 bytes widened to 16-bit lanes
 32 |                 __m128i part_1 = _mm_unpackhi_epi8(threhold_row, zero); // high 8 bytes widened to 16-bit lanes
 33 | 
 34 |                 if (INTERNAL_BIT_DEPTH < 16)
 35 |                 {
 36 |                     part_0 = _mm_srli_epi16(part_0, 16 - INTERNAL_BIT_DEPTH); // shift each lane right by (16 - INTERNAL_BIT_DEPTH)
 37 |                     part_1 = _mm_srli_epi16(part_1, 16 - INTERNAL_BIT_DEPTH);
 38 |                 }
 39 |                 _ordered_dithering_threshold_map[i][0] = part_0;
 40 |                 _ordered_dithering_threshold_map[i][1] = part_1;
 41 |                 
 42 |                 __m128i tmp = _mm_unpacklo_epi8(part_0, part_0); // the 8 YUY2 vectors interleave 16-bit lanes of part_0/part_1 with byte-duplicated copies
 43 |                 _ordered_dithering_threshold_map_yuy2[i][0] = _mm_unpacklo_epi16(part_0, tmp);
 44 |                 _ordered_dithering_threshold_map_yuy2[i][1] = _mm_unpackhi_epi16(part_0, tmp);
 45 | 
 46 |                 tmp = _mm_unpackhi_epi8(part_0, part_0);
 47 |                 _ordered_dithering_threshold_map_yuy2[i][2] = _mm_unpacklo_epi16(part_1, tmp);
 48 |                 _ordered_dithering_threshold_map_yuy2[i][3] = _mm_unpackhi_epi16(part_1, tmp);
 49 | 
 50 |                 tmp = _mm_unpacklo_epi8(part_1, part_1);
 51 |                 _ordered_dithering_threshold_map_yuy2[i][4] = _mm_unpacklo_epi16(part_0, tmp);
 52 |                 _ordered_dithering_threshold_map_yuy2[i][5] = _mm_unpackhi_epi16(part_0, tmp);
 53 | 
 54 |                 tmp = _mm_unpackhi_epi8(part_1, part_1);
 55 |                 _ordered_dithering_threshold_map_yuy2[i][6] = _mm_unpacklo_epi16(part_1, tmp);
 56 |                 _ordered_dithering_threshold_map_yuy2[i][7] = _mm_unpackhi_epi16(part_1, tmp);
 57 |             }
 58 |             _mm_mfence(); // memory fence between the map stores and the flag store below
 59 |             _threshold_map_initialized = true;
 60 |         }
 61 |     }
 62 | 
 63 |     static void init_ordered_dithering_with_output_depth(char context_buffer[CONTEXT_BUFFER_SIZE], int output_depth) // Writes the 16x2 threshold map, shifted for output_depth, into context_buffer.
 64 |     {
 65 |         assert(_threshold_map_initialized); // init_ordered_dithering() must have run first
 66 | 
 67 |         __m128i shift = _mm_set_epi32(0, 0, 0, output_depth - 8); // shift count (output_depth - 8) in the low 32 bits
 68 | 
 69 |         for (int i = 0; i < 16; i++)
 70 |         {
 71 |             for (int j = 0; j < 2; j++)
 72 |             {
 73 |                 __m128i item = _ordered_dithering_threshold_map[i][j];
 74 |                 item = _mm_srl_epi16(item, shift); // each 16-bit lane shifted right by (output_depth - 8)
 75 |                 _mm_store_si128((__m128i*)(context_buffer + (i * 2 + j) * 16), item); // aligned store: vector (i, j) lands at byte offset (i * 2 + j) * 16
 76 |             }
 77 |         }
 78 |     }
 79 | 
 80 |     template <int dither_algo> // Prepares context_buffer for the dithering algorithm selected by dither_algo.
 81 |     static __inline void init(char context_buffer[CONTEXT_BUFFER_SIZE], int frame_width, int output_depth) 
 82 |     {
 83 |         if (dither_algo == DA_HIGH_FLOYD_STEINBERG_DITHERING)
 84 |         {
 85 |             pixel_proc_high_f_s_dithering::init_context(context_buffer, frame_width, output_depth);
 86 |         } else if (dither_algo == DA_HIGH_ORDERED_DITHERING) {
 87 |             init_ordered_dithering(); // fill the shared static maps if not done yet
 88 |             init_ordered_dithering_with_output_depth(context_buffer, output_depth); // then write the depth-adjusted copy into this context
 89 |         } // any other dither_algo value needs no context setup here
 90 |     }
 91 | 
 92 |     template <int dither_algo> // Tears down the context; only the Floyd-Steinberg algorithm has a context to destroy.
 93 |     static __inline void complete(void* context) 
 94 |     {
 95 |         if (dither_algo == DA_HIGH_FLOYD_STEINBERG_DITHERING)
 96 |         {
 97 |             pixel_proc_high_f_s_dithering::destroy_context(context);
 98 |         }
 99 |     }
100 |     
101 |     template <int dither_algo> // Applies the selected dithering to 8 16-bit pixels located at (row, column .. column + 7).
102 |     static __forceinline __m128i dither(void* context, __m128i pixels, int row, int column)
103 |     {
104 |         switch (dither_algo)
105 |         {
106 |         case DA_HIGH_NO_DITHERING:
107 |             return pixels; // pass-through
108 |         case DA_HIGH_ORDERED_DITHERING:
109 |             {
110 |             // row: use lowest 4 bits as index, mask = 0b00001111 = 15
111 |             // column: always multiples of 8, so bit 3 (value 8) is the selector, mask = 0b00001000
112 |             assert((column & 7) == 0);
113 |             __m128i threshold = _mm_load_si128((__m128i*)((char*)context + ( ( (row & 15) * 2 ) + ( (column & 8) >> 3 ) ) * 16 ) ); // same layout that init_ordered_dithering_with_output_depth() writes
114 |             return _mm_adds_epu16(pixels, threshold); // unsigned saturating add of the threshold
115 |             }
116 |         case DA_HIGH_FLOYD_STEINBERG_DITHERING:
117 |             // fixme: remove this compatibility workaround
118 |             // due to an ICC bug, accessing pixels using union will give us incorrect results
119 |             // so we have to use a buffer here
120 |             // tested on ICC 12.0.1024.2010
121 |             alignas(16) unsigned short buffer[8];
122 |             _mm_store_si128((__m128i*)buffer, pixels);
123 |             for (int i = 0; i < 8; i++) // process the 8 pixels one at a time, left to right
124 |             {
125 |                 buffer[i] = (unsigned short)pixel_proc_high_f_s_dithering::dither(context, buffer[i], row, column + i);
126 |                 pixel_proc_high_f_s_dithering::next_pixel(context); // advance the context after each pixel
127 |             }
128 |             return _mm_load_si128((__m128i*)buffer);
129 |         case DA_16BIT_INTERLEAVED:
130 |             return _mm_setzero_si128(); // this mode returns an all-zero vector here
131 |             break;
132 |         default:
133 |             abort(); // unknown dither_algo value
134 |             return _mm_setzero_si128();
135 |         }
136 |     }
137 | 
 138 |     // FIXME: remove yuy2? Variant of dither() that takes three contexts, picked per lane in the Floyd-Steinberg path.
 139 |     template <int dither_algo>
 140 |     static __forceinline __m128i dither_yuy2(char contexts[3][CONTEXT_BUFFER_SIZE], __m128i pixels, int row, int column)
 141 |     {
 142 |         switch (dither_algo)
 143 |         {
 144 |         case DA_HIGH_NO_DITHERING:
 145 |             return pixels; // pass-through, pixels are returned unchanged
 146 |         case DA_HIGH_ORDERED_DITHERING:
 147 |             // row: the lowest 4 bits are the row index, mask = 0b00001111 = 15
 148 |             // column: always a multiple of 8; the yuy2 threshold map has 8 entries per row, mask = 0b00111000
 149 |             assert((column & 7) == 0);
 150 |             return _mm_adds_epu16(pixels, _ordered_dithering_threshold_map_yuy2[row & 15][(column >> 3) & 7]);
 151 |         case DA_HIGH_FLOYD_STEINBERG_DITHERING:
 152 |             // FIXME: remove this compatibility workaround
 153 |             // due to an ICC bug, accessing pixels using union will give us incorrect results
 154 |             // so we have to use a buffer here
 155 |             // tested on ICC 12.0.1024.2010
 156 |             alignas(16)
 157 |             unsigned short buffer[8];
 158 |             _mm_store_si128((__m128i*)buffer, pixels);
 159 |             for (int i = 0; i < 8; i++) // the 8 lanes go through the scalar routine one by one
 160 |             {
 161 |                 int cur_column = column + i;
 162 |                 void *cur_context;
 163 |                 switch (i & 3) // position of the lane inside its group of four
 164 |                 {
 165 |                 case 0:
 166 |                 case 2: // even lanes: contexts[0], column halved (presumably the luma samples of YUY2 - confirm)
 167 |                     cur_column >>= 1;
 168 |                     cur_context = contexts[0];
 169 |                     break;
 170 |                 case 1: // contexts[1], column divided by 4 (presumably the first chroma plane - confirm)
 171 |                     cur_column >>= 2;
 172 |                     cur_context = contexts[1];
 173 |                     break;
 174 |                 case 3: // contexts[2], column divided by 4 (presumably the second chroma plane - confirm)
 175 |                     cur_column >>= 2;
 176 |                     cur_context = contexts[2];
 177 |                     break;
 178 |                 }
 179 |                 buffer[i] = (unsigned short)pixel_proc_high_f_s_dithering::dither(cur_context, buffer[i], row, cur_column);
 180 |                 pixel_proc_high_f_s_dithering::next_pixel(cur_context);
 181 |             }
 182 |             return _mm_load_si128((__m128i*)buffer);
 183 |         case DA_16BIT_INTERLEAVED:
 184 |             return _mm_setzero_si128(); // NOTE(review): yields zeros; presumably this mode is not dithered through this path - confirm
 185 |             break; // unreachable after the return above
 186 |         default:
 187 |             abort(); // unknown dither_algo
 188 |             return _mm_setzero_si128(); // not reached; keeps a return value on this path
 189 |         }
 190 |     }
191 |     
192 |     template <int dither_algo>
193 |     static __inline void next_row(void* context)
194 |     {
195 |         if (dither_algo == DA_HIGH_FLOYD_STEINBERG_DITHERING)
196 |         {
197 |             pixel_proc_high_f_s_dithering::next_row(context);
198 |         }
199 |     }
200 | };


--------------------------------------------------------------------------------
/VCL2/instrset_detect.cpp:
--------------------------------------------------------------------------------
  1 | /**************************  instrset_detect.cpp   ****************************
  2 | * Author:        Agner Fog
  3 | * Date created:  2012-05-30
  4 | * Last modified: 2022-07-20
  5 | * Version:       2.02.00
  6 | * Project:       vector class library
  7 | * Description:
  8 | * Functions for checking which instruction sets are supported.
  9 | *
 10 | * (c) Copyright 2012-2022 Agner Fog.
 11 | * Apache License version 2.0 or later.
 12 | ******************************************************************************/
 13 | 
 14 | #include "instrset.h"
 15 | 
 16 | #ifdef VCL_NAMESPACE
 17 | namespace VCL_NAMESPACE {
 18 | #endif
 19 | 
 20 | 
  21 | // Interface to the xgetbv instruction: returns the 64-bit value selected by ctr (edx:eax)
  22 | static inline uint64_t xgetbv (int ctr) {
  23 | #if (defined (_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined (__INTEL_COMPILER) && __INTEL_COMPILER >= 1200)
  24 |     // Microsoft or Intel compiler supporting _xgetbv intrinsic
  25 | 
  26 |     return uint64_t(_xgetbv(ctr));                    // intrinsic function for XGETBV
  27 | 
  28 | #elif defined(__GNUC__) ||  defined (__clang__)       // use inline assembly, Gnu/AT&T syntax
  29 | 
  30 |    uint32_t a, d;                                     // a receives eax, d receives edx
  31 |    __asm("xgetbv" : "=a"(a),"=d"(d) : "c"(ctr) : );   // ctr is passed in ecx
  32 |    return a | (uint64_t(d) << 32);                    // combine: eax is the low half, edx the high half
  33 | 
  34 | #else  // #elif defined (_WIN32)                      // other compiler. try inline assembly with masm/intel/MS syntax
  35 |    uint32_t a, d;                                     // a receives eax, d receives edx
  36 |     __asm {
  37 |         mov ecx, ctr
  38 |         _emit 0x0f
  39 |         _emit 0x01
  40 |         _emit 0xd0 ; // xgetbv
  41 |         mov a, eax
  42 |         mov d, edx
  43 |     }
  44 |    return a | (uint64_t(d) << 32);                    // combine: eax is the low half, edx the high half
  45 | 
  46 | #endif
  47 | }
 48 | 
  49 | /* find supported instruction set (the result is cached in a static after the first call)
  50 |     return value:
  51 |     0           = 80386 instruction set
  52 |     1  or above = SSE (XMM) supported by CPU (not testing for OS support)
  53 |     2  or above = SSE2
  54 |     3  or above = SSE3
  55 |     4  or above = Supplementary SSE3 (SSSE3)
  56 |     5  or above = SSE4.1
  57 |     6  or above = SSE4.2
  58 |     7  or above = AVX supported by CPU and operating system
  59 |     8  or above = AVX2
  60 |     9  or above = AVX512F
  61 |    10  or above = AVX512VL, AVX512BW, AVX512DQ
  62 | */
  63 | int instrset_detect(void) {
  64 | 
  65 |     static int iset = -1;                                  // remember value for next call
  66 |     if (iset >= 0) {
  67 |         return iset;                                       // called before
  68 |     }
  69 |     iset = 0;                                              // default value
  70 |     int abcd[4] = {0,0,0,0};                               // cpuid results
  71 |     cpuid(abcd, 0);                                        // call cpuid function 0
  72 |     if (abcd[0] == 0) return iset;                         // no further cpuid function supported
  73 |     cpuid(abcd, 1);                                        // call cpuid function 1 for feature flags
  74 |     if ((abcd[3] & (1 <<  0)) == 0) return iset;           // no floating point
  75 |     if ((abcd[3] & (1 << 23)) == 0) return iset;           // no MMX
  76 |     if ((abcd[3] & (1 << 15)) == 0) return iset;           // no conditional move
  77 |     if ((abcd[3] & (1 << 24)) == 0) return iset;           // no FXSAVE
  78 |     if ((abcd[3] & (1 << 25)) == 0) return iset;           // no SSE
  79 |     iset = 1;                                              // 1: SSE supported
  80 |     if ((abcd[3] & (1 << 26)) == 0) return iset;           // no SSE2
  81 |     iset = 2;                                              // 2: SSE2 supported
  82 |     if ((abcd[2] & (1 <<  0)) == 0) return iset;           // no SSE3
  83 |     iset = 3;                                              // 3: SSE3 supported
  84 |     if ((abcd[2] & (1 <<  9)) == 0) return iset;           // no SSSE3
  85 |     iset = 4;                                              // 4: SSSE3 supported
  86 |     if ((abcd[2] & (1 << 19)) == 0) return iset;           // no SSE4.1
  87 |     iset = 5;                                              // 5: SSE4.1 supported
  88 |     if ((abcd[2] & (1 << 23)) == 0) return iset;           // no POPCNT
  89 |     if ((abcd[2] & (1 << 20)) == 0) return iset;           // no SSE4.2
  90 |     iset = 6;                                              // 6: SSE4.2 supported
  91 |     if ((abcd[2] & (1 << 27)) == 0) return iset;           // no OSXSAVE
  92 |     if ((xgetbv(0) & 6) != 6)       return iset;           // AVX not enabled in O.S.
  93 |     if ((abcd[2] & (1 << 28)) == 0) return iset;           // no AVX
  94 |     iset = 7;                                              // 7: AVX supported
  95 |     cpuid(abcd, 7);                                        // call cpuid leaf 7 for feature flags
  96 |     if ((abcd[1] & (1 <<  5)) == 0) return iset;           // no AVX2
  97 |     iset = 8;                                              // 8: AVX2 supported
  98 |     if ((abcd[1] & (1 << 16)) == 0) return iset;           // no AVX512F
  99 |     cpuid(abcd, 0xD);                                      // call cpuid leaf 0xD for feature flags
 100 |     if ((abcd[0] & 0x60) != 0x60)   return iset;           // eax bits 5 and 6 not both set: treated as no AVX512
 101 |     iset = 9;                                              // 9: AVX512F supported
 102 |     cpuid(abcd, 7);                                        // call cpuid leaf 7 again (abcd was overwritten by leaf 0xD)
 103 |     if ((abcd[1] & (1 << 31)) == 0) return iset;           // no AVX512VL
 104 |     if ((abcd[1] & 0x40020000) != 0x40020000) return iset; // no AVX512BW (bit 30), AVX512DQ (bit 17)
 105 |     iset = 10;                                             // 10: AVX512VL, AVX512BW, AVX512DQ supported
 106 |     return iset;
 107 | }
108 | 
109 | // detect if CPU supports the FMA3 instruction set
110 | bool hasFMA3(void) {
111 |     if (instrset_detect() < 7) return false;               // must have AVX
112 |     int abcd[4];                                           // cpuid results
113 |     cpuid(abcd, 1);                                        // call cpuid function 1
114 |     return ((abcd[2] & (1 << 12)) != 0);                   // ecx bit 12 indicates FMA3
115 | }
116 | 
117 | // detect if CPU supports the FMA4 instruction set
118 | bool hasFMA4(void) {
119 |     if (instrset_detect() < 7) return false;               // must have AVX
120 |     int abcd[4];                                           // cpuid results
121 |     cpuid(abcd, 0x80000001);                               // call cpuid function 0x80000001
122 |     return ((abcd[2] & (1 << 16)) != 0);                   // ecx bit 16 indicates FMA4
123 | }
124 | 
125 | // detect if CPU supports the XOP instruction set
126 | bool hasXOP(void) {
127 |     if (instrset_detect() < 7) return false;               // must have AVX
128 |     int abcd[4];                                           // cpuid results
129 |     cpuid(abcd, 0x80000001);                               // call cpuid function 0x80000001
130 |     return ((abcd[2] & (1 << 11)) != 0);                   // ecx bit 11 indicates XOP
131 | }
132 | 
133 | // detect if CPU supports the AVX512ER instruction set
134 | bool hasAVX512ER(void) {
135 |     if (instrset_detect() < 9) return false;               // must have AVX512F
136 |     int abcd[4];                                           // cpuid results
137 |     cpuid(abcd, 7);                                        // call cpuid function 7
138 |     return ((abcd[1] & (1 << 27)) != 0);                   // ebx bit 27 indicates AVX512ER
139 | }
140 | 
141 | // detect if CPU supports the AVX512VBMI instruction set
142 | bool hasAVX512VBMI(void) {
143 |     if (instrset_detect() < 10) return false;              // must have AVX512BW
144 |     int abcd[4];                                           // cpuid results
145 |     cpuid(abcd, 7);                                        // call cpuid function 7
146 |     return ((abcd[2] & (1 << 1)) != 0);                    // ecx bit 1 indicates AVX512VBMI
147 | }
148 | 
149 | // detect if CPU supports the AVX512VBMI2 instruction set
150 | bool hasAVX512VBMI2(void) {
151 |     if (instrset_detect() < 10) return false;              // must have AVX512BW
152 |     int abcd[4];                                           // cpuid results
153 |     cpuid(abcd, 7);                                        // call cpuid function 7
154 |     return ((abcd[2] & (1 << 6)) != 0);                    // ecx bit 6 indicates AVX512VBMI2
155 | }
156 | 
157 | // detect if CPU supports the F16C instruction set
158 | bool hasF16C(void) {
159 |     if (instrset_detect() < 7) return false;               // must have AVX
160 |     int abcd[4];                                           // cpuid results
161 |     cpuid(abcd, 1);                                        // call cpuid function 1
162 |     return ((abcd[2] & (1 << 29)) != 0);                   // ecx bit 29 indicates F16C
163 | }
164 | 
165 | // detect if CPU supports the AVX512_FP16 instruction set
166 | bool hasAVX512FP16(void) {
167 |     if (instrset_detect() < 10) return false;              // must have AVX512
168 |     int abcd[4];                                           // cpuid results
169 |     cpuid(abcd, 7);                                        // call cpuid function 1
170 |     return ((abcd[3] & (1 << 23)) != 0);                   // edx bit 23 indicates AVX512_FP16
171 | }
172 | 
173 | 
174 | #ifdef VCL_NAMESPACE
175 | }
176 | #endif
177 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
   1 | cmake_minimum_required(VERSION 3.15)
   2 | project(neo_f3kdb VERSION 10.0.0 LANGUAGES CXX)
   3 | 
   4 | set(LIBRARY_NAME "neo-f3kdb")
   5 | add_library(${LIBRARY_NAME} SHARED) # sources are attached below with target_sources
   6 | 
   7 | target_sources(${LIBRARY_NAME} PRIVATE
   8 |   "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp"
   9 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/bit_utils.h"
  10 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/compiler_compat.h"
  11 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/constants.h"
  12 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/core.cpp"
  13 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/core.h"
  14 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/cpuid.cpp"
  15 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/dither_high.h"
  16 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/f3kdb.h"
  17 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/f3kdb.hpp"
  18 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_c.cpp"
  19 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/impl_dispatch.cpp"
  20 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/impl_dispatch.h"
  21 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/impl_dispatch_decl.h"
  22 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/neo_f3kdb.hpp"
  23 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/pixel_proc_c.h"
  24 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/pixel_proc_c_16bit.h"
  25 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/pixel_proc_c_high_bit_depth_common.h"
  26 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/pixel_proc_c_high_f_s_dithering.h"
  27 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/pixel_proc_c_high_no_dithering.h"
  28 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/pixel_proc_c_high_ordered_dithering.h"
  29 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/process_plane_context.cpp"
  30 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/process_plane_context.h"
  31 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/random.cpp"
  32 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/random.h"
  33 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/sse2neon.h"
  34 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/sse_utils.h"
  35 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/utils.h"
  36 | ) # architecture-independent sources; the SSE4/AVX2/AVX-512 files are added further down for x86 only
  37 | 
  38 | target_compile_features(${LIBRARY_NAME} PRIVATE cxx_std_17) # C++17 is required
  39 | option(ENABLE_PAR "Enable C++17 Parallel Execution" ON) # only takes effect when the <execution> header is found
 40 | 
  41 | if (NOT MSVC) # prefer AviSynth/VapourSynth headers reported by pkg-config, else fall back to the bundled copies
  42 |   find_package(PkgConfig REQUIRED)
  43 | 
  44 |   pkg_check_modules(AVISYNTH avisynth)
  45 |   if(AVISYNTH_FOUND)
  46 |     target_include_directories(${LIBRARY_NAME} PRIVATE ${AVISYNTH_INCLUDE_DIRS})
  47 |   else()
  48 |     target_include_directories(${LIBRARY_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include/avisynth")
  49 |   endif()
  50 | 
  51 |   pkg_check_modules(VAPOURSYNTH vapoursynth)
  52 |   if(VAPOURSYNTH_FOUND)
  53 |     target_include_directories(${LIBRARY_NAME} PRIVATE ${VAPOURSYNTH_INCLUDE_DIRS})
  54 |   else()
  55 |     target_include_directories(${LIBRARY_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include/vapoursynth")
  56 |   endif()
  57 | else() # MSVC: always use the bundled headers
  58 |   target_include_directories(${LIBRARY_NAME} PRIVATE
  59 |     "${CMAKE_CURRENT_SOURCE_DIR}/include/avisynth"
  60 |     "${CMAKE_CURRENT_SOURCE_DIR}/include/vapoursynth"
  61 |   )
  62 | endif()
  63 | 
  64 | target_include_directories(${LIBRARY_NAME} PRIVATE
  65 |   "${CMAKE_CURRENT_SOURCE_DIR}"
  66 |   "${CMAKE_CURRENT_SOURCE_DIR}/include/dualsynth"
  67 |   "${CMAKE_CURRENT_BINARY_DIR}"
  68 | ) # the binary dir is on the include path because version.hpp is generated there
 69 | 
  70 | # We apply aggressive flags ONLY to compilers with a GNU-style frontend.
  71 | if(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "GNU")
  72 |     target_compile_options(${LIBRARY_NAME} PRIVATE -O3 -funroll-loops)
  73 | endif()
  74 | 
  75 | # Only apply SIMD flags if we are on a capable architecture (x86/x86_64).
  76 | if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64|i.86")
  77 |     message(STATUS "x86/x86_64 architecture detected. Configuring SIMD instruction sets.")
  78 | 
  79 |     target_sources(${LIBRARY_NAME} PRIVATE
  80 |         "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_avx2_base.h"
  81 |         "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_avx512_base.h"
  82 |         "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_avx2.cpp"
  83 |         "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_avx512.cpp"
  84 |         "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_sse4.cpp"
  85 |         "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_sse_base.h"
  86 |     )
  87 | 
  88 |     target_include_directories(${LIBRARY_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/VCL2")
  89 | 
  90 |     set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_sse4.cpp" PROPERTIES COMPILE_OPTIONS
  91 |         "$<$<CXX_COMPILER_ID:MSVC>:/arch:SSE2>$<$<AND:$<CXX_COMPILER_ID:Intel>,$<PLATFORM_ID:Windows>>:/arch:SSE4.1>$<$<NOT:$<OR:$<CXX_COMPILER_ID:MSVC>,$<AND:$<CXX_COMPILER_ID:Intel>,$<PLATFORM_ID:Windows>>>>:-msse4.1>"
  92 |     ) # per-file flag: MSVC gets /arch:SSE2, Intel on Windows /arch:SSE4.1, every other compiler -msse4.1
  93 | 
  94 |     if(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC") # per-file flags for the AVX2 translation unit
  95 |         set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_avx2.cpp" PROPERTIES COMPILE_OPTIONS "/arch:AVX2")
  96 |     elseif(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "GNU")
  97 |         set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_avx2.cpp" PROPERTIES COMPILE_OPTIONS "-mavx2;-mfma")
  98 |     endif()
  99 | 
 100 |     if(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC") # per-file flags for the AVX-512 translation unit
 101 |         set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_avx512.cpp" PROPERTIES COMPILE_OPTIONS "/arch:AVX512")
 102 |     elseif(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "GNU")
 103 |         set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_avx512.cpp" PROPERTIES COMPILE_OPTIONS "-mavx512f;-mavx512bw;-mavx512dq;-mavx512vl;-mavx512cd;-mfma")
 104 |     endif()
 105 | else()
 106 |     message(STATUS "Non-x86 architecture detected (${CMAKE_SYSTEM_PROCESSOR}). Skipping SIMD-specific source files.")
 107 | endif()
108 | 
 109 | target_link_libraries(${LIBRARY_NAME} PRIVATE "$<$<OR:$<CXX_COMPILER_ID:Intel>,$<CXX_COMPILER_ID:IntelLLVM>>:libmmds>") # libmmds is linked only for the Intel compilers
 110 | 
 111 | # Handle legacy Windows XP support if the specific toolset is used
 112 | if(CMAKE_GENERATOR_TOOLSET MATCHES "v[0-9]+_xp")
 113 |     message(STATUS "Windows XP toolset detected. Applying compatibility settings.")
 114 |     target_compile_definitions(${LIBRARY_NAME} PRIVATE WINVER=0x502 _WIN32_WINNT=0x502)
 115 |     target_compile_options(${LIBRARY_NAME} PRIVATE "$<$<CXX_COMPILER_ID:MSVC>:/Zc:threadSafeInit->")
 116 | endif()
117 | 
 118 | string(REGEX MATCH "^([0-9.]+)" CORE_VERSION_STRING "${PROJECT_VERSION}") # leading digits-and-dots part of the version
 119 | if (CORE_VERSION_STRING)
 120 |     if(PROJECT_VERSION MATCHES "-") # a '-' anywhere in the version marks a pre-release
 121 |         set(IS_PRERELEASE 1)
 122 |     else()
 123 |         set(IS_PRERELEASE 0)
 124 |     endif()
 125 | else()
 126 |     message(WARNING "Could not parse core version from '${PROJECT_VERSION}'. Defaulting to 0.0.0.")
 127 |     set(CORE_VERSION_STRING "0.0.0")
 128 |     set(IS_PRERELEASE 0)
 129 | endif()
 130 | 
 131 | string(REPLACE "." ";" VERSION_NUMERIC_PARTS_LIST "${CORE_VERSION_STRING}") # "a.b.c" -> list a;b;c
 132 | list(LENGTH VERSION_NUMERIC_PARTS_LIST NUM_PARTS)
 133 | 
 134 | set(VERSION_MAJOR 0) # components missing from the version string stay 0
 135 | set(VERSION_MINOR 0)
 136 | set(VERSION_PATCH 0)
 137 | set(VERSION_BUILD 0)
 138 | 
 139 | if(NUM_PARTS GREATER_EQUAL 1)
 140 |     list(GET VERSION_NUMERIC_PARTS_LIST 0 VERSION_MAJOR)
 141 | endif()
 142 | 
 143 | if(NUM_PARTS GREATER_EQUAL 2)
 144 |     list(GET VERSION_NUMERIC_PARTS_LIST 1 VERSION_MINOR)
 145 | endif()
 146 | 
 147 | if(NUM_PARTS GREATER_EQUAL 3)
 148 |     list(GET VERSION_NUMERIC_PARTS_LIST 2 VERSION_PATCH)
 149 | endif()
 150 | 
 151 | if(NUM_PARTS GREATER_EQUAL 4)
 152 |     list(GET VERSION_NUMERIC_PARTS_LIST 3 VERSION_BUILD)
 153 | endif()
 154 | 
 155 | set(PROJECT_VERSION_STRING_FULL "r${VERSION_MAJOR}") # "r" followed by the major version only
 156 | 
 157 | configure_file(
 158 |   "${CMAKE_CURRENT_SOURCE_DIR}/src/version.hpp.in"
 159 |   "${CMAKE_CURRENT_BINARY_DIR}/version.hpp"
 160 | ) # generates version.hpp in the build directory
161 | 
 162 | if (WIN32) # Windows only: generate and compile a version resource
 163 |     set(FILE_DESCRIPTION "Neo-F3KDB")
 164 |     set(INTERNAL_NAME "Neo-F3KDB")
 165 |     set(ORIGINAL_FILENAME "${LIBRARY_NAME}.dll")
 166 |     set(PRODUCT_NAME "Neo-F3KDB")
 167 | 
 168 |     if(NOT CMAKE_CONFIGURATION_TYPES) # no configuration list: pick the FILEFLAGS line at configure time from CMAKE_BUILD_TYPE
 169 |         set(IS_DEBUG_BUILD 0)
 170 |         if(CMAKE_BUILD_TYPE MATCHES "^Debug$")
 171 |             set(IS_DEBUG_BUILD 1)
 172 |         endif()
 173 | 
 174 |         if(IS_DEBUG_BUILD AND IS_PRERELEASE)
 175 |             set(RC_FILEFLAGS_LINE "FILEFLAGS   VS_FF_DEBUG | VS_FF_PRERELEASE")
 176 |         elseif(IS_DEBUG_BUILD)
 177 |             set(RC_FILEFLAGS_LINE "FILEFLAGS   VS_FF_DEBUG")
 178 |         elseif(IS_PRERELEASE)
 179 |             set(RC_FILEFLAGS_LINE "FILEFLAGS   VS_FF_PRERELEASE")
 180 |         else()
 181 |             set(RC_FILEFLAGS_LINE "FILEFLAGS   0x0L")
 182 |         endif()
 183 |     else() # configuration list present: emit preprocessor conditionals so the choice is made when the .rc is compiled
 184 |         string(CONCAT RC_FILEFLAGS_LINE
 185 |             "#if defined(_DEBUG) && defined(IS_PRERELEASE_BUILD)\n"
 186 |             "    FILEFLAGS   VS_FF_DEBUG | VS_FF_PRERELEASE\n"
 187 |             "#elif defined(_DEBUG)\n"
 188 |             "    FILEFLAGS   VS_FF_DEBUG\n"
 189 |             "#elif defined(IS_PRERELEASE_BUILD)\n"
 190 |             "    FILEFLAGS   VS_FF_PRERELEASE\n"
 191 |             "#else\n"
 192 |             "    FILEFLAGS   0x0L\n"
 193 |             "#endif"
 194 |         )
 195 |         if(IS_PRERELEASE)
 196 |             target_compile_definitions(${LIBRARY_NAME} PRIVATE IS_PRERELEASE_BUILD=1)
 197 |         endif()
 198 |     endif()
 199 | 
 200 |     set(RC_FILE_OUT "${CMAKE_CURRENT_BINARY_DIR}/version.rc") # Output to build dir
 201 | 
 202 |     configure_file(
 203 |         "${CMAKE_CURRENT_SOURCE_DIR}/src/version.rc.in"
 204 |         "${RC_FILE_OUT}"
 205 |         @ONLY
 206 |     ) # @ONLY: only @VAR@ references are substituted, ${...} text is left alone
 207 | 
 208 |     target_sources(${LIBRARY_NAME} PRIVATE "${RC_FILE_OUT}")
 209 | 
 210 |     if(MSVC)
 211 |         set_source_files_properties("${RC_FILE_OUT}" PROPERTIES
 212 |             VS_RESOURCE_GENERATOR "RC")
 213 |     endif()
 214 | endif()
215 | 
 216 | include(CheckIncludeFileCXX)
 217 | CHECK_INCLUDE_FILE_CXX(execution HAS_EXECUTION) # probe for the <execution> header
 218 | if(HAS_EXECUTION)
 219 |   target_compile_definitions(${LIBRARY_NAME} PRIVATE HAS_EXECUTION)
 220 | endif()
 221 | if(ENABLE_PAR AND HAS_EXECUTION) # parallel execution needs both the option and the header
 222 |   target_compile_definitions(${LIBRARY_NAME} PRIVATE ENABLE_PAR)
 223 | 
 224 |   if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") # TBB is linked only when the compiler is GCC
 225 |     target_link_libraries(${LIBRARY_NAME} PRIVATE tbb)
 226 |   endif()
 227 | endif()
 228 | 
 229 | add_custom_command(
 230 |   TARGET ${LIBRARY_NAME} POST_BUILD
 231 |   COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:${LIBRARY_NAME}> "../Release_${PROJECT_VERSION_STRING_FULL}/${_DIR}/$<TARGET_FILE_NAME:${LIBRARY_NAME}>"
 232 | ) # NOTE(review): _DIR is never set in this file, so it expands to empty unless supplied externally (e.g. -D_DIR=...) - confirm
233 | 


--------------------------------------------------------------------------------
/src/f3kdb.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2020 Xinyue Lu
  3 |  *
   4 |  * Neo-F3KDB deband - filter.
  5 |  *
  6 |  */
  7 | 
  8 | #pragma once
  9 | 
 10 | #include <memory>
 11 | 
 12 | #ifdef HAS_EXECUTION
 13 |   #include <execution>
 14 | #endif
 15 | 
 16 | #ifndef __cpp_lib_execution
 17 |   #undef ENABLE_PAR
 18 | #endif
 19 | 
 20 | #ifdef ENABLE_PAR
 21 |   #define PAR_POLICY std::execution::par
 22 | #else
 23 |   #define PAR_POLICY nullptr
 24 | #endif
 25 | 
 26 | #include "compiler_compat.h"
 27 | #include "core.h"
 28 | #include "constants.h"
 29 | #include "impl_dispatch.h"
 30 | 
 31 | int GetCPUFlags();
 32 | 
 33 | struct F3KDB final : Filter {
 34 |   f3kdb_params_t ep;
 35 |   std::unique_ptr<f3kdb_core_t> engine;
 36 |   char error_msg[1024];
 37 |   DSVideoInfo out_vi;
 38 |   bool mt {true};
 39 | 
  40 |   const char* VSName() const override { return "Deband"; } // name reported on the VS side (presumably VapourSynth - confirm against Filter)
  41 |   const char* AVSName() const override { return "neo_f3kdb"; } // name reported on the AVS side (presumably AviSynth - confirm against Filter)
  42 |   const MtMode AVSMode() const override { return MT_NICE_FILTER; } // multithreading mode reported on the AVS side
  43 |   const VSFilterMode VSMode() const override { return fmParallel; } // filter mode reported on the VS side
 44 |   const std::vector<Param> Params() const override {
 45 |     return std::vector<Param> {
 46 |       Param {"clip", Clip, false, true, true, false},
 47 |       Param {"range", Integer},
 48 |       Param {"y", Integer},
 49 |       Param {"cb", Integer},
 50 |       Param {"cr", Integer},
 51 |       Param {"grainy", Integer},
 52 |       Param {"grainc", Integer},
 53 |       Param {"sample_mode", Integer},
 54 |       Param {"seed", Integer},
 55 |       Param {"blur_first", Boolean},
 56 |       Param {"dynamic_grain", Boolean},
 57 |       Param {"opt", Integer},
 58 |       Param {"mt", Boolean},
 59 |       Param {"dither_algo", Integer},
 60 |       Param {"keep_tv_range", Boolean},
 61 |       Param {"output_depth", Integer},
 62 |       Param {"random_algo_ref", Integer},
 63 |       Param {"random_algo_grain", Integer},
 64 |       Param {"random_param_ref", Float},
 65 |       Param {"random_param_grain", Float},
 66 |       Param {"preset", String},
 67 |       Param{ "y_1", Integer},
 68 |       Param{ "cb_1", Integer},
 69 |       Param{ "cr_1", Integer},
 70 |       Param{ "y_2", Integer },
 71 |       Param{ "cb_2", Integer },
 72 |       Param{ "cr_2", Integer },
 73 |       Param{ "scale", Boolean },
 74 |       Param{ "angle_boost", Float },
 75 |       Param{ "max_angle", Float },
 76 |     };
 77 |   }
 78 |   void Initialize(InDelegator* in, DSVideoInfo in_vi, FetchFrameFunctor* fetch_frame) override
 79 |   {
 80 |     Filter::Initialize(in, in_vi, fetch_frame);
 81 |     std::string preset;
 82 |     in->Read("preset", preset);
 83 |     std::istringstream piss(preset);
 84 | 
 85 |     bool scale = false;
 86 |     in->Read("scale", scale);
 87 | 
 88 |     while(!piss.eof()) {
 89 |       std::string piss1;
 90 |       std::getline(piss, piss1, '/');
 91 |       if (piss1 == "depth")
 92 |           ep.Y = ep.Cb = ep.Cr = ep.grainY = ep.grainC = ep.Y_1 = ep.Cb_1 = ep.Cr_1 = ep.Y_2 = ep.Cb_2 = ep.Cr_2 = 0;
 93 |       else if (piss1 == "low")
 94 |           ep.Y = ep.Cb = ep.Cr = ep.grainY = ep.grainC = ep.Y_1 = ep.Cb_1 = ep.Cr_1 = ep.Y_2 = ep.Cb_2 = ep.Cr_2 = (scale) ? 128 : 32;
 95 |       else if (piss1 == "medium")
 96 |           ep.Y = ep.Cb = ep.Cr = ep.grainY = ep.grainC = ep.Y_1 = ep.Cb_1 = ep.Cr_1 = ep.Y_2 = ep.Cb_2 = ep.Cr_2 = (scale) ? 192 : 48;
 97 |       else if (piss1 == "high")
 98 |           ep.Y = ep.Cb = ep.Cr = ep.grainY = ep.grainC = ep.Y_1 = ep.Cb_1 = ep.Cr_1 = ep.Y_2 = ep.Cb_2 = ep.Cr_2 = (scale) ? 256 : 64;
 99 |       else if (piss1 == "veryhigh")
100 |           ep.Y = ep.Cb = ep.Cr = ep.grainY = ep.grainC = ep.Y_1 = ep.Cb_1 = ep.Cr_1 = ep.Y_2 = ep.Cb_2 = ep.Cr_2 = (scale) ? 320 : 80;
101 |       else if (piss1 == "nograin")
102 |         ep.grainY = ep.grainC = 0;
103 |       else if (piss1 == "luma")
104 |         ep.Cb = ep.Cr = ep.grainC = 0;
105 |       else if (piss1 == "chroma")
106 |         ep.Y = ep.grainY = 0;
107 |     }
108 |     int tmp;
109 |     in->Read("range", ep.range);
110 |     in->Read("y", ep.Y);
111 |     in->Read("cb", ep.Cb);
112 |     in->Read("cr", ep.Cr);
113 |     in->Read("grainy", ep.grainY);
114 |     in->Read("grainc", ep.grainC);
115 |     in->Read("sample_mode", ep.sample_mode);
116 |     in->Read("seed", ep.seed);
117 |     in->Read("blur_first", ep.blur_first);
118 |     in->Read("dynamic_grain", ep.dynamic_grain);
119 |     tmp = static_cast<int>(ep.dither_algo);
120 |     in->Read("dither_algo", tmp);
121 |     ep.dither_algo = static_cast<DITHER_ALGORITHM>(tmp);
122 |     in->Read("keep_tv_range", ep.keep_tv_range);
123 |     in->Read("output_depth", ep.output_depth);
124 |     tmp = static_cast<int>(ep.random_algo_ref);
125 |     in->Read("random_algo_ref", tmp);
126 |     ep.random_algo_ref = static_cast<RANDOM_ALGORITHM>(tmp);
127 |     tmp = static_cast<int>(ep.random_algo_grain);
128 |     in->Read("random_algo_grain", tmp);
129 |     ep.random_algo_grain = static_cast<RANDOM_ALGORITHM>(tmp);
130 |     in->Read("random_param_ref", ep.random_param_ref);
131 |     in->Read("random_param_grain", ep.random_param_grain);
132 |     in->Read("y_1", ep.Y_1);
133 |     in->Read("cb_1", ep.Cb_1);
134 |     in->Read("cr_1", ep.Cr_1);
135 |     in->Read("y_2", ep.Y_2);
136 |     in->Read("cb_2", ep.Cb_2);
137 |     in->Read("cr_2", ep.Cr_2);
138 |     in->Read("angle_boost", ep.angle_boost);
139 |     in->Read("max_angle", ep.max_angle);
140 | 
141 |     ep.Y_1 = ep.Y_1 == -1 ? ep.Y : ep.Y_1;
142 |     ep.Cb_1 = ep.Cb_1 == -1 ? ep.Cb : ep.Cb_1;
143 |     ep.Cr_1 = ep.Cr_1 == -1 ? ep.Cr : ep.Cr_1;
144 |     ep.Y_2 = ep.Y_2 == -1 ? ep.Y : ep.Y_2;
145 |     ep.Cb_2 = ep.Cb_2 == -1 ? ep.Cb : ep.Cb_2;
146 |     ep.Cr_2 = ep.Cr_2 == -1 ? ep.Cr : ep.Cr_2;
147 | 
148 |     int opt_in = -1;
149 |     in->Read("opt", opt_in);
150 |     in->Read("mt", mt);
151 | 
152 |     OPTIMIZATION_MODE opt = [&]() {
153 |         const int CPUFlags = GetCPUFlags();
154 | 
155 |         if (ep.sample_mode >= 5 && ep.sample_mode <= 7) {
156 |             const int AVX512_REQUIRED_FLAGS = CPUF_AVX512F | CPUF_AVX512BW | CPUF_AVX512DQ | CPUF_AVX512VL | CPUF_AVX512CD;
157 | 
158 |             if (((CPUFlags & AVX512_REQUIRED_FLAGS) == AVX512_REQUIRED_FLAGS) && (opt_in == 3 || opt_in < 0))
159 |                 return IMPL_AVX512;
160 | 
161 |             if ((CPUFlags & CPUF_AVX2) && (opt_in == 2 || opt_in < 0))
162 |                 return IMPL_AVX2;
163 |         }
164 | 
165 |         if ((CPUFlags & CPUF_SSE4_1) && (opt_in > 0 || opt_in < 0))
166 |             return IMPL_SSE4;
167 | 
168 |         return IMPL_C;
169 |         }();
170 | 
171 |     #define INVALID_PARAM_IF(cond) \
172 |     do { if (cond) { throw("Invalid parameter condition: " #cond); } } while (0)
173 | 
174 |     INVALID_PARAM_IF(in_vi.Format.IsFamilyYUV != true);
175 |     INVALID_PARAM_IF(in_vi.Width < 16);
176 |     INVALID_PARAM_IF(in_vi.Height < 16);
177 |     INVALID_PARAM_IF(in_vi.Format.SSW < 0 || in_vi.Format.SSW > 4);
178 |     INVALID_PARAM_IF(in_vi.Format.SSH < 0 || in_vi.Format.SSH > 4);
179 |     INVALID_PARAM_IF(in_vi.Frames <= 0);
180 |     INVALID_PARAM_IF(in_vi.Format.BitsPerSample < 8 || in_vi.Format.BitsPerSample > INTERNAL_BIT_DEPTH);
181 |     INVALID_PARAM_IF(in_vi.Format.IsInteger != true);
182 | 
183 |     if (ep.output_depth < 0)
184 |       ep.output_depth = in_vi.Format.BitsPerSample;
185 |     if (ep.output_depth == 16)
186 |         // set to appropriate precision mode
187 |         ep.dither_algo = DA_16BIT_INTERLEAVED;
188 | 
189 |     const int y_threshold_upper_limit = scale ? 65535 : 511;
190 |     const int cb_threshold_upper_limit = scale ? 65535 : 511;
191 |     const int cr_threshold_upper_limit = scale ? 65535 : 511;
192 |     constexpr int dither_upper_limit = 4096;
193 | 
194 |     #define CHECK_PARAM(value, lower_bound, upper_bound) \
195 |     do { if ((int)value < (int)lower_bound || (int)value > (int)upper_bound) { snprintf(error_msg, sizeof(error_msg), "Invalid parameter %s, must be between %d and %d", #value, lower_bound, upper_bound); throw error_msg; } } while(0)
196 | 
197 |     CHECK_PARAM(ep.range, 0, 255);
198 |     CHECK_PARAM(ep.Y, 0, y_threshold_upper_limit);
199 |     CHECK_PARAM(ep.Cb, 0, cb_threshold_upper_limit);
200 |     CHECK_PARAM(ep.Cr, 0, cr_threshold_upper_limit);
201 |     CHECK_PARAM(ep.grainY, 0, dither_upper_limit);
202 |     CHECK_PARAM(ep.grainC, 0, dither_upper_limit);
203 |     CHECK_PARAM(ep.sample_mode, 1, 7);
204 |     CHECK_PARAM(ep.dither_algo, DA_HIGH_NO_DITHERING, (DA_COUNT - 1) );
205 |     CHECK_PARAM(ep.random_algo_ref, 0, (RANDOM_ALGORITHM_COUNT - 1) );
206 |     CHECK_PARAM(ep.random_algo_grain, 0, (RANDOM_ALGORITHM_COUNT - 1) );
207 |     CHECK_PARAM(ep.Y_1, 0, y_threshold_upper_limit);
208 |     CHECK_PARAM(ep.Cb_1, 0, cb_threshold_upper_limit);
209 |     CHECK_PARAM(ep.Cr_1, 0, cr_threshold_upper_limit);
210 |     CHECK_PARAM(ep.Y_2, 0, y_threshold_upper_limit);
211 |     CHECK_PARAM(ep.Cb_2, 0, cb_threshold_upper_limit);
212 |     CHECK_PARAM(ep.Cr_2, 0, cr_threshold_upper_limit);
213 | 
214 |     if (ep.angle_boost < 0.0f)
215 |         throw "invalid parameter angle_boost, must be positive value";
216 | 
217 |     if (ep.max_angle < 0.0f || ep.max_angle > 1.0f)
218 |         throw "invalid parameter max_angle, must be between 0.0 and 1.0";
219 | 
220 |     // now the internal bit depth is 16,
221 |     // scale parameters to be consistent with 14bit range in previous versions
222 |     ep.Y = scale ? ep.Y : ep.Y << 2;
223 |     ep.Cb = scale ? ep.Cb : ep.Cb << 2;
224 |     ep.Cr = scale ? ep.Cr : ep.Cr << 2;
225 |     ep.Y_1 = scale ? ep.Y_1 : ep.Y_1 << 2;
226 |     ep.Cb_1 = scale ? ep.Cb_1 : ep.Cb_1 << 2;
227 |     ep.Cr_1 = scale ? ep.Cr_1 : ep.Cr_1 << 2;
228 |     ep.Y_2 = scale ? ep.Y_2 : ep.Y_2 << 2;
229 |     ep.Cb_2 = scale ? ep.Cb_2 : ep.Cb_2 << 2;
230 |     ep.Cr_2 = scale ? ep.Cr_2 : ep.Cr_2 << 2;
231 |     ep.grainY <<= 2;
232 |     ep.grainC <<= 2;
233 | 
234 |     out_vi = in_vi;
235 |     out_vi.Format.BitsPerSample = ep.output_depth;
236 |     out_vi.Format.BytesPerSample = ep.output_depth == 8 ? 1 : 2;
237 | 
238 |     try
239 |     {
240 |         engine = std::make_unique<f3kdb_core_t>(in_vi, ep, opt);
241 |     } catch (std::bad_alloc&) {
242 |         throw "Memory allocation failed";
243 |     }
244 |   }
245 | 
246 |   DSFrame GetFrame(int n, std::unordered_map<int, DSFrame> in_frames) override
247 |   { // Builds output frame n: takes in_frames[n] as source and runs the engine once per plane.
248 |     auto src = in_frames[n];
249 |     auto dst = src.Create(out_vi); // destination frame is requested with the output video info
250 |     auto core = [&](char&idx) { // idx is never read; only its address is used, as the plane number
251 |       int p = static_cast<int>(reinterpret_cast<intptr_t>(&idx));
252 |       auto src_stride = src.StrideBytes[p];
253 |       auto src_ptr = src.SrcPointers[p];
254 |       auto dst_stride = dst.StrideBytes[p];
255 |       auto dst_ptr = dst.DstPointers[p];
256 | 
257 |       engine->process_plane(n, p, dst_ptr, dst_stride, src_ptr, src_stride);
258 |     };
259 |     // A char* starting at address 0 acts as a counting iterator: element k has address k, i.e. plane k.
260 | #ifdef ENABLE_PAR
261 |     if(mt)
262 |       std::for_each_n(PAR_POLICY, reinterpret_cast<char*>(0), in_vi.Format.Planes, core);
263 |     else
264 | #endif
265 |     for (intptr_t i = 0; i < in_vi.Format.Planes; i++)
266 |       core(*reinterpret_cast<char*>(i)); // NOTE(review): forming a reference from these addresses is formally undefined behaviour
267 | 
268 |     return dst;
269 |   }
270 | 
271 |   DSVideoInfo GetOutputVI() override
272 |   { // Returns the stored out_vi (the video info GetFrame passes to src.Create for output frames).
273 |     return out_vi;
274 |   }
275 | 
276 |   ~F3KDB() = default; // compiler-generated destructor; no explicit cleanup code is written for this class
277 | };
278 | 


--------------------------------------------------------------------------------
/VCL2/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |   
179 |    Copyright 2012-2019 Agner Fog.
180 | 
181 |    Licensed under the Apache License, Version 2.0 (the "License");
182 |    you may not use this file except in compliance with the License.
183 |    You may obtain a copy of the License at
184 | 
185 |        http://www.apache.org/licenses/LICENSE-2.0
186 | 
187 |    Unless required by applicable law or agreed to in writing, software
188 |    distributed under the License is distributed on an "AS IS" BASIS,
189 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
190 |    See the License for the specific language governing permissions and
191 |    limitations under the License.
192 | 


--------------------------------------------------------------------------------
/src/core.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <stdarg.h>
  3 | #include <memory.h>
  4 | #include <assert.h>
  5 | 
  6 | #include "core.h"
  7 | #include "constants.h"
  8 | #include "random.h"
  9 | #include "impl_dispatch.h"
 10 | 
 11 | #ifdef _WIN32
 12 | #include <intrin.h>
 13 | #endif
 14 | 
 15 | void f3kdb_core_t::destroy_frame_luts(void)
 16 | {
 17 |     // Per-plane dither tables: release each one and clear its pointer straight away.
 18 |     _aligned_free(_y_info);
 19 |     _y_info = NULL;
 20 |     _aligned_free(_cb_info);
 21 |     _cb_info = NULL;
 22 |     _aligned_free(_cr_info);
 23 |     _cr_info = NULL;
 24 | 
 25 |     // Pre-generated grain buffers (aligned allocations as well).
 26 |     _aligned_free(_grain_buffer_y);
 27 |     _grain_buffer_y = NULL;
 28 |     _aligned_free(_grain_buffer_c);
 29 |     _grain_buffer_c = NULL;
 30 |     // The per-frame offset table is obtained with plain malloc, hence plain free.
 31 |     free(_grain_buffer_offsets);
 32 |     _grain_buffer_offsets = NULL;
 33 | 
 34 |     // Contexts are likely to depend on the tables, so they are torn down together with them.
 35 |     destroy_context(&_y_context);
 36 |     destroy_context(&_cb_context);
 37 |     destroy_context(&_cr_context);
 38 | }
 39 | 
 40 | static int inline min_multi( int first, ... )
 41 | {
 42 |     // Returns the smallest value of a list of non-negative ints. The list ends at the
 43 |     // first negative argument, which every caller must supply (e.g. -1); a negative
 44 |     // `first` is returned unchanged without reading any variadic argument.
 45 |     int smallest = first;
 46 |     va_list args;
 47 | 
 48 |     va_start(args, first);
 49 |     for (int cur = first; cur >= 0; cur = va_arg(args, int))
 50 |     {
 51 |         if (cur < smallest)
 52 |             smallest = cur;
 53 |     }
 54 |     va_end(args);
 55 |     return smallest;
 56 | }
 57 | 
 58 | static int get_frame_lut_stride(int width_in_pixels)
 59 | {
 60 |     // Pad the row to a whole multiple of FRAME_LUT_ALIGNMENT so SSE code needs no boundary checks (assumes a power-of-two alignment - TODO confirm).
 61 |     const auto low_bits = FRAME_LUT_ALIGNMENT - 1;
 62 |     return ((width_in_pixels - 1) | low_bits) + 1;
 63 | }
 64 | 
 65 | static short* generate_grain_buffer(size_t item_count, RANDOM_ALGORITHM algo, int& seed, double param, int range)
 66 | {   // Allocates an aligned buffer of item_count values drawn from random(algo, seed, range, param); release with _aligned_free.
 67 |     short* buffer = (short*)_aligned_malloc(item_count * sizeof(short), FRAME_LUT_ALIGNMENT);
 68 |     if (!buffer) // allocation failed: report it instead of writing through a null pointer
 69 |         throw "Memory allocation failed";
 70 |     for (size_t i = 0; i < item_count; i++)
 71 |         buffer[i] = random(algo, seed, range, param); // the same seed object is handed to every draw
 72 |     return buffer;
 73 | }
 74 | 
 75 | void f3kdb_core_t::init_frame_luts(void)
 76 | {   // (Re)builds the per-pixel dither tables for Y/Cb/Cr and the pre-generated grain buffers.
 77 |     destroy_frame_luts();
 78 |     // Seed derivation: the user seed is combined with the frame dimensions and the frame count.
 79 |     int seed = 0x92D68CA2 - _params.seed;
 80 | 
 81 |     seed ^= (_video_info.Width << 16) ^ _video_info.Height;
 82 |     seed ^= (_video_info.Frames << 16) ^ _video_info.Frames;
 83 | 
 84 |     int height_in_pixels = _video_info.Height;
 85 |     int width_in_pixels =  _video_info.Width;
 86 | 
 87 |     int y_stride;
 88 |     y_stride = get_frame_lut_stride(width_in_pixels);
 89 |     // NOTE(review): the _aligned_malloc/malloc results in this function are used without a NULL check.
 90 |     int y_size = sizeof(pixel_dither_info) * y_stride * height_in_pixels;
 91 |     _y_info = (pixel_dither_info*)_aligned_malloc(y_size, FRAME_LUT_ALIGNMENT);
 92 | 
 93 |     // ensure unused items are also initialized
 94 |     memset(_y_info, 0, y_size);
 95 | 
 96 |     int c_stride;
 97 |     c_stride = get_frame_lut_stride(width_in_pixels >> _video_info.Format.SSW);
 98 |     int c_size = sizeof(pixel_dither_info) * c_stride * (height_in_pixels >> _video_info.Format.SSH);
 99 |     _cb_info = (pixel_dither_info*)_aligned_malloc(c_size, FRAME_LUT_ALIGNMENT);
100 |     _cr_info = (pixel_dither_info*)_aligned_malloc(c_size, FRAME_LUT_ALIGNMENT);
101 | 
102 |     memset(_cb_info, 0, c_size);
103 |     memset(_cr_info, 0, c_size);
104 | 
105 |     pixel_dither_info *y_info_ptr, *cb_info_ptr, *cr_info_ptr;
106 | 
107 |     int width_subsamp = _video_info.Format.SSW;
108 |     int height_subsamp = _video_info.Format.SSH;
109 |     // One seed is threaded through every random() call below (presumably advanced by each), so call order matters.
110 |     for (int y = 0; y < height_in_pixels; y++)
111 |     {
112 |         y_info_ptr = _y_info + y * y_stride;
113 |         cb_info_ptr = _cb_info + (y >> height_subsamp) * c_stride;
114 |         cr_info_ptr = _cr_info + (y >> height_subsamp) * c_stride;
115 | 
116 |         for (int x = 0; x < width_in_pixels; x++)
117 |         {
118 |             pixel_dither_info info_y = {0, 0, 0};
119 |             info_y.change = random(_params.random_algo_grain, seed, _params.grainY, _params.random_param_grain);
120 |             // Per-axis reference range: the configured range, capped by the distance to the nearer frame edge.
121 |             int x_range = min_multi(_params.range, x, width_in_pixels - x - 1, -1);
122 |             int y_range = min_multi(_params.range, y, height_in_pixels - y - 1, -1);
123 |             int cur_range = [&]() {
124 |                 switch (_params.sample_mode)
125 |                 {
126 |                 case 1: // limited by the vertical range only
127 |                     return y_range;
128 | 
129 |                 case 3: // limited by the horizontal range only
130 |                     return x_range;
131 | 
132 |                 case 2:
133 |                 case 4:
134 |                 case 5:
135 |                 case 6:
136 |                 case 7: // limited by both axes
137 |                     return min_multi(x_range, y_range, -1);
138 | 
139 |                 default: // unlikely
140 |                     return 0;
141 |                 }
142 |                 }();
143 |             // With a zero range the reference offsets stay 0 (pixels on the frame border).
144 |             if (cur_range > 0) {
145 |                 info_y.ref1 = (signed char)random(_params.random_algo_ref, seed, cur_range, _params.random_param_ref);
146 |                 if (_params.sample_mode == 2)
147 |                 {
148 |                     info_y.ref2 = (signed char)random(_params.random_algo_ref, seed, cur_range, _params.random_param_ref);
149 |                 }
150 |                 if (_params.sample_mode > 0) // offsets are stored as magnitudes
151 |                 {
152 |                     info_y.ref1 = abs(info_y.ref1);
153 |                     info_y.ref2 = abs(info_y.ref2);
154 |                 }
155 |             }
156 | 
157 |             *y_info_ptr = info_y;
158 |             // Chroma entries are written only for the first luma sample of each subsampling cell.
159 |             bool should_set_c = false;
160 |             should_set_c = ((x & ( ( 1 << width_subsamp ) - 1)) == 0 && 
161 |                 (y & ( ( 1 << height_subsamp ) - 1)) == 0);
162 | 
163 |             if (should_set_c) {
164 |                 pixel_dither_info info_cb = info_y;
165 |                 pixel_dither_info info_cr = info_cb;
166 | 
167 |                 // don't shift ref values here, since subsampling of width and height may be different
168 |                 // shift them in actual processing
169 | 
170 |                 info_cb.change = random(_params.random_algo_grain, seed, _params.grainC, _params.random_param_grain);
171 |                 info_cr.change = random(_params.random_algo_grain, seed, _params.grainC, _params.random_param_grain);
172 | 
173 |                 *cb_info_ptr = info_cb;
174 |                 *cr_info_ptr = info_cr;
175 |                 cb_info_ptr++;
176 |                 cr_info_ptr++;
177 |             }
178 |             y_info_ptr++;
179 |         }
180 |     }
181 |     // Dynamic grain uses a 3x buffer; presumably so any per-frame offset leaves a full frame of items after it - TODO confirm.
182 |     int multiplier = _params.dynamic_grain ? 3 : 1;
183 |     int item_count = width_in_pixels;
184 | 
185 |     // add some safety margin and align it
186 |     item_count += 255; // together with the mask below: a multiple of 128 that is at least width + 128
187 |     item_count &= 0xffffff80;
188 | 
189 |     item_count *= height_in_pixels;
190 | 
191 |     _grain_buffer_y = generate_grain_buffer(
192 |         item_count * multiplier,
193 |         _params.random_algo_grain,
194 |         seed,
195 |         _params.random_param_grain,
196 |         _params.grainY);
197 | 
198 |     // we always generate a full-sized buffer to simplify offset calculation
199 |     _grain_buffer_c = generate_grain_buffer(
200 |         item_count * multiplier,
201 |         _params.random_algo_grain,
202 |         seed,
203 |         _params.random_param_grain,
204 |         _params.grainC);
205 | 
206 |     if (_params.dynamic_grain)
207 |     {
208 |         // Pre-generate offset here so that result is deterministic even if we request frame in different order
209 |         _grain_buffer_offsets = (int*)malloc(sizeof(int) * _video_info.Frames);
210 |         for (int i = 0; i < _video_info.Frames; i++)
211 |         {
212 |             int offset = item_count + random(RANDOM_ALGORITHM_UNIFORM, seed, item_count, DEFAULT_RANDOM_PARAM);
213 |             offset &= 0xfffffff0; // align to 16-byte for SSE codes (clears the low 4 bits of the item offset)
214 | 
215 |             assert(offset >= 0);
216 | 
217 |             _grain_buffer_offsets[i] = offset;
218 |         }
219 |     }
220 | }
221 | 
222 | f3kdb_core_t::f3kdb_core_t(DSVideoInfo vi, const f3kdb_params_t params, OPTIMIZATION_MODE opt) :
223 |     _process_plane_impl(NULL),
224 |     _y_info(NULL),
225 |     _cb_info(NULL),
226 |     _cr_info(NULL),
227 |     _grain_buffer_y(NULL),
228 |     _grain_buffer_c(NULL),
229 |     _grain_buffer_offsets(NULL),
230 |     _video_info(vi),
231 |     _opt(opt),
232 |     _params(params)
233 | {   // Every buffer pointer starts as NULL; copies of vi/params/opt are stored, then init() builds the working state.
234 |     this->init();
235 | }
236 | 
237 | f3kdb_core_t::~f3kdb_core_t()
238 | {   // Frees the lookup tables and grain buffers and destroys the three plane contexts.
239 |     destroy_frame_luts();
240 | }
241 | 
242 | static __inline int select_impl_index(int sample_mode, bool blur_first)
243 | {   // Two table slots per sample mode: blur-first lands on 2*mode - 1, the other variant on 2*mode.
244 |     assert(sample_mode != 0);
245 |     return 2 * sample_mode - (blur_first ? 1 : 0);
246 | }
247 | 
248 | void f3kdb_core_t::init(void) 
249 | {   // Sets up the three plane contexts, builds the lookup tables, then selects the plane-processing routine.
250 |     init_context(&_y_context);
251 |     init_context(&_cb_context);
252 |     init_context(&_cr_context);
253 |     // init_frame_luts() begins by destroying the contexts again, so they must already be initialized here.
254 |     init_frame_luts();
255 |     // Dispatch table lookup: [dither algorithm][optimization mode] -> table indexed by sample mode / blur_first.
256 |     const process_plane_impl_t* impl_table = process_plane_impls[_params.dither_algo][(int)_opt];
257 |     _process_plane_impl = impl_table[select_impl_index(_params.sample_mode, _params.blur_first)];
258 | }
259 | 
260 | void f3kdb_core_t::process_plane(int frame_index, int plane, unsigned char* dst_frame_ptr, int dst_pitch, const unsigned char* src_frame_ptr, int src_pitch)
261 | {   // Processes one plane (0 = Y, 1 = Cb, 2 = Cr) of frame frame_index from src into dst, or copies it when nothing would change.
262 |     process_plane_params params;
263 | 
264 |     memset(&params, 0, sizeof(process_plane_params));
265 | 
266 |     params.src_plane_ptr = src_frame_ptr;
267 |     params.src_pitch = src_pitch;
268 | 
269 |     params.dst_plane_ptr = dst_frame_ptr;
270 |     params.dst_pitch = dst_pitch;
271 |     // Exactly 8-bit input and output depths of 8 or less use the low-bit-depth mode; everything else is treated as interleaved high bit depth.
272 |     params.input_mode = _video_info.Format.BitsPerSample == 8 ? LOW_BIT_DEPTH : HIGH_BIT_DEPTH_INTERLEAVED;
273 |     params.input_depth = _video_info.Format.BitsPerSample;
274 |     params.output_mode = _params.output_depth <= 8 ? LOW_BIT_DEPTH : HIGH_BIT_DEPTH_INTERLEAVED;
275 |     params.output_depth = _params.output_depth;
276 |     params.angle_boost = _params.angle_boost;
277 |     params.max_angle = _params.max_angle;
278 | 
279 |     params.plane = plane;
280 |     // Plane 0 is never subsampled; the other planes use the format's subsampling shifts.
281 |     params.width_subsampling = plane == 0 ? 0 : _video_info.Format.SSW;
282 |     params.height_subsampling = plane == 0 ? 0 : _video_info.Format.SSH;
283 | 
284 |     params.plane_width_in_pixels = plane == 0 ? _video_info.Width : (_video_info.Width >> _video_info.Format.SSW);
285 |     params.plane_height_in_pixels = plane == 0 ? _video_info.Height : (_video_info.Height >> _video_info.Format.SSH);
286 |     // Same stride rule that init_frame_luts() used when laying out the per-plane tables.
287 |     params.info_stride = get_frame_lut_stride(params.plane_width_in_pixels);
288 |     params.grain_buffer_stride = get_frame_lut_stride(params.plane_width_in_pixels);
289 | 
290 |     process_plane_context* context;
291 | 
292 |     int grain_setting = 0;
293 |     // Per-plane table, thresholds, clamp limits, grain buffer and context.
294 |     switch (plane)
295 |     {
296 |     case 0:
297 |         params.info_ptr_base = _y_info;
298 |         params.threshold = _params.Y;
299 |         params.threshold1 = _params.Y_1;
300 |         params.threshold2 = _params.Y_2;
301 |         params.pixel_max = _params.keep_tv_range ? TV_RANGE_Y_MAX : FULL_RANGE_Y_MAX;
302 |         params.pixel_min = _params.keep_tv_range ? TV_RANGE_Y_MIN : FULL_RANGE_Y_MIN;
303 |         params.grain_buffer = _grain_buffer_y;
304 |         grain_setting = _params.grainY;
305 |         context = &_y_context;
306 |         break;
307 |     case 1:
308 |         params.info_ptr_base = _cb_info;
309 |         params.threshold = _params.Cb;
310 |         params.threshold1 = _params.Cb_1;
311 |         params.threshold2 = _params.Cb_2;
312 |         params.pixel_max = _params.keep_tv_range ? TV_RANGE_C_MAX : FULL_RANGE_C_MAX;
313 |         params.pixel_min = _params.keep_tv_range ? TV_RANGE_C_MIN : FULL_RANGE_C_MIN;
314 |         params.grain_buffer = _grain_buffer_c;
315 |         grain_setting = _params.grainC;
316 |         context = &_cb_context;
317 |         break;
318 |     case 2: // shares the chroma grain buffer with plane 1
319 |         params.info_ptr_base = _cr_info;
320 |         params.threshold = _params.Cr;
321 |         params.threshold1 = _params.Cr_1;
322 |         params.threshold2 = _params.Cr_2;
323 |         params.pixel_max = _params.keep_tv_range ? TV_RANGE_C_MAX : FULL_RANGE_C_MAX;
324 |         params.pixel_min = _params.keep_tv_range ? TV_RANGE_C_MIN : FULL_RANGE_C_MIN;
325 |         params.grain_buffer = _grain_buffer_c;
326 |         grain_setting = _params.grainC;
327 |         context = &_cr_context;
328 |         break;
329 |     default: // any plane index other than 0..2 terminates the process
330 |         abort();
331 |     }
332 |     // The offset table exists only when dynamic grain is enabled; each frame then starts at its own offset.
333 |     if (_grain_buffer_offsets)
334 |     {
335 |         params.grain_buffer += _grain_buffer_offsets[frame_index % _video_info.Frames];
336 |     }
337 |     // Plain copy when depth is unchanged, grain is zero and all three thresholds are zero.
338 |     bool copy_plane = false;
339 |     if (_video_info.Format.BitsPerSample == _params.output_depth &&
340 |         grain_setting == 0 &&
341 |         params.threshold == 0 && params.threshold1 == 0 && params.threshold2 == 0)
342 |     {
343 |         copy_plane = true;
344 |     }
345 | 
346 |     if (copy_plane) {
347 |         // no need to process
348 |         int line_size = params.get_src_width(); // NOTE(review): used as a byte count below - presumably bytes per row; confirm
349 |         auto src = src_frame_ptr;
350 |         auto dst = dst_frame_ptr;
351 |         if (line_size == src_pitch && src_pitch == dst_pitch)
352 |         {   // rows are contiguous in both frames: one copy covers the whole plane
353 |             memcpy(dst, src, line_size * params.get_src_height());
354 |         } else {
355 |             for (int row = 0; row < params.get_src_height(); row++) 
356 |             {
357 |                 memcpy(dst, src, line_size);
358 |                 src += src_pitch;
359 |                 dst += dst_pitch;
360 |             }
361 |         }
362 |         return;
363 |     }
364 |     // Otherwise hand over to the routine selected in init().
365 |     _process_plane_impl(params, context);
366 | }
367 | 


--------------------------------------------------------------------------------
/VCL2/vectormath_common.h:
--------------------------------------------------------------------------------
  1 | /***************************  vectormath_common.h   ****************************
  2 | * Author:        Agner Fog
  3 | * Date created:  2014-04-18
  4 | * Last modified: 2022-07-20
  5 | * Version:       2.02.00
  6 | * Project:       vector classes
  7 | * Description:
  8 | * Header file containing common code for inline version of mathematical functions.
  9 | *
 10 | * For detailed instructions, see VectorClass.pdf
 11 | *
 12 | * (c) Copyright 2014-2022 Agner Fog.
 13 | * Apache License version 2.0 or later.
 14 | ******************************************************************************/
 15 | 
 16 | #ifndef VECTORMATH_COMMON_H
 17 | #define VECTORMATH_COMMON_H  2
 18 | 
 19 | #ifdef VECTORMATH_LIB_H
 20 | #error conflicting header files. More than one implementation of mathematical functions included
 21 | #endif
 22 | 
 23 | #include <cmath>
 24 | 
 25 | #ifndef VECTORCLASS_H
 26 | #include "vectorclass.h"
 27 | #endif
 28 | 
 29 | #if VECTORCLASS_H < 20200
 30 | #error Incompatible versions of vector class library mixed
 31 | #endif
 32 | 
 33 | 
 34 | /******************************************************************************
 35 |                     Define NAN payload values
 36 | ******************************************************************************/
 37 | #define NAN_LOG 0x101  // logarithm for x<0
 38 | #define NAN_POW 0x102  // negative number raised to non-integer power
 39 | #define NAN_HYP 0x104  // acosh for x<1 and atanh for abs(x)>1
 40 | 
 41 | 
 42 | /******************************************************************************
 43 |                     Define mathematical constants
 44 | ******************************************************************************/
 45 | #define VM_PI       3.14159265358979323846           // pi
 46 | #define VM_PI_2     1.57079632679489661923           // pi / 2
 47 | #define VM_PI_4     0.785398163397448309616          // pi / 4
 48 | #define VM_SQRT2    1.41421356237309504880           // sqrt(2)
 49 | #define VM_LOG2E    1.44269504088896340736           // 1/log(2)
 50 | #define VM_LOG10E   0.434294481903251827651          // 1/log(10)
 51 | #define VM_LOG210   3.321928094887362347808          // log2(10)
 52 | #define VM_LN2      0.693147180559945309417          // log(2)
 53 | #define VM_LN10     2.30258509299404568402           // log(10)
 54 | #define VM_SMALLEST_NORMAL  2.2250738585072014E-308  // smallest normal number, double
 55 | #define VM_SMALLEST_NORMALF 1.17549435E-38f          // smallest normal number, float
 56 | 
 57 | 
 58 | #ifdef VCL_NAMESPACE
 59 | namespace VCL_NAMESPACE {
 60 | #endif
 61 | 
 62 | /******************************************************************************
 63 |       templates for producing infinite and nan in desired vector type
 64 | ******************************************************************************/
 65 | template <typename VTYPE>
 66 | static inline VTYPE infinite_vec();  // declaration only; each supported vector type gets an explicit specialization below
 67 | 
 68 | // infinite_vec specialization for Vec2d: forwards to infinite2d()
 69 | template <> inline Vec2d infinite_vec<Vec2d>() {
 70 |     return infinite2d();
 71 | }
 72 | 
 73 | // infinite_vec specialization for Vec4f: forwards to infinite4f()
 74 | template <> inline Vec4f infinite_vec<Vec4f>() {
 75 |     return infinite4f();
 76 | }
 77 | 
 78 | #if MAX_VECTOR_SIZE >= 256
 79 | 
 80 | // infinite_vec specialization for Vec4d: forwards to infinite4d()
 81 | template <> inline Vec4d infinite_vec<Vec4d>() {
 82 |     return infinite4d();
 83 | }
 84 | 
 85 | // infinite_vec specialization for Vec8f: forwards to infinite8f()
 86 | template <> inline Vec8f infinite_vec<Vec8f>() {
 87 |     return infinite8f();
 88 | }
 89 | 
 90 | #endif // MAX_VECTOR_SIZE >= 256
 91 | 
 92 | #if MAX_VECTOR_SIZE >= 512
 93 | 
 94 | // infinite_vec specialization for Vec8d: forwards to infinite8d()
 95 | template <> inline Vec8d infinite_vec<Vec8d>() {
 96 |     return infinite8d();
 97 | }
 98 | 
 99 | // infinite_vec specialization for Vec16f: forwards to infinite16f()
100 | template <> inline Vec16f infinite_vec<Vec16f>() {
101 |     return infinite16f();
102 | }
103 | 
104 | #endif // MAX_VECTOR_SIZE >= 512
105 | 
106 | 
107 | 
108 | /******************************************************************************
109 | *                 Detect NAN codes
110 | *
111 | * These functions return the code hidden in a NAN. The sign bit is ignored
112 | ******************************************************************************/
113 | 
114 | static inline Vec4ui nan_code(Vec4f const x) {  // per-element NAN payload code; non-NAN elements give 0
115 |     Vec4ui const mantissa_mask = 0x007FFFFF;    // low 23 bits of each 32-bit element
116 |     Vec4ui const payload = Vec4ui(reinterpret_i(x)) & mantissa_mask;
117 |     return select(Vec4ib(is_nan(x)), payload, 0);
118 | }
119 | 
120 | // Returns, per element, the code hidden in a NAN (sign bit ignored); non-NAN elements give 0
121 | static inline Vec2uq nan_code(Vec2d const x) {
122 |     Vec2uq const left_aligned = Vec2uq(reinterpret_i(x)) << 12;  // shift out the top 12 bits
123 |     return select(Vec2qb(is_nan(x)), left_aligned >> (12+29), 0);
124 | }
125 | 
126 | #if MAX_VECTOR_SIZE >= 256
127 | 
128 | // Returns, per element, the code hidden in a NAN (sign bit ignored); non-NAN elements give 0
129 | static inline Vec8ui nan_code(Vec8f const x) {
130 |     Vec8ui const mantissa_mask = 0x007FFFFF;    // low 23 bits of each 32-bit element
131 |     Vec8ui const payload = Vec8ui(reinterpret_i(x)) & mantissa_mask;
132 |     return select(Vec8ib(is_nan(x)), payload, 0);
133 | }
134 | 
135 | // Returns, per element, the code hidden in a NAN (sign bit ignored); non-NAN elements give 0
136 | static inline Vec4uq nan_code(Vec4d const x) {
137 |     Vec4uq const left_aligned = Vec4uq(reinterpret_i(x)) << 12;  // shift out the top 12 bits
138 |     return select(Vec4qb(is_nan(x)), left_aligned >> (12+29), 0);
139 | }
140 | 
141 | #endif // MAX_VECTOR_SIZE >= 256
142 | #if MAX_VECTOR_SIZE >= 512
143 | 
144 | // Returns, per element, the code hidden in a NAN (sign bit ignored); non-NAN elements give 0
145 | static inline Vec16ui nan_code(Vec16f const x) {
146 |     Vec16ui const mantissa_mask = 0x007FFFFF;   // low 23 bits of each 32-bit element
147 |     Vec16ui const payload = Vec16ui(reinterpret_i(x)) & mantissa_mask;
148 |     return select(Vec16ib(is_nan(x)), payload, 0);
149 | }
150 | 
151 | // Returns, per element, the code hidden in a NAN (sign bit ignored); non-NAN elements give 0
152 | static inline Vec8uq nan_code(Vec8d const x) {
153 |     Vec8uq const left_aligned = Vec8uq(reinterpret_i(x)) << 12;  // shift out the top 12 bits
154 |     return select(Vec8qb(is_nan(x)), left_aligned >> (12+29), 0);
155 | }
156 | 
157 | #endif // MAX_VECTOR_SIZE >= 512
158 | 
159 | 
160 | /******************************************************************************
161 |                   templates for polynomials
162 | Using Estrin's scheme to make shorter dependency chains and use FMA, starting
163 | longest dependency chains first.
164 | ******************************************************************************/
165 | 
166 | // Evaluates c2*x^2 + c1*x + c0; VTYPE may be a vector type, CTYPE is a scalar type
167 | template <class VTYPE, class CTYPE>
168 | static inline VTYPE polynomial_2(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2) {
169 |     // The square of x and the linear part do not depend on each other;
170 |     // one final multiply-add joins them: x^2 * c2 + (x * c1 + c0).
171 |     VTYPE const xx     = x * x;
172 |     VTYPE const linear = mul_add(x, c1, c0);
173 |     return mul_add(xx, c2, linear);
174 | }
175 | 
176 | template<class VTYPE, class CTYPE>
177 | static inline VTYPE polynomial_3(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3) {
178 |     // Evaluates c3*x^3 + c2*x^2 + c1*x + c0; VTYPE may be a vector type, CTYPE is a scalar type
179 |     VTYPE const xx   = x * x;
180 |     VTYPE const high = mul_add(c3, x, c2);   // c3*x + c2
181 |     VTYPE const low  = mul_add(c1, x, c0);   // c1*x + c0
182 |     return mul_add(high, xx, low);           // high*x^2 + low
183 | }
184 | 
185 | template<class VTYPE, class CTYPE>
186 | static inline VTYPE polynomial_4(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4) {
187 |     // Evaluates c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0; VTYPE may be a vector type, CTYPE is a scalar type
188 |     VTYPE const xx   = x * x;
189 |     VTYPE const xxxx = xx * xx;
190 |     VTYPE const mid  = mul_add(c3, x, c2);               // c3*x + c2
191 |     VTYPE const rest = mul_add(c1, x, c0) + c4*xxxx;     // (c1*x + c0) + c4*x^4
192 |     return mul_add(mid, xx, rest);                       // mid*x^2 + rest
193 | }
194 | 
195 | template<class VTYPE, class CTYPE>
196 | static inline VTYPE polynomial_4n(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3) {
197 |     // Evaluates 1*x^4 + c3*x^3 + c2*x^2 + c1*x + c0; VTYPE may be a vector type, CTYPE is a scalar type
198 |     VTYPE const xx   = x * x;
199 |     VTYPE const xxxx = xx * xx;
200 |     VTYPE const mid  = mul_add(c3, x, c2);            // c3*x + c2
201 |     VTYPE const rest = mul_add(c1, x, c0) + xxxx;     // (c1*x + c0) + x^4
202 |     return mul_add(mid, xx, rest);                    // mid*x^2 + rest
203 | }
204 | 
205 | template<class VTYPE, class CTYPE>
206 | static inline VTYPE polynomial_5(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5) {
207 |     // Evaluates c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0; VTYPE may be a vector type, CTYPE is a scalar type
208 |     VTYPE const xx    = x * x;
209 |     VTYPE const xxxx  = xx * xx;
210 |     VTYPE const mid   = mul_add(c3, x, c2);                                     // c3*x + c2
211 |     VTYPE const outer = mul_add(mul_add(c5, x, c4), xxxx, mul_add(c1, x, c0));  // (c5*x + c4)*x^4 + (c1*x + c0)
212 |     return mul_add(mid, xx, outer);                                             // mid*x^2 + outer
213 | }
214 | 
215 | template<class VTYPE, class CTYPE>
216 | static inline VTYPE polynomial_5n(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4) {
217 |     // Evaluates 1*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0; VTYPE may be a vector type, CTYPE is a scalar type
218 |     VTYPE const xx    = x * x;
219 |     VTYPE const xxxx  = xx * xx;
220 |     VTYPE const mid   = mul_add(c3, x, c2);                          // c3*x + c2
221 |     VTYPE const outer = mul_add(c4 + x, xxxx, mul_add(c1, x, c0));   // (c4 + x)*x^4 + (c1*x + c0)
222 |     return mul_add(mid, xx, outer);                                  // mid*x^2 + outer
223 | }
224 | 
225 | template<class VTYPE, class CTYPE>
226 | static inline VTYPE polynomial_6(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6) {
227 |     // Evaluates c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0; VTYPE may be a vector type, CTYPE is a scalar type
228 |     VTYPE const xx    = x * x;
229 |     VTYPE const xxxx  = xx * xx;
230 |     VTYPE const upper = mul_add(c6, xx, mul_add(c5, x, c4));                    // c6*x^2 + (c5*x + c4)
231 |     VTYPE const lower = mul_add(mul_add(c3, x, c2), xx, mul_add(c1, x, c0));    // (c3*x + c2)*x^2 + (c1*x + c0)
232 |     return mul_add(upper, xxxx, lower);                                         // upper*x^4 + lower
233 | }
234 | 
235 | template<class VTYPE, class CTYPE>
236 | static inline VTYPE polynomial_6n(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5) {
237 |     // Evaluates 1*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0; VTYPE may be a vector type, CTYPE is a scalar type
238 |     VTYPE const xx    = x * x;
239 |     VTYPE const xxxx  = xx * xx;
240 |     VTYPE const upper = mul_add(c5, x, c4 + xx);                                // c5*x + (c4 + x^2)
241 |     VTYPE const lower = mul_add(mul_add(c3, x, c2), xx, mul_add(c1, x, c0));    // (c3*x + c2)*x^2 + (c1*x + c0)
242 |     return mul_add(upper, xxxx, lower);                                         // upper*x^4 + lower
243 | }
244 | 
245 | template<class VTYPE, class CTYPE>
246 | static inline VTYPE polynomial_7(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7) {
247 |     // Evaluates c7*x^7 + c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0; VTYPE may be a vector type, CTYPE is a scalar type
248 |     VTYPE const xx    = x * x;
249 |     VTYPE const xxxx  = xx * xx;
250 |     VTYPE const upper = mul_add(mul_add(c7, x, c6), xx, mul_add(c5, x, c4));    // (c7*x + c6)*x^2 + (c5*x + c4)
251 |     VTYPE const lower = mul_add(mul_add(c3, x, c2), xx, mul_add(c1, x, c0));    // (c3*x + c2)*x^2 + (c1*x + c0)
252 |     return mul_add(upper, xxxx, lower);                                         // upper*x^4 + lower
253 | }
254 | 
255 | template<class VTYPE, class CTYPE>
256 | static inline VTYPE polynomial_8(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8) {
257 |     // Evaluates c8*x^8 + c7*x^7 + ... + c1*x + c0; VTYPE may be a vector type, CTYPE is a scalar type
258 |     VTYPE const xx    = x  * x;
259 |     VTYPE const xxxx  = xx * xx;
260 |     VTYPE const x8    = xxxx * xxxx;
261 |     VTYPE const upper = mul_add(mul_add(c7, x, c6), xx, mul_add(c5, x, c4));    // (c7*x + c6)*x^2 + (c5*x + c4)
262 |     VTYPE const tail  = mul_add(c1, x, c0) + c8*x8;                             // (c1*x + c0) + c8*x^8
263 |     VTYPE const lower = mul_add(mul_add(c3, x, c2), xx, tail);                  // (c3*x + c2)*x^2 + tail
264 |     return mul_add(upper, xxxx, lower);                                         // upper*x^4 + lower
265 | }
266 | 
267 | template<class VTYPE, class CTYPE>
268 | static inline VTYPE polynomial_9(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8, CTYPE c9) {
269 |     // Evaluates c9*x^9 + c8*x^8 + ... + c1*x + c0; VTYPE may be a vector type, CTYPE is a scalar type
270 |     VTYPE const xx    = x  * x;
271 |     VTYPE const xxxx  = xx * xx;
272 |     VTYPE const x8    = xxxx * xxxx;
273 |     VTYPE const top   = mul_add(c9, x, c8);                                     // c9*x + c8
274 |     VTYPE const upper = mul_add(mul_add(c7, x, c6), xx, mul_add(c5, x, c4));    // (c7*x + c6)*x^2 + (c5*x + c4)
275 |     VTYPE const lower = mul_add(mul_add(c3, x, c2), xx, mul_add(c1, x, c0));    // (c3*x + c2)*x^2 + (c1*x + c0)
276 |     VTYPE const deg7  = mul_add(upper, xxxx, lower);                            // all terms below x^8
277 |     return mul_add(top, x8, deg7);                                              // top*x^8 + deg7
278 | }
279 | 
280 | template<class VTYPE, class CTYPE>
281 | static inline VTYPE polynomial_10(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8, CTYPE c9, CTYPE c10) {
282 |     // Evaluates c10*x^10 + c9*x^9 + ... + c1*x + c0; VTYPE may be a vector type, CTYPE is a scalar type
283 |     VTYPE const xx    = x  * x;
284 |     VTYPE const xxxx  = xx * xx;
285 |     VTYPE const x8    = xxxx * xxxx;
286 |     VTYPE const top   = mul_add(xx, c10, mul_add(c9, x, c8));                   // x^2*c10 + (c9*x + c8)
287 |     VTYPE const upper = mul_add(mul_add(c7, x, c6), xx, mul_add(c5, x, c4));    // (c7*x + c6)*x^2 + (c5*x + c4)
288 |     VTYPE const lower = mul_add(mul_add(c3, x, c2), xx, mul_add(c1, x, c0));    // (c3*x + c2)*x^2 + (c1*x + c0)
289 |     VTYPE const deg7  = mul_add(upper, xxxx, lower);                            // all terms below x^8
290 |     return mul_add(top, x8, deg7);                                              // top*x^8 + deg7
291 | }
292 | 
293 | template<class VTYPE, class CTYPE>
294 | static inline VTYPE polynomial_13(VTYPE const x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8, CTYPE c9, CTYPE c10, CTYPE c11, CTYPE c12, CTYPE c13) {
295 |     // Evaluates c13*x^13 + c12*x^12 + ... + c1*x + c0.
296 |     // VTYPE may be a vector type, CTYPE is a scalar type.
297 |     VTYPE const xx   = x  * x;
298 |     VTYPE const xxxx = xx * xx;
299 |     VTYPE const x8   = xxxx * xxxx;
300 |     VTYPE const q0 = mul_add(mul_add(c3, x, c2), xx, mul_add(c1, x, c0));      // coefficients c0..c3
301 |     VTYPE const q1 = mul_add(mul_add(c7, x, c6), xx, mul_add(c5, x, c4));      // coefficients c4..c7
302 |     VTYPE const q2 = mul_add(mul_add(c11, x, c10), xx, mul_add(c9, x, c8));    // coefficients c8..c11
303 |     VTYPE const q3 = mul_add(c13, x, c12);                                     // coefficients c12..c13
304 |     VTYPE const low_half  = mul_add(q1, xxxx, q0);    // q1*x^4 + q0: all terms below x^8
305 |     VTYPE const high_half = mul_add(q3, xxxx, q2);    // q3*x^4 + q2: terms from x^8 up, before scaling by x^8
306 |     return mul_add(high_half, x8, low_half);
307 | }
308 | 
309 | 
310 | template<class VTYPE, class CTYPE>
311 | static inline VTYPE polynomial_13m(VTYPE const x, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8, CTYPE c9, CTYPE c10, CTYPE c11, CTYPE c12, CTYPE c13) {
312 |     // Evaluates c13*x^13 + c12*x^12 + ... + c2*x^2 + x + 0; VTYPE may be a vector type, CTYPE is a scalar type
313 |     VTYPE const xx   = x  * x;
314 |     VTYPE const xxxx = xx * xx;
315 |     VTYPE const x8   = xxxx * xxxx;
316 |     VTYPE const q0 = mul_add(mul_add(c3, x, c2), xx, x);                       // (c3*x + c2)*x^2 + x
317 |     VTYPE const q1 = mul_add(mul_add(c7, x, c6), xx, mul_add(c5, x, c4));      // (c7*x + c6)*x^2 + (c5*x + c4)
318 |     VTYPE const q2 = mul_add(mul_add(c11, x, c10), xx, mul_add(c9, x, c8));    // (c11*x + c10)*x^2 + (c9*x + c8)
319 |     VTYPE const high_half = mul_add(mul_add(c13, x, c12), xxxx, q2);           // (c13*x + c12)*x^4 + q2
320 |     return mul_add(high_half, x8, mul_add(q1, xxxx, q0));                      // high_half*x^8 + (q1*x^4 + q0)
321 | }
322 | 
323 | #ifdef VCL_NAMESPACE
324 | }
325 | #endif
326 | 
327 | #endif
328 | 


--------------------------------------------------------------------------------