├── Collections
│   ├── LUM.py
│   ├── SuperRes.py
│   ├── descale_verifyer.vpy
│   ├── examples
│   │   ├── BilateralGPU_cupy
│   │   │   ├── bilateral.cu
│   │   │   └── bilateral_gpu_cupy.vpy
│   │   ├── Dpid_cupy
│   │   │   ├── dpid.cu
│   │   │   └── dpid_cupy.vpy
│   │   ├── Expr_msvc
│   │   │   ├── Expr.vpy
│   │   │   └── Source_template.cpp
│   │   ├── FFDNet_TensorRT
│   │   │   ├── README.md
│   │   │   ├── benchmark.py
│   │   │   ├── build_engine.py
│   │   │   ├── ffdnet_test.vpy
│   │   │   ├── utils.py
│   │   │   └── vs_ffdnet.py
│   │   ├── KNLMeasCL_cupy
│   │   │   ├── knlm.cu
│   │   │   ├── knlm_cupy.vpy
│   │   │   ├── knlm_mem_inefficient.cu
│   │   │   └── knlm_mem_inefficient_cupy.vpy
│   │   ├── NLH_cupy
│   │   │   ├── NLH_cupy.vpy
│   │   │   └── kernel.cu
│   │   ├── SigmaFilter_cupy
│   │   │   ├── sigma_filter.cu
│   │   │   └── sigma_filter_cupy.vpy
│   │   ├── SigmaFilter_cython
│   │   │   ├── setup.py
│   │   │   ├── sigma_filter.pyx
│   │   │   └── sigma_filter_cython.vpy
│   │   ├── Super-xBR_cupy
│   │   │   ├── super-xbr.cu
│   │   │   └── super-xbr_cupy.vpy
│   │   ├── sigma_filter_numba.vpy
│   │   ├── super_resolution_mxnet.vpy
│   │   └── super_resolution_opencv.vpy
│   ├── muvsfunc_misc.py
│   ├── muvsfunc_numpy.py
│   ├── net_interp.py
│   └── resize.py
├── README.md
├── muvs.py
└── muvsfunc.py
/Collections/LUM.py:
--------------------------------------------------------------------------------
1 | """LUM filters for VapourSynth
2 | 
3 | Ref:
4 |     [1] Hardie, R. C., & Boncelet, C. (1993). LUM filters: a class of rank-order-based filters for smoothing and sharpening. IEEE transactions on signal processing, 41(3), 1061-1076.
5 | """
6 | 
7 | from vapoursynth import core
8 | 
9 | import havsfunc as haf
10 | import muvsfunc as muf
11 | 
12 | 
13 | def lum_smoother(input, k):
14 |     low = muf.Sort(input, k, mode='min')
15 |     high = muf.Sort(input, k, mode='max')
16 | 
17 |     return haf.Clamp(input, high, low)  # or just core.rgvs.RemoveGrain(input, k - 1)
18 | 
19 | 
20 | def lum_sharper(input, l=2):
21 |     if l not in range(1, 6):
22 |         raise ValueError("'l' must be in [1, 5]!")
23 | 
24 |     low1 = muf.Sort(input, l, mode='min')
25 |     high1 = muf.Sort(input, l, mode='max')
26 | 
27 |     return core.std.Expr([input, low1, high1], ['x y z + 2 / <= x y min x z max ?'])
28 | 
29 | 
30 | def lum_filter(input, k=3, l=4):
31 |     if (not isinstance(k, int)) or (not isinstance(l, int)) or (not 1 <= k <= l <= 5):
32 |         raise ValueError("'k' and 'l' must be in [1, 5] and 'k' must not be greater than 'l'!")
33 | 
34 |     low_k = muf.Sort(input, k, mode='min')
35 |     low_l = muf.Sort(input, l, mode='min')
36 |     high_l = muf.Sort(input, l, mode='max')
37 |     high_k = muf.Sort(input, k, mode='max')
38 | 
39 |     return core.std.Expr([input, low_k, low_l, high_l, high_k], ['x z a + 2 / <= x y < y x z min ? x b > b x a max ? ?', ''])
40 | 
41 | 
42 | def asymmetric_lum_filter(input, k=3, l=4, q=6, r=7):
43 |     if (not isinstance(k, int)) or (not isinstance(l, int)) or (not isinstance(q, int)) or (not isinstance(r, int)) or (not 1 <= k <= l <= q <= r <= 9):
44 |         raise ValueError("'k', 'l', 'q' and 'r' must be in [1, 9] in ascending order!")
45 | 
46 |     order_k = muf.Sort(input, k, mode='min')
47 |     order_l = muf.Sort(input, l, mode='min')
48 |     order_q = muf.Sort(input, q, mode='min')
49 |     order_r = muf.Sort(input, r, mode='min')
50 | 
51 |     return core.std.Expr([input, order_k, order_l, order_q, order_r],
52 |                          ['x z a + 2 / <= x y < y x z min ? x b > b x a max ? ?'])
--------------------------------------------------------------------------------
/Collections/SuperRes.py:
--------------------------------------------------------------------------------
1 | # SuperRes1(): Super Resolution
2 | # SuperRes2(): Super Resolution with nnedi3 upsampling
3 | 
4 | # SuperRes(): Super Resolution with NLMeans filtering and user-defined resampling
5 | """Example of using nnedi3() as the main upsampling filter:
6 | 
7 | import nnedi3_resample as nnrs
8 | from functools import partial
9 | 
10 | input = ...
11 | target_width = ...
12 | target_height = ...
13 | upsampleFilter = partial(nnrs.nnedi3_resample, target_width=target_width, target_height=target_height)
14 | superResolution = SuperRes(input, target_width, target_height, upsampleFilter1=upsampleFilter)
15 | 
16 | """
17 | 
18 | # Appears to behave naturally when used to enhance textures during upsampling, though it may introduce lots of aliasing
19 | 
20 | # A 16-bit integer clip is required
21 | 
22 | from vapoursynth import core
23 | 
24 | 
25 | # Main function
26 | def SuperRes(lowRes, width, height, fltPass=3, upsampleFilter1=None, upsampleFilter2=None, downsampleFilter=None, useNLMeans=True, **knlm_args):
27 |     if upsampleFilter1 is None:
28 |         def upsampleFilter1(input):
29 |             return core.fmtc.resample(input, width, height)
30 |     if upsampleFilter2 is None:
31 |         def upsampleFilter2(input):
32 |             return core.fmtc.resample(input, width, height)
33 |     if downsampleFilter is None:
34 |         def downsampleFilter(input):
35 |             return core.fmtc.resample(input, lowRes.width, lowRes.height)
36 | 
37 |     def computeError(input):
38 |         return core.std.MakeDiff(lowRes, downsampleFilter(input))
39 | 
40 |     highRes = upsampleFilter1(lowRes)
41 |     for i in range(fltPass):
42 |         diff = upsampleFilter2(computeError(highRes))
43 |         if useNLMeans:
44 |             diff = core.knlm.KNLMeansCL(diff, rclip=highRes, **knlm_args)
45 |         highRes = core.std.MergeDiff(highRes, diff)
46 |     return highRes
47 | 
48 | 
49 | # Wrapper functions
50 | def SuperRes1(lowRes, w, h, fltPass=3, useNLMeans=True, knlm_args=dict(), **fmtc_args):
51 |     from functools import partial
52 | 
53 |     upsampleFilter = partial(core.fmtc.resample, w=w, h=h, **fmtc_args)
54 | 
55 |     downsampleFilter = partial(core.fmtc.resample, w=lowRes.width, h=lowRes.height, **fmtc_args)
56 | 
57 |     return SuperRes(lowRes, w, h, fltPass, upsampleFilter, upsampleFilter, downsampleFilter, useNLMeans, **knlm_args)
58 | 
59 | 
60 | def SuperRes2(lowRes, w, h, fltPass=3, useNLMeans=True, nnedi3_args=dict(), knlm_args=dict(), **fmtc_args):
61 |     from functools import partial
62 |     import nnedi3_resample as nnrs
63 | 
64 |     upsampleFilter1 = partial(nnrs.nnedi3_resample, target_width=w, target_height=h, **nnedi3_args)
65 | 
66 |     upsampleFilter2 = partial(core.fmtc.resample, w=w, h=h, **fmtc_args)
67 | 
68 |     downsampleFilter = partial(core.fmtc.resample, w=lowRes.width, h=lowRes.height, **fmtc_args)
69 | 
70 |     return SuperRes(lowRes, w, h, fltPass, upsampleFilter1, upsampleFilter2, downsampleFilter, useNLMeans, **knlm_args)
--------------------------------------------------------------------------------
/Collections/descale_verifyer.vpy:
--------------------------------------------------------------------------------
1 | # Modified from https://github.com/himesaka-noa/descale-verifier/blob/master/descale_verify.py
2 | 
3 | import vapoursynth as vs
4 | import numpy as np
5 | try:
6 |     import matplotlib
7 |     import matplotlib.pyplot as plt
8 | except BaseException:
9 |     import matplotlib
10 |     matplotlib.use('Agg')
11 |     import matplotlib.pyplot as plt
12 | from datetime import datetime
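# NB: the try/except above falls back to matplotlib's non-interactive Agg
# backend, so the error plot can still be written to disk on display-less systems.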
13 | from functools import partial 14 | import sys 15 | 16 | 17 | core = vs.core 18 | 19 | # parameters 20 | src = core.lsmas.LWLibavSource(r"00004.m2ts") 21 | src = core.std.SelectEvery(src, 100, 0) 22 | src = core.resize.Point(src, format=vs.GRAYS) 23 | 24 | descaled_width = 1280 25 | descaled_height = 720 26 | kernel = "bicubic" 27 | a, b = 0, 0.5 28 | 29 | 30 | if sys.platform != "win32": 31 | save_filename = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 32 | else: 33 | save_filename = datetime.now().strftime("%y-%m-%d %H-%M-%S") 34 | 35 | 36 | # functions 37 | def calc_statistics(original, w, h, dw, dh, kernel, a, b): 38 | if kernel == 'bicubic': 39 | descaled = core.descale.Debicubic(original, dw, dh, b=a, c=b) 40 | rescaled = core.resize.Bicubic(descaled, w, h, filter_param_a=a, filter_param_b=b) 41 | elif kernel == 'bilinear': 42 | descaled = core.descale.Debilinear(original, dw, dh) 43 | rescaled = core.resize.Bilinear(descaled, w, h) 44 | elif kernel == 'lanczos': 45 | descaled = core.descale.Delanczos(original, dw, dh, taps=int(a)) 46 | rescaled = core.resize.Lanczos(descaled, w, h, filter_param_a=int(a)) 47 | elif kernel == 'spline16': 48 | descaled = core.descale.Despline16(original, dw, dh) 49 | rescaled = core.resize.Spline16(descaled, w, h) 50 | elif kernel == 'spline36': 51 | descaled = core.descale.Despline36(original, dw, dh) 52 | rescaled = core.resize.Spline36(descaled, w, h) 53 | else: 54 | raise NotImplementedError(f"Kernel {kernel} is not implemented.") 55 | 56 | return core.std.PlaneStats(original, rescaled) 57 | 58 | 59 | def create_plot(data, save_filename): 60 | fig, ax = plt.subplots() 61 | t = np.arange(data.shape[0]) 62 | ax.plot(t, data) 63 | ax.set(xlabel='frames', ylabel='relative difference', title='Descale Error') 64 | ax.grid() 65 | fig.savefig(f"{save_filename}.png") 66 | 67 | 68 | def output_statistics(clip, save_filename): 69 | values = np.zeros(clip.num_frames) 70 | remaining_frames = clip.num_frames 71 | 72 | def filter_core(n, f, clip): 73 | values[n] = f.props["PlaneStatsDiff"] 74 | 75 | nonlocal remaining_frames 76 | remaining_frames -= 1 77 | 78 | if remaining_frames == 0: 79 | create_plot(values, save_filename) 80 | 81 | return clip 82 | 83 | clip = core.std.FrameEval(clip, partial(filter_core, clip=clip), clip) 84 | return clip 85 | 86 | 87 | # process 88 | clip = calc_statistics(src, src.width, src.height, descaled_width, descaled_height, kernel, a, b) 89 | clip = output_statistics(clip, save_filename) 90 | clip.set_output() 91 | -------------------------------------------------------------------------------- /Collections/examples/BilateralGPU_cupy/bilateral.cu: -------------------------------------------------------------------------------- 1 | // naive implementation of CUDA-accelerated (NN/SNN) Bilateral filter 2 | 3 | // modified from 4 | // https://github.com/opencv/opencv_contrib/blob/82733fe56b13401519ace101dc4d724f0a83f535/modules/cudaimgproc/perf/perf_bilateral_filter.cpp 5 | 6 | 7 | #define WIDTH $width 8 | #define HEIGHT $height 9 | #define SIGMA_S ${sigma_s}f 10 | #define SIGMA_R ${sigma_r}f 11 | #define SIGMA ${sigma}f 12 | #define HALF_KERNEL_SIZE ${half_kernel_size} 13 | #define SNN ${snn} 14 | 15 | #ifndef MIN 16 | #define MIN(a,b) (((a)<(b))?(a):(b)) 17 | #endif 18 | 19 | #ifndef MAX 20 | #define MAX(a,b) (((a)>(b))?(a):(b)) 21 | #endif 22 | 23 | extern "C" 24 | __global__ void bilateral(const float * __restrict__ src, float * __restrict__ dst) { 25 | const int x = threadIdx.x + blockIdx.x * blockDim.x; 26 | const int y = 
threadIdx.y + blockIdx.y * blockDim.y;
27 | 
28 |     if (x >= WIDTH || y >= HEIGHT)
29 |         return;
30 | 
31 |     const float center = src[y * WIDTH + x];
32 | 
33 |     float sum1 = 0;
34 |     float sum2 = 0;
35 | 
36 |     for (int cy = MAX(y - HALF_KERNEL_SIZE, 0); cy <= MIN(y + HALF_KERNEL_SIZE, HEIGHT - 1); ++cy)
37 |         for (int cx = MAX(x - HALF_KERNEL_SIZE, 0); cx <= MIN(x + HALF_KERNEL_SIZE, WIDTH - 1); ++cx) {
38 |             const float space = (x - cx) * (x - cx) + (y - cy) * (y - cy);
39 | 
40 |             const float value = src[cy * WIDTH + cx];
41 | 
42 | #if SNN
43 |             const float weight = expf(space * SIGMA_S +
44 |                 fabsf((value - center) * (value - center) - SIGMA) * SIGMA_R);
45 | #else
46 |             const float weight = expf(space * SIGMA_S + (value - center) * (value - center) * SIGMA_R);
47 | #endif
48 | 
49 |             sum1 += weight * value;
50 |             sum2 += weight;
51 |         }
52 | 
53 |     dst[y * WIDTH + x] = sum1 / sum2;
54 | }
55 | 
--------------------------------------------------------------------------------
/Collections/examples/BilateralGPU_cupy/bilateral_gpu_cupy.vpy:
--------------------------------------------------------------------------------
1 | """Bilateral-GPU in VapourSynth"""
2 | 
3 | from string import Template
4 | 
5 | import cupy as cp
6 | import vapoursynth as vs
7 | from vapoursynth import core
8 | import muvsfunc_numpy as mufnp
9 | 
10 | # Load source clip. Only GRAYS is supported
11 | src = core.std.BlankClip(format=vs.GRAYS)
12 | 
13 | # params of bilateral filter. See documentation at https://github.com/HomeOfVapourSynthEvolution/VapourSynth-Bilateral
14 | sigmaS = 3.0
15 | sigmaR = 0.02
16 | # params of SNN (Statistical Nearest Neighbors) sampling strategy
17 | # ref: I. Frosio, J. Kautz, Statistical Nearest Neighbors for Image Denoising, IEEE Trans. Image Processing, 2019.
18 | sigma = 0 # 0.0003
19 | 
20 | # other params
21 | half_kernel_size = round(sigmaS * 2)
22 | blksize = (32, 8) # dimensions of the CUDA thread block
23 | 
24 | # whether to enable '--use_fast_math' in NVRTC runtime compilation
25 | # to make use of fast math operations
26 | fast = False
27 | 
28 | # pre-processing
29 | snn = int(sigma > 0) # whether to use SNN sampling strategy
30 | 
31 | if src.format.id != vs.GRAYS:
32 |     raise vs.Error("Bilateral: Only 32-bit float grayscale is supported!")
33 | 
34 | w, h = src.width, src.height
35 | 
36 | # source code of CUDA kernel
37 | with open('bilateral.cu', 'r') as f:
38 |     kernel_source_code = f.read()
39 | 
40 | kernel_source_code = Template(kernel_source_code)
41 | kernel_source_code = kernel_source_code.substitute(
42 |     width=w, height=h, sigma_s=-0.5/(sigmaS**2), sigma_r=-0.5/(sigmaR**2),
43 |     sigma=sigma, snn=snn, half_kernel_size=half_kernel_size)
44 | 
45 | 
46 | if fast:
47 |     kernel = cp.RawKernel(kernel_source_code, 'bilateral',
48 |                           options=('--use_fast_math', ))
49 | else:
50 |     kernel = cp.RawKernel(kernel_source_code, 'bilateral')
51 | 
52 | # create NumPy function
53 | def bilateral_core(h_img, kernel):
54 |     # h_img must be a 2-D image
55 | 
56 |     d_img = cp.asarray(h_img)
57 |     d_out = cp.empty_like(d_img)
58 | 
59 |     kernel(((w + blksize[0] - 1)//blksize[0], (h + blksize[1] - 1)//blksize[1]), blksize, (d_img, d_out))
60 | 
61 |     h_out = cp.asnumpy(d_out)
62 | 
63 |     return h_out
64 | 
65 | # process
66 | res = mufnp.numpy_process(src, bilateral_core, kernel=kernel)
67 | 
68 | res.set_output()
69 | 
--------------------------------------------------------------------------------
/Collections/examples/Dpid_cupy/dpid.cu:
--------------------------------------------------------------------------------
1 | // 
Copyright (c) 2016 Nicolas Weber and Sandra C. Amend / GCC / TU-Darmstadt. All rights reserved. 2 | // Use of this source code is governed by the BSD 3-Clause license that can be 3 | // found in the LICENSE file. 4 | // modified by WolframRhodium 5 | 6 | #define THREADS 128 7 | #define WSIZE 32 8 | #define TSIZE (THREADS / WSIZE) 9 | 10 | #define TX threadIdx.x 11 | #define PX (blockIdx.x * TSIZE + (TX / WSIZE)) 12 | #define PY blockIdx.y 13 | 14 | #define WTHREAD (TX % WSIZE) 15 | #define WARP (TX / WSIZE) 16 | 17 | #define LAMBDA ((float) (${lamda})) 18 | #define IWIDTH (${iwidth}) 19 | #define IHEIGHT (${iheight}) 20 | #define OWIDTH (${owidth}) 21 | #define OHEIGHT (${oheight}) 22 | #define PWIDTH ((float) (${pwidth})) 23 | #define PHEIGHT ((float) (${pheight})) 24 | 25 | #define SX (fmaxf(PX * PWIDTH, 0.0f)) 26 | #define EX (fminf((PX + 1) * PWIDTH, IWIDTH)) 27 | #define SY (fmaxf(PY * PHEIGHT, 0.0f)) 28 | #define EY (fminf((PY + 1) * PHEIGHT, IHEIGHT)) 29 | #define SXR (__float2uint_rd(SX)) 30 | #define SYR (__float2uint_rd(SY)) 31 | #define EXR (__float2uint_ru(EX)) 32 | #define EYR (__float2uint_ru(EY)) 33 | #define XCOUNT (EXR - SXR) 34 | #define YCOUNT (EYR - SYR) 35 | #define PIXELCOUNT (XCOUNT * YCOUNT) 36 | 37 | //------------------------------------------------------------------- 38 | // DEVICE 39 | //------------------------------------------------------------------- 40 | __device__ __forceinline__ void normalize(float4& var) 41 | { 42 | var.x /= var.w; 43 | var.y /= var.w; 44 | var.z /= var.w; 45 | var.w = 1.0f; 46 | } 47 | 48 | //------------------------------------------------------------------- 49 | __device__ __forceinline__ void add(float4& output, const ${dtype}3& color, const float factor) 50 | { 51 | output.x += color.x * factor; 52 | output.y += color.y * factor; 53 | output.z += color.z * factor; 54 | output.w += factor; 55 | } 56 | 57 | //------------------------------------------------------------------- 58 | __device__ __forceinline__ void add(float4& output, const float4& color) 59 | { 60 | output.x += color.x; 61 | output.y += color.y; 62 | output.z += color.z; 63 | output.w += color.w; 64 | } 65 | 66 | //------------------------------------------------------------------- 67 | __device__ __forceinline__ float lambda(const float dist) 68 | { 69 | if (LAMBDA == 0.0f) 70 | return 1.0f; 71 | else if (LAMBDA == 1.0f) 72 | return dist; 73 | 74 | return powf(dist, LAMBDA); 75 | } 76 | 77 | //------------------------------------------------------------------- 78 | __device__ __forceinline__ void operator+=(float4& output, const float4 value) 79 | { 80 | output.x += value.x; 81 | output.y += value.y; 82 | output.z += value.z; 83 | output.w += value.w; 84 | } 85 | 86 | //------------------------------------------------------------------- 87 | __device__ __forceinline__ float contribution(float f, const unsigned int x, const unsigned int y) 88 | { 89 | if (x < SX) 90 | f *= 1.0f - (SX - x); 91 | 92 | if ((x + 1.0f) > EX) 93 | f *= 1.0f - ((x + 1.0f) - EX); 94 | 95 | if (y < SY) 96 | f *= 1.0f - (SY - y); 97 | 98 | if ((y + 1.0f) > EY) 99 | f *= 1.0f - ((y + 1.0f) - EY); 100 | 101 | return f; 102 | } 103 | 104 | //------------------------------------------------------------------- 105 | // taken from: https://devblogs.nvidia.com/parallelforall/faster-parallel-reductions-kepler/ 106 | __device__ __forceinline__ float4 __shfl_down(const float4 var, const unsigned int srcLane, const unsigned int width = 32) 107 | { 108 | float4 output; 109 | 110 | #if __CUDACC_VER_MAJOR__ 
>= 9 // CUDA 9.0 or later 111 | output.x = __shfl_down_sync(0xFFFFFFFFU, var.x, srcLane, width); 112 | output.y = __shfl_down_sync(0xFFFFFFFFU, var.y, srcLane, width); 113 | output.z = __shfl_down_sync(0xFFFFFFFFU, var.z, srcLane, width); 114 | output.w = __shfl_down_sync(0xFFFFFFFFU, var.w, srcLane, width); 115 | #else 116 | output.x = __shfl_down(var.x, srcLane, width); 117 | output.y = __shfl_down(var.y, srcLane, width); 118 | output.z = __shfl_down(var.z, srcLane, width); 119 | output.w = __shfl_down(var.w, srcLane, width); 120 | #endif 121 | 122 | return output; 123 | } 124 | 125 | //------------------------------------------------------------------- 126 | __device__ __forceinline__ void reduce(float4& value) 127 | { 128 | value += __shfl_down(value, 16); 129 | value += __shfl_down(value, 8); 130 | value += __shfl_down(value, 4); 131 | value += __shfl_down(value, 2); 132 | value += __shfl_down(value, 1); 133 | } 134 | 135 | //------------------------------------------------------------------- 136 | __device__ __forceinline__ float distance(const float4& avg, const ${dtype}3& color) 137 | { 138 | const float x = avg.x - color.x; 139 | const float y = avg.y - color.y; 140 | const float z = avg.z - color.z; 141 | 142 | return sqrtf(x * x + y * y + z * z); 143 | // return sqrtf((x * x + y * y + z * z) / 3.0f) / PIXEL_MAX; // L2-Norm / sqrt(255^2 * 3) 144 | } 145 | 146 | //------------------------------------------------------------------- 147 | extern "C" 148 | __global__ void kernelGuidance(const ${dtype}3* __restrict__ input, ${dtype}3* __restrict__ patches) 149 | { 150 | if (PX >= OWIDTH || PY >= OHEIGHT) 151 | return; 152 | 153 | // init 154 | float4 color = { 0 }; 155 | 156 | // iterate pixels 157 | for (unsigned int i = WTHREAD; i < PIXELCOUNT; i += WSIZE) 158 | { 159 | const unsigned int x = SXR + (i % XCOUNT); 160 | const unsigned int y = SYR + (i / XCOUNT); 161 | 162 | const float f = contribution(1.0f, x, y); 163 | 164 | const ${dtype}3& pixel = input[x + y * IWIDTH]; 165 | 166 | add(color, make_float4(pixel.x * f, pixel.y * f, pixel.z * f, f)); 167 | } 168 | 169 | // reduce warps 170 | reduce(color); 171 | 172 | // store results 173 | if ((TX % 32) == 0) 174 | { 175 | normalize(color); 176 | patches[PX + PY * OWIDTH] = make_${dtype}3(color.x, color.y, color.z); 177 | } 178 | } 179 | 180 | //------------------------------------------------------------------- 181 | __device__ __forceinline__ float4 calcAverage(const ${dtype}3* __restrict__ patches) 182 | { 183 | const float corner = 1.0f; 184 | const float edge = 2.0f; 185 | const float center = 4.0f; 186 | 187 | // calculate average color 188 | float4 avg = { 0.f }; 189 | 190 | // TOP 191 | if (PY > 0) 192 | { 193 | if (PX > 0) 194 | add(avg, patches[(PX - 1) + (PY - 1) * OWIDTH], corner); 195 | 196 | add(avg, patches[(PX)+(PY - 1) * OWIDTH], edge); 197 | 198 | if ((PX + 1) < OWIDTH) 199 | add(avg, patches[(PX + 1) + (PY - 1) * OWIDTH], corner); 200 | } 201 | 202 | // LEFT 203 | if (PX > 0) 204 | add(avg, patches[(PX - 1) + (PY)* OWIDTH], edge); 205 | 206 | // CENTER 207 | add(avg, patches[(PX)+(PY)* OWIDTH], center); 208 | 209 | // RIGHT 210 | if ((PX + 1) < OWIDTH) 211 | add(avg, patches[(PX + 1) + (PY)* OWIDTH], edge); 212 | 213 | // BOTTOM 214 | if ((PY + 1) < OHEIGHT) 215 | { 216 | if (PX > 0) 217 | add(avg, patches[(PX - 1) + (PY + 1) * OWIDTH], corner); 218 | 219 | add(avg, patches[(PX)+(PY + 1) * OWIDTH], edge); 220 | 221 | if ((PX + 1) < OWIDTH) 222 | add(avg, patches[(PX + 1) + (PY + 1) * OWIDTH], corner); 223 | } 224 
| 225 | normalize(avg); 226 | 227 | return avg; 228 | } 229 | 230 | //------------------------------------------------------------------- 231 | extern "C" 232 | __global__ void kernelDownsampling(const ${dtype}3* __restrict__ input, const ${dtype}3* __restrict__ patches, ${dtype}3* __restrict__ output) 233 | { 234 | if (PX >= OWIDTH || PY >= OHEIGHT) return; 235 | 236 | // init 237 | const float4 avg = calcAverage(patches); 238 | 239 | float4 color = { 0.f }; 240 | 241 | // iterate pixels 242 | for (unsigned int i = WTHREAD; i < PIXELCOUNT; i += WSIZE) 243 | { 244 | const unsigned int x = SXR + (i % XCOUNT); 245 | const unsigned int y = SYR + (i / XCOUNT); 246 | 247 | const ${dtype}3& pixel = input[x + y * IWIDTH]; 248 | float f = distance(avg, pixel); 249 | 250 | f = lambda(f); 251 | f = contribution(f, x, y); 252 | 253 | add(color, pixel, f); 254 | } 255 | 256 | // reduce warp 257 | reduce(color); 258 | 259 | if (WTHREAD == 0) 260 | { 261 | ${dtype}3& ref = output[PX + PY * OWIDTH]; 262 | 263 | if (color.w == 0.0f) 264 | ref = make_${dtype}3(avg.x, avg.y, avg.z); 265 | else 266 | { 267 | normalize(color); 268 | ref = make_${dtype}3(color.x, color.y, color.z); 269 | } 270 | } 271 | } 272 | -------------------------------------------------------------------------------- /Collections/examples/Dpid_cupy/dpid_cupy.vpy: -------------------------------------------------------------------------------- 1 | """core.dpid.Dpid() in CuPy""" 2 | 3 | from string import Template 4 | 5 | import cupy as cp 6 | import vapoursynth as vs 7 | from vapoursynth import core 8 | import muvsfunc_numpy as mufnp 9 | 10 | 11 | # Load source clip. Only RGB24/RGB48/RGBS is supported 12 | src = core.std.BlankClip(format=vs.RGB24) 13 | 14 | 15 | # params of core.dpid.Dpid() 16 | width = src.width // 2 17 | height = src.height // 2 18 | _lambda = 1.0 19 | 20 | # whether to enable '--use_fast_math' in NVRTC runtime compilation 21 | # to make use of fast math operations 22 | fast = False 23 | 24 | 25 | # pre-processing 26 | if src.format.color_family != vs.RGB: 27 | raise TypeError("'src' must be a RGB clip.") 28 | 29 | 30 | if src.format.sample_type == vs.FLOAT: 31 | dtype = 'float' 32 | 33 | elif src.format.bits_per_sample == 8: 34 | dtype = 'uchar' 35 | 36 | else: 37 | dtype = 'ushort' 38 | 39 | 40 | # load CUDA kernel 41 | with open('dpid.cu', 'r') as f: 42 | kernel_source_code = f.read() 43 | 44 | kernel_source_code = Template(kernel_source_code) 45 | kernel_source_code = kernel_source_code.substitute( 46 | iwidth=src.width, iheight=src.height, owidth=width, oheight=height, 47 | pwidth=src.width / width, pheight=src.height / height, lamda=_lambda, 48 | dtype=dtype) 49 | 50 | 51 | if fast: 52 | kernelGuidance = cp.RawKernel(code=kernel_source_code, name='kernelGuidance', 53 | options=('--use_fast_math', )) 54 | kernelDownsampling = cp.RawKernel(code=kernel_source_code, name='kernelDownsampling', 55 | options=('--use_fast_math', )) 56 | else: 57 | kernelGuidance = cp.RawKernel(code=kernel_source_code, name='kernelGuidance') 58 | kernelDownsampling = cp.RawKernel(code=kernel_source_code, name='kernelDownsampling') 59 | 60 | 61 | # create NumPy function 62 | def dpid_core(h_input, width, height, kernelGuidance, kernelDownsampling): 63 | d_input = cp.asarray(h_input) 64 | d_output = cp.zeros((height, width, 3), dtype=h_input.dtype) 65 | d_guidance = cp.zeros((height, width, 3), dtype=h_input.dtype) 66 | 67 | kernelGuidance((width // 4, height, 1), (128, 1, 1), (d_input, d_guidance)) 68 | kernelDownsampling((width // 4, 
height, 1), (128, 1, 1), (d_input, d_guidance, d_output)) 69 | 70 | h_out = cp.asnumpy(d_output) 71 | 72 | return h_out 73 | 74 | 75 | # process 76 | res = mufnp.numpy_process( 77 | [core.std.BlankClip(src, width=width, height=height), src], 78 | dpid_core, width=width, height=height, 79 | kernelGuidance=kernelGuidance, kernelDownsampling=kernelDownsampling, 80 | input_per_plane=False, output_per_plane=False, omit_first_clip=True) 81 | 82 | """ 83 | if src.format.sample_type == vs.INTEGER: 84 | res = core.dpid.Dpid(src, width=width, height=height, _lambda=_lambda) 85 | 86 | else: # src.format.sample_type == vs.FLOAT 87 | res = core.dpid.Dpid(src.fmtc.bitdepth(bits=16), width=width, height=height, _lambda=_lambda) 88 | res = core.fmtc.bitdepth(res, bits=src.format.bits_per_sample) 89 | """ 90 | 91 | 92 | res.set_output() 93 | -------------------------------------------------------------------------------- /Collections/examples/Expr_msvc/Expr.vpy: -------------------------------------------------------------------------------- 1 | """ 2 | this project is inspired by https://github.com/Endilll/exprcpp 3 | it dynamically generates code for a plugin (Expr in this case) 4 | 5 | ** the code here is badly written, don't use it** 6 | """ 7 | 8 | 9 | import os 10 | import tempfile 11 | from string import Template 12 | 13 | import vapoursynth as vs 14 | from vapoursynth import core 15 | 16 | 17 | def compile_plugin(filenames, vs_include_dir): 18 | from distutils.msvccompiler import MSVCCompiler 19 | msvc = MSVCCompiler() 20 | 21 | msvc.add_include_dir(vs_include_dir) 22 | 23 | tempdir=tempfile.gettempdir() 24 | 25 | msvc.compile(filenames, extra_postargs=["/O2", "/EHsc"], output_dir=tempdir) 26 | msvc.link_shared_lib([os.path.join(tempdir, "Source.obj")], output_libname="expr", output_dir=tempdir) 27 | core.std.LoadPlugin(os.path.join(tempdir, "expr.dll")) 28 | 29 | 30 | def Expr(clips, func_impl, func_name, planes=None, vs_include_dir=r"D:\VapourSynth\sdk\include\vapoursynth"): 31 | # assertions 32 | for clip in clips[1:]: 33 | assert ( 34 | clip.format.id == clips[0].format.id and 35 | clip.width == clips[0].width and 36 | clip.height == clips[0].height and 37 | clip.num_frames == clips[0].num_frames 38 | ) 39 | 40 | # completes code from template 41 | num_inputs = len(clips) 42 | 43 | clip = clips[0] 44 | if clip.format.sample_type == vs.FLOAT: 45 | t = "float" 46 | elif clip.format.bits_per_sample == 8: 47 | t = "uint8_t" 48 | else: 49 | t = "uint16_t" 50 | inputs = ", ".join(f"srcp[{i}][x]" for i in range(num_inputs)) 51 | 52 | if planes is None: 53 | planes = list(range(clip.format.num_planes)) 54 | planes = "{" + ", ".join(("1" if i in planes else "0") for i in range(3)) + "}" 55 | 56 | with open("Source_template.cpp", 'r') as f: 57 | code_template = f.read() 58 | 59 | code = ( 60 | Template(code_template) 61 | .substitute( 62 | num_inputs=num_inputs, t=t, inputs=inputs, func_name=func_name, 63 | planes=planes, func_impl=func_impl) 64 | ) 65 | 66 | # generates final source code 67 | with open("Source.cpp", 'w') as f: 68 | f.write(code) 69 | 70 | compile_plugin(("Source.cpp",), vs_include_dir) 71 | 72 | return core.expr.Expr(clips) 73 | 74 | 75 | # test clips 76 | src1 = core.std.BlankClip(format=vs.YUV420P8, color=[0, 13, 29]) 77 | src2 = core.std.BlankClip(format=vs.YUV420P8, color=[93, 128, 247]) 78 | 79 | 80 | # usage 81 | func_impl = """ 82 | T add(T x, T y) { 83 | return x + y - (T) 128; 84 | } 85 | """ 86 | 87 | res = Expr([src1, src2], func_impl, func_name="add", planes=[0, 2]) 88 | 89 
| res.set_output()
90 | 
--------------------------------------------------------------------------------
/Collections/examples/Expr_msvc/Source_template.cpp:
--------------------------------------------------------------------------------
1 | #include <cstdlib>
2 | #include <VapourSynth.h>
3 | #include <VSHelper.h>
4 | 
5 | #define kNumInputs ${num_inputs}
6 | #define T ${t}
7 | #define kInputs ${inputs}
8 | #define kFunction ${func_name}
9 | const int kProcess[3] = ${planes};
10 | 
11 | ${func_impl}
12 | 
13 | typedef struct {
14 |     VSNodeRef *node[kNumInputs];
15 |     const VSVideoInfo *vi;
16 | } ExprData;
17 | 
18 | static void VS_CC ExprInit(VSMap *in, VSMap *out, void **instanceData, VSNode *node, VSCore *core, const VSAPI *vsapi) {
19 |     ExprData *d = (ExprData *) *instanceData;
20 |     vsapi->setVideoInfo(d->vi, 1, node);
21 | }
22 | 
23 | static const VSFrameRef *VS_CC ExprGetFrame(int n, int activationReason, void **instanceData, void **frameData, VSFrameContext *frameCtx, VSCore *core, const VSAPI *vsapi) {
24 |     ExprData *d = (ExprData *) *instanceData;
25 | 
26 |     if (activationReason == arInitial) {
27 |         for (int i = 0; i < kNumInputs; ++i)
28 |             vsapi->requestFrameFilter(n, d->node[i], frameCtx);
29 |     } else if (activationReason == arAllFramesReady) {
30 |         const VSFrameRef *src[kNumInputs] = {};
31 |         for (int i = 0; i < kNumInputs; ++i)
32 |             src[i] = vsapi->getFrameFilter(n, d->node[i], frameCtx);
33 | 
34 |         const VSFormat *fi = d->vi->format;
35 |         int height = vsapi->getFrameHeight(src[0], 0);
36 |         int width = vsapi->getFrameWidth(src[0], 0);
37 | 
38 |         int planes[3] = { 0, 1, 2 };
39 |         const VSFrameRef *srcf[3] = { kProcess[0] ? nullptr : src[0], kProcess[1] ? nullptr : src[0], kProcess[2] ? nullptr : src[0] };
40 |         VSFrameRef *dst = vsapi->newVideoFrame2(fi, width, height, srcf, planes, src[0], core);
41 | 
42 |         for (int plane = 0; plane < d->vi->format->numPlanes; plane++) {
43 |             if (!kProcess[plane])
44 |                 continue;
45 | 
46 |             const T *srcp[kNumInputs] = {};
47 |             for (int i = 0; i < kNumInputs; ++i)
48 |                 srcp[i] = (const T*) vsapi->getReadPtr(src[i], plane);
49 | 
50 |             int src_stride = vsapi->getStride(src[0], plane) / sizeof(T); // getStride() is in bytes; convert to elements
51 |             T *dstp = (T*) vsapi->getWritePtr(dst, plane);
52 |             int dst_stride = vsapi->getStride(dst, plane) / sizeof(T);
53 |             int h = vsapi->getFrameHeight(src[0], plane);
54 |             int w = vsapi->getFrameWidth(src[0], plane);
55 | 
56 |             for (int y = 0; y < h; y++) {
57 |                 for (int x = 0; x < w; x++) {
58 |                     dstp[x] = kFunction(kInputs);
59 |                 }
60 | 
61 |                 dstp += dst_stride;
62 |                 for (int i = 0; i < kNumInputs; ++i)
63 |                     srcp[i] += src_stride;
64 |             }
65 |         }
66 | 
67 |         for (int i = 0; i < kNumInputs; ++i)
68 |             vsapi->freeFrame(src[i]);
69 | 
70 |         return dst;
71 |     }
72 | 
73 |     return 0;
74 | }
75 | 
76 | static void VS_CC ExprFree(void *instanceData, VSCore *core, const VSAPI *vsapi) {
77 |     ExprData *d = (ExprData *)instanceData;
78 |     for (int i = 0; i < kNumInputs; ++i)
79 |         vsapi->freeNode(d->node[i]);
80 |     free(d);
81 | }
82 | 
83 | static void VS_CC ExprCreate(const VSMap *in, VSMap *out, void *userData, VSCore *core, const VSAPI *vsapi) {
84 |     ExprData d;
85 |     ExprData *data;
86 | 
87 |     for (int i = 0; i < kNumInputs; ++i) {
88 |         auto node = vsapi->propGetNode(in, "clips", i, 0);
89 |         auto vi = vsapi->getVideoInfo(node);
90 |         if (!isConstantFormat(vi)) {
91 |             vsapi->setError(out, "Expr: only constant format input supported");
92 |             for (int j = 0; j < i; ++j)
93 |                 vsapi->freeNode(d.node[j]);
94 |             return;
95 |         }
96 |         d.node[i] = node;
97 |     }
98 | 
99 |     d.vi = vsapi->getVideoInfo(d.node[0]);
100 | 
101 |     data = (ExprData *) malloc(sizeof(d));
102 | 
*data = d; 103 | 104 | vsapi->createFilter(in, out, "Expr", ExprInit, ExprGetFrame, ExprFree, fmParallel, 0, data, core); 105 | } 106 | 107 | 108 | VS_EXTERNAL_API(void) VapourSynthPluginInit(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin *plugin) { 109 | configFunc("exprtest", "expr", "Expr test", VAPOURSYNTH_API_VERSION, 1, plugin); 110 | registerFunc("Expr", "clips:clip[];", ExprCreate, 0, plugin); 111 | } 112 | -------------------------------------------------------------------------------- /Collections/examples/FFDNet_TensorRT/README.md: -------------------------------------------------------------------------------- 1 | # Instructions 2 | 1. Install [CUDA-Python](https://github.com/NVIDIA/cuda-python). 3 | 4 | 2. Install TensorRT Python API. [Install-guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html). PyCUDA is not required. 5 | 6 | 3. Download [FFDNet model](https://github.com/HolyWu/vs-ffdnet/blob/master/vsffdnet/ffdnet_color.pth) from HolyWu/vs-ffdnet. 7 | 8 | 4. Run `build_engine.py` to create serialized TensorRT engine. 9 | 10 | "The generated plan files are **not portable** across platforms or TensorRT versions and are specific to the exact GPU model they were built on", according to [TensorRT Developer Guide](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work). This sample further assumes that the dimensions of the video are fixed and known before engine creation. 11 | 12 | 5. (Optionally) Run `benchmark.py` or `trtexec --loadEngine="ffdnet.engine" --useCudaGraph` to test the engine's raw performance. 13 | 14 | `benchmark.py` writes a DOT file "ffdnet.dot" describing inference graph structure when `use_cuda_graph=True`. The DOT file can be visualized by running `dot -Tsvg ffdnet.dot > ffdnet.svg`. 15 | 16 | 6. Run `ffdnet_test.vpy` to test in VapourSynth. 
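
   For reference, `ffdnet_test.vpy` boils down to the following sketch (the source path is a placeholder; the clip must be RGBS and must match the exact dimensions the engine was built for):

   ```python
   import vapoursynth as vs
   from vapoursynth import core
   import vs_ffdnet

   src = core.lsmas.LWLibavSource(r"input.mkv")  # placeholder source file
   # FFDNet expects 32-bit float RGB at the engine's build resolution
   src = core.resize.Bicubic(src, 1920, 1080, format=vs.RGBS, matrix_in_s="709")
   res = vs_ffdnet.FFDNet(src, sigma=5.0, use_cuda_graph=False)
   res.set_output()
   ```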
17 | 18 | -------------------------------------------------------------------------------- /Collections/examples/FFDNet_TensorRT/benchmark.py: -------------------------------------------------------------------------------- 1 | from cuda import cuda 2 | import tensorrt as trt 3 | from utils import * 4 | 5 | 6 | def _get_bindings( 7 | context: trt.IExecutionContext, 8 | num_bindings: int 9 | ) -> List[UniqueResource]: 10 | 11 | bindings = [] 12 | for i in range(num_bindings): 13 | binding = checkError(cuda.cuMemAlloc(context.get_strides(i)[0] * 4)) 14 | binding = UniqueResource(binding, cuda.cuMemFree, binding) 15 | bindings.append(binding) 16 | return bindings 17 | 18 | 19 | def benchmark( 20 | width: int, 21 | height: int, 22 | iter: int = 5, 23 | use_cuda_graph: bool = False, 24 | logger: trt.Logger = trt.Logger(trt.Logger.VERBOSE) 25 | ) -> None: 26 | 27 | cuda_context = init_cuda() 28 | 29 | runtime = trt.Runtime(logger) 30 | 31 | with open(f"ffdnet_{width}_{height}.engine", "rb") as f: 32 | engine = runtime.deserialize_cuda_engine(f.read()) 33 | 34 | execution_context = engine.create_execution_context() 35 | 36 | _bindings = _get_bindings(execution_context, engine.num_bindings) 37 | bindings = [binding.obj for binding in _bindings] 38 | 39 | stream = checkError(cuda.cuStreamCreate(cuda.CUstream_flags.CU_STREAM_NON_BLOCKING.value)) 40 | stream = UniqueResource(stream, cuda.cuStreamDestroy, stream) 41 | 42 | start = checkError(cuda.cuEventCreate(cuda.CUevent_flags.CU_EVENT_DEFAULT.value)) 43 | start = UniqueResource(start, cuda.cuEventDestroy, start) 44 | 45 | end = checkError(cuda.cuEventCreate(cuda.CUevent_flags.CU_EVENT_DEFAULT.value)) 46 | end = UniqueResource(end, cuda.cuEventDestroy, end) 47 | 48 | def execute(): 49 | execution_context.execute_async_v2(bindings, stream_handle=stream.obj) 50 | 51 | if use_cuda_graph: 52 | checkError(cuda.cuStreamBeginCapture( 53 | stream.obj, cuda.CUstreamCaptureMode.CU_STREAM_CAPTURE_MODE_RELAXED)) 54 | 55 | execute() 56 | 57 | graph = checkError(cuda.cuStreamEndCapture(stream.obj)) 58 | graphexec, error_node = checkError(cuda.cuGraphInstantiate( 59 | graph, logBuffer=b"", bufferSize=0)) 60 | graphexec = UniqueResource(graphexec, cuda.cuGraphExecDestroy, graphexec) 61 | checkError(cuda.cuGraphDebugDotPrint( 62 | graph, b"ffdnet.dot", 63 | cuda.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE.value)) 64 | checkError(cuda.cuGraphDestroy(graph)) 65 | 66 | for _ in range(iter): 67 | checkError(cuda.cuEventRecord(start.obj, stream.obj)) 68 | 69 | if use_cuda_graph: 70 | checkError(cuda.cuGraphLaunch(graphexec.obj, stream.obj)) 71 | else: 72 | execute() 73 | 74 | checkError(cuda.cuEventRecord(end.obj, stream.obj)) 75 | checkError(cuda.cuEventSynchronize(end.obj)) 76 | 77 | duration = checkError(cuda.cuEventElapsedTime(start.obj, end.obj)) 78 | 79 | print(f"duration: {duration} ms") 80 | 81 | 82 | if __name__ == "__main__": 83 | benchmark(width=1920, height=1080, iter=10, use_cuda_graph=False) 84 | -------------------------------------------------------------------------------- /Collections/examples/FFDNet_TensorRT/build_engine.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import tensorrt as trt 3 | from utils import * 4 | 5 | 6 | def build_engine( 7 | width: int, 8 | height: int, 9 | args_dict: Dict, 10 | max_workspace_size: int = int(1.6 * 1024 ** 3), 11 | logger: trt.Logger = trt.Logger(trt.Logger.VERBOSE) 12 | ) -> None: 13 | 14 | assert width % 2 == 0 and height % 2 == 0 15 
| 
16 |     builder = trt.Builder(logger)
17 |     builder.max_batch_size = 1
18 | 
19 |     flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
20 |     network = builder.create_network(flags=flags)
21 |     network.name = "ffdnet"
22 | 
23 |     input = network.add_input(
24 |         name="input", dtype=trt.float32, shape=(1, 3, height, width))
25 |     sigma = network.add_input(
26 |         name="sigma", dtype=trt.float32, shape=(1, 1, height // 2, width // 2))
27 | 
28 |     input_down = pixel_unshuffle(network, input, downscale_factor=2)
29 |     network_input = network.add_concatenation([input_down, sigma]).get_output(0)
30 | 
31 |     conv = convolution(
32 |         network, network_input, 13, 96, 3,
33 |         kernel=args_dict["model.0.weight"].numpy(),
34 |         bias=args_dict["model.0.bias"].numpy())
35 |     relu = network.add_activation(conv, trt.ActivationType.RELU).get_output(0)
36 | 
37 |     for i in range(1, 11):
38 |         conv = convolution(
39 |             network, relu, 96, 96, 3,
40 |             kernel=args_dict[f"model.{i*2}.weight"].numpy(),
41 |             bias=args_dict[f"model.{i*2}.bias"].numpy())
42 | 
43 |         relu = network.add_activation(conv, trt.ActivationType.RELU).get_output(0)
44 | 
45 |     conv = convolution(
46 |         network, relu, 96, 12, 3,
47 |         kernel=args_dict[f"model.22.weight"].numpy(),
48 |         bias=args_dict[f"model.22.bias"].numpy())
49 | 
50 |     output = pixel_shuffle(network, conv, upscale_factor=2)
51 | 
52 |     network.mark_output(output)
53 | 
54 |     config = builder.create_builder_config()
55 |     config.max_workspace_size = max_workspace_size
56 |     # load the timing cache if present; start from an empty one otherwise
57 |     try:
58 |         with open("timing_cache.buffer", "rb") as cache_f:
59 |             cache = config.create_timing_cache(cache_f.read())
60 |     except FileNotFoundError:
61 |         cache = config.create_timing_cache(b"")
62 |     config.set_timing_cache(cache=cache, ignore_mismatch=False)
63 | 
64 |     output = builder.build_serialized_network(network, config)
65 | 
66 |     with open("timing_cache.buffer", "wb") as cache_f:
67 |         cache_f.write(cache.serialize())
68 | 
69 |     with open(f"ffdnet_{width}_{height}.engine", "wb") as f:
70 |         f.write(output)
71 | 
72 | 
73 | if __name__ == "__main__":
74 |     import torch
75 | 
76 |     # https://github.com/HolyWu/vs-ffdnet/blob/master/vsffdnet/ffdnet_color.pth
77 |     args_dict = torch.load("ffdnet_color.pth")
78 | 
79 |     build_engine(width=1920, height=1080, args_dict=args_dict)
--------------------------------------------------------------------------------
/Collections/examples/FFDNet_TensorRT/ffdnet_test.vpy:
--------------------------------------------------------------------------------
1 | import vapoursynth as vs
2 | from vapoursynth import core
3 | 
4 | import sys
5 | sys.path.append(".")
6 | import vs_ffdnet
7 | 
8 | src = core.lsmas.LWLibavSource(r'PV02.mkv')
9 | src = core.resize.Bicubic(src, 1920, 1080, format=vs.RGBS, matrix_in_s="709")
10 | res = vs_ffdnet.FFDNet(src, sigma=5.0, use_cuda_graph=False)
11 | 
12 | res.set_output()
13 | 
14 | 
--------------------------------------------------------------------------------
/Collections/examples/FFDNet_TensorRT/utils.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 | 
3 | import numpy as np
4 | from cuda import cuda
5 | import tensorrt as trt
6 | 
7 | 
8 | def checkError(args):
9 |     err, *ret = args
10 | 
11 |     if isinstance(err, cuda.CUresult):
12 |         if err != cuda.CUresult.CUDA_SUCCESS:
13 |             raise RuntimeError(f"Cuda Error: {err}")
14 |     else:
15 |         raise RuntimeError(f"Unknown error type: {err}")
16 | 
17 |     if len(ret) == 0:
18 |         return
19 |     elif len(ret) == 1:
20 |         return ret[0]
21 |     else:
22 |         return ret
23 | 
24 | 
25 | class UniqueResource:
26 |     def __init__(self, obj, func, *args, **kwargs):
27 | 
self.obj = obj
28 |         self._func = [func]
29 |         self._args = [args]
30 |         self._kwargs = [kwargs]
31 | 
32 |     def register(self, func, *args, **kwargs):
33 |         """ register a finalizer """
34 | 
35 |         self._func.append(func)
36 |         self._args.append(args)
37 |         self._kwargs.append(kwargs)
38 | 
39 |     def __del__(self):
40 |         # calls finalizers in reversed order
41 |         it = zip(reversed(self._func), reversed(self._args), reversed(self._kwargs))
42 | 
43 |         for func, args, kwargs in it:
44 |             func(*args, **kwargs)
45 | 
46 | 
47 | def init_cuda():
48 |     checkError(cuda.cuInit(0))
49 |     device = checkError(cuda.cuDeviceGet(0))
50 | 
51 |     context = checkError(cuda.cuDevicePrimaryCtxRetain(device))
52 |     context = UniqueResource(context, cuda.cuDevicePrimaryCtxRelease, device)
53 | 
54 |     checkError(cuda.cuCtxPushCurrent(context.obj))
55 |     context.register(cuda.cuCtxPopCurrent)
56 | 
57 |     return device, context
58 | 
59 | 
60 | def convolution(
61 |     network: trt.INetworkDefinition,
62 |     input: trt.ITensor,
63 |     in_channels: int,
64 |     out_channels: int,
65 |     kernel_size: int,
66 |     kernel: Optional[np.ndarray] = None,
67 |     bias: Optional[np.ndarray] = None
68 | ) -> trt.ITensor:
69 | 
70 |     if kernel is None:
71 |         kernel = np.empty(
72 |             (out_channels, in_channels, kernel_size, kernel_size),
73 |             dtype=np.float32)
74 | 
75 |     if bias is None:
76 |         bias = np.zeros(out_channels, dtype=np.float32)
77 | 
78 |     output = network.add_convolution_nd(
79 |         input=input, num_output_maps=out_channels,
80 |         kernel_shape=(kernel_size, kernel_size),
81 |         kernel=kernel, bias=bias)
82 | 
83 |     output.padding_nd = (kernel_size // 2, kernel_size // 2)
84 |     output.stride_nd = (1, 1)
85 | 
86 |     return output.get_output(0)
87 | 
88 | 
89 | def pixel_unshuffle(
90 |     network: trt.INetworkDefinition,
91 |     input: trt.ITensor,
92 |     downscale_factor: int
93 | ) -> trt.ITensor:
94 | 
95 |     n, ic, ih, iw = input.shape
96 |     assert ih % downscale_factor == 0 and iw % downscale_factor == 0
97 |     oc = ic * (downscale_factor ** 2)
98 |     oh = ih // downscale_factor
99 |     ow = iw // downscale_factor
100 | 
101 |     reshape = network.add_shuffle(input)
102 |     reshape.reshape_dims = trt.Dims([n, ic, oh, downscale_factor, ow, downscale_factor])
103 |     reshape.second_transpose = trt.Permutation([0, 1, 3, 5, 2, 4])
104 | 
105 |     reshape = network.add_shuffle(reshape.get_output(0))
106 |     reshape.reshape_dims = trt.Dims([n, oc, oh, ow])
107 | 
108 |     return reshape.get_output(0)
109 | 
110 | 
111 | def pixel_shuffle(
112 |     network: trt.INetworkDefinition,
113 |     input: trt.ITensor,
114 |     upscale_factor: int
115 | ) -> trt.ITensor:
116 | 
117 |     n, ic, ih, iw = input.shape
118 |     assert ic % (upscale_factor ** 2) == 0
119 |     oc = ic // (upscale_factor ** 2)
120 |     oh = ih * upscale_factor
121 |     ow = iw * upscale_factor
122 | 
123 |     reshape = network.add_shuffle(input)
124 |     reshape.reshape_dims = trt.Dims([n, oc, upscale_factor, upscale_factor, ih, iw])
125 |     reshape.second_transpose = trt.Permutation([0, 1, 4, 2, 5, 3])
126 | 
127 |     reshape = network.add_shuffle(reshape.get_output(0))
128 |     reshape.reshape_dims = trt.Dims([n, oc, oh, ow])
129 | 
130 |     return reshape.get_output(0)
131 | 
132 | 
--------------------------------------------------------------------------------
/Collections/examples/FFDNet_TensorRT/vs_ffdnet.py:
--------------------------------------------------------------------------------
1 | import ctypes
2 | 
3 | import vapoursynth as vs
4 | from vapoursynth import core
5 | 
6 | import numpy as np
7 | import tensorrt as trt
8 | 
9 | from utils import *
10 | 
11 | 
12 | _is_api4: bool = hasattr(vs, 
"__api_version__") and vs.__api_version__.api_major == 4 13 | 14 | 15 | def _get_array(frame, plane, read=True): 16 | if not read and frame.readonly: 17 | raise ValueError("Frame is readonly") 18 | 19 | if _is_api4: 20 | return frame[plane] 21 | else: 22 | if read: 23 | return frame.get_read_array(plane) 24 | else: 25 | return frame.get_write_array(plane) 26 | 27 | 28 | _cuda_context = init_cuda() 29 | 30 | 31 | def FFDNet( 32 | clip: vs.VideoNode, 33 | sigma: float = 5.0, 34 | use_cuda_graph: bool = False, 35 | logger: trt.Logger = trt.Logger(trt.Logger.WARNING) 36 | ) -> vs.VideoNode: 37 | 38 | assert clip.format.id == vs.RGBS 39 | width, height = clip.width, clip.height 40 | 41 | sigma /= 255 42 | 43 | runtime = trt.Runtime(logger) 44 | 45 | with open(f"ffdnet_{width}_{height}.engine", "rb") as f: 46 | engine = runtime.deserialize_cuda_engine(f.read()) 47 | 48 | execution_context = engine.create_execution_context() 49 | input_size = execution_context.get_strides(0)[0] * 4 50 | input_shape = execution_context.get_binding_shape(0) 51 | sigma_size = execution_context.get_strides(1)[0] * 4 52 | sigma_shape = execution_context.get_binding_shape(1) 53 | output_size = execution_context.get_strides(2)[0] * 4 54 | output_shape = execution_context.get_binding_shape(2) 55 | 56 | h_sigma = checkError(cuda.cuMemHostAlloc( 57 | sigma_size, cuda.CU_MEMHOSTALLOC_WRITECOMBINED)) 58 | h_sigma = UniqueResource(h_sigma, cuda.cuMemFreeHost, h_sigma) 59 | h_sigma_pointer = ctypes.cast( 60 | ctypes.c_void_p(h_sigma.obj), ctypes.POINTER(ctypes.c_float)) 61 | h_sigma_array = np.ctypeslib.as_array( 62 | h_sigma_pointer, shape=(sigma_size // 4,)).reshape(sigma_shape) 63 | 64 | d_sigma = checkError(cuda.cuMemAlloc(sigma_size)) 65 | d_sigma = UniqueResource(d_sigma, cuda.cuMemFree, d_sigma) 66 | 67 | h_input = checkError(cuda.cuMemHostAlloc( 68 | input_size, cuda.CU_MEMHOSTALLOC_WRITECOMBINED)) 69 | h_input = UniqueResource(h_input, cuda.cuMemFreeHost, h_input) 70 | h_input_pointer = ctypes.cast( 71 | ctypes.c_void_p(h_input.obj), ctypes.POINTER(ctypes.c_float)) 72 | h_input_array = np.ctypeslib.as_array( 73 | h_input_pointer, shape=(input_size // 4,)).reshape(input_shape) 74 | 75 | d_input = checkError(cuda.cuMemAlloc(input_size)) 76 | d_input = UniqueResource(d_input, cuda.cuMemFree, d_input) 77 | 78 | d_output = checkError(cuda.cuMemAlloc(output_size)) 79 | d_output = UniqueResource(d_output, cuda.cuMemFree, d_output) 80 | 81 | h_output = checkError(cuda.cuMemAllocHost(output_size)) 82 | h_output = UniqueResource(h_output, cuda.cuMemFreeHost, h_output) 83 | h_output_pointer = ctypes.cast( 84 | ctypes.c_void_p(h_output.obj), ctypes.POINTER(ctypes.c_float)) 85 | h_output_array = np.ctypeslib.as_array( 86 | h_output_pointer, shape=(output_size // 4,)).reshape(output_shape) 87 | 88 | stream = checkError(cuda.cuStreamCreate( 89 | cuda.CUstream_flags.CU_STREAM_NON_BLOCKING.value)) 90 | stream = UniqueResource(stream, cuda.cuStreamDestroy, stream) 91 | 92 | h_sigma_array[...] 
= sigma
93 |     checkError(cuda.cuMemcpyHtoDAsync(
94 |         d_sigma.obj, h_sigma.obj, sigma_size, stream.obj))
95 | 
96 |     def execute():
97 |         checkError(cuda.cuMemcpyHtoDAsync(
98 |             d_input.obj, h_input.obj, input_size, stream.obj))
99 | 
100 |         execution_context.execute_async_v2(
101 |             [d_input.obj, d_sigma.obj, d_output.obj],
102 |             stream_handle=stream.obj)
103 | 
104 |         checkError(cuda.cuMemcpyDtoHAsync(
105 |             h_output.obj, d_output.obj, output_size, stream.obj))
106 | 
107 |     if use_cuda_graph:
108 |         checkError(cuda.cuStreamBeginCapture(
109 |             stream.obj, cuda.CUstreamCaptureMode.CU_STREAM_CAPTURE_MODE_RELAXED))
110 | 
111 |         execute()
112 | 
113 |         graph = checkError(cuda.cuStreamEndCapture(stream.obj))
114 |         graphexec, error_node = checkError(cuda.cuGraphInstantiate(
115 |             graph, logBuffer=b"", bufferSize=0))
116 |         graphexec = UniqueResource(graphexec, cuda.cuGraphExecDestroy, graphexec)
117 |         checkError(cuda.cuGraphDestroy(graph))
118 | 
119 |     def inference_core(n, f):
120 |         for i in range(3):
121 |             h_input_array[0, i, :, :] = np.asarray(_get_array(f, plane=i, read=True))
122 | 
123 |         if use_cuda_graph:
124 |             checkError(cuda.cuGraphLaunch(graphexec.obj, stream.obj))
125 |         else:
126 |             execute()
127 | 
128 |         fout = f.copy()
129 |         _get_array(fout, plane=0, read=False)  # triggers COW
130 |         checkError(cuda.cuStreamSynchronize(stream.obj))
131 | 
132 |         for i in range(3):
133 |             np.asarray(_get_array(fout, plane=i, read=False))[...] = h_output_array[0, i, :, :]
134 | 
135 |         return fout
136 | 
137 |     return core.std.ModifyFrame(clip, clips=[clip], selector=inference_core)
--------------------------------------------------------------------------------
/Collections/examples/KNLMeasCL_cupy/knlm.cu:
--------------------------------------------------------------------------------
1 | // original OpenCL implementation: https://github.com/Khanattila/KNLMeansCL/blob/27f95992e2344586b745d013eafa010764c78979/KNLMeansCL/NLMKernel.cpp#L67-L406
2 | 
3 | #define VI_DIM_X ${width}
4 | #define VI_DIM_Y ${height}
5 | 
6 | #define NLM_S ${s}
7 | #define NLM_H ((float) ${h})
8 | #define NLM_WMODE ${wmode}
9 | #define NLM_WREF ((float) ${wref})
10 | 
11 | #define NLM_NORM (255.0f * 255.0f)
12 | #define NLM_LEGACY 3.0f
13 | #define NLM_S_SIZE ((2 * NLM_S + 1) * (2 * NLM_S + 1))
14 | #define NLM_H2_INV_NORM (NLM_NORM / (NLM_LEGACY * NLM_H * NLM_H * NLM_S_SIZE))
15 | 
16 | #define HRZ_BLOCK_X ${hrz_block_x}
17 | #define HRZ_BLOCK_Y ${hrz_block_y}
18 | #define HRZ_RESULT ${hrz_result}
19 | #define VRT_BLOCK_X ${vrt_block_x}
20 | #define VRT_BLOCK_Y ${vrt_block_y}
21 | #define VRT_RESULT ${vrt_result}
22 | 
23 | #ifndef MIN
24 | #define MIN(a,b) (((a)<(b))?(a):(b))
25 | #endif
26 | 
27 | #ifndef MAX
28 | #define MAX(a,b) (((a)>(b))?(a):(b))
29 | #endif
30 | 
31 | #define CLAMPX(x) (MIN(MAX(x, 0), VI_DIM_X - 1))
32 | #define CLAMPY(y) (MIN(MAX(y, 0), VI_DIM_Y - 1))
33 | 
34 | #if __CUDACC_VER_MAJOR__ >= 9 // CUDA 9.0 or later
35 | #include <cooperative_groups.h>
36 | namespace cg = cooperative_groups;
37 | #endif
38 | 
39 | extern "C" __global__
40 | void nlmDistance(const float * __restrict__ U1, float * __restrict__ U4a,
41 |     const int qx, const int qy) {
42 | 
43 |     const int x = blockIdx.x * blockDim.x + threadIdx.x;
44 |     const int y = blockIdx.y * blockDim.y + threadIdx.y;
45 | 
46 |     if (x >= VI_DIM_X || y >= VI_DIM_Y)
47 |         return;
48 | 
49 |     const int gidx = y * VI_DIM_X + x;
50 | 
51 |     // #if defined(NLM_CLIP_REF_LUMA)
52 |     const float u1 = U1[gidx];
53 |     const float u1_pq = U1[CLAMPY(y + qy) * VI_DIM_X + CLAMPX(x + qx)];
54 | 
55 |     const float val = 3.0f * ((u1 - u1_pq) * (u1 - u1_pq));
56 |     // #endif
57 | 
58 |     U4a[gidx] = val;
59 | }
60 | 
61 | extern "C" __global__
62 | void nlmHorizontal(const float * __restrict__ U4a, float * __restrict__ U4b) {
63 | 
64 |     __shared__ float buffer[HRZ_BLOCK_Y][(HRZ_RESULT + 2) * HRZ_BLOCK_X];
65 | 
66 |     const int x = (blockIdx.x * HRZ_RESULT - 1) * HRZ_BLOCK_X + threadIdx.x;
67 |     const int y = blockIdx.y * blockDim.y + threadIdx.y;
68 | 
69 | #if __CUDACC_VER_MAJOR__ >= 9 // CUDA 9.0 or later
70 |     // Handle to thread block group
71 |     cg::thread_block cta = cg::this_thread_block();
72 | #endif
73 | 
74 |     for (int i = 0; i <= 1 + HRZ_RESULT; i++)
75 |         buffer[threadIdx.y][threadIdx.x + i * HRZ_BLOCK_X] =
76 |             U4a[y * VI_DIM_X + CLAMPX(x + i * HRZ_BLOCK_X)];
77 | 
78 | #if __CUDACC_VER_MAJOR__ >= 9 // CUDA 9.0 or later
79 |     cta.sync();
80 | #else
81 |     __syncthreads();
82 | #endif
83 | 
84 |     for (int i = 1; i <= HRZ_RESULT; i++) {
85 |         if ((x + i * HRZ_BLOCK_X >= VI_DIM_X) || y >= VI_DIM_Y)
86 |             return;
87 | 
88 |         float sum = 0.0f;
89 | 
90 |         for (int j = -NLM_S; j <= NLM_S; j++)
91 |             sum += buffer[threadIdx.y][threadIdx.x + i * HRZ_BLOCK_X + j];
92 | 
93 |         U4b[y * VI_DIM_X + (x + i * HRZ_BLOCK_X)] = sum; // (x + i * HRZ_BLOCK_X) >= 0
94 |     }
95 | }
96 | 
97 | extern "C" __global__
98 | void nlmVertical(const float * __restrict__ U4b, float * __restrict__ U4a) {
99 | 
100 |     __shared__ float buffer[VRT_BLOCK_X][(VRT_RESULT + 2) * VRT_BLOCK_Y + 1];
101 | 
102 |     const int x = blockIdx.x * blockDim.x + threadIdx.x;
103 |     const int y = (blockIdx.y * VRT_RESULT - 1) * VRT_BLOCK_Y + threadIdx.y;
104 | 
105 | #if __CUDACC_VER_MAJOR__ >= 9 // CUDA 9.0 or later
106 |     // Handle to thread block group
107 |     cg::thread_block cta = cg::this_thread_block();
108 | #endif
109 | 
110 |     for (int i = 0; i <= 1 + VRT_RESULT; i++)
111 |         buffer[threadIdx.x][threadIdx.y + i * VRT_BLOCK_Y] =
112 |             U4b[CLAMPY(y + i * VRT_BLOCK_Y) * VI_DIM_X + x];
113 | 
114 | #if __CUDACC_VER_MAJOR__ >= 9 // CUDA 9.0 or later
115 |     cta.sync();
116 | #else
117 |     __syncthreads();
118 | #endif
119 | 
120 |     for (int i = 1; i <= VRT_RESULT; i++) {
121 |         if (x >= VI_DIM_X || (y + i * VRT_BLOCK_Y) >= VI_DIM_Y)
122 |             return;
123 | 
124 |         float sum = 0.0f;
125 | 
126 |         for (int j = -NLM_S; j <= NLM_S; j++)
127 |             sum += buffer[threadIdx.x][threadIdx.y + i * VRT_BLOCK_Y + j];
128 | 
129 | #if NLM_WMODE == 0
130 |         // #if defined(NLM_WMODE_WELSCH)
131 |         const float val = expf(-sum * NLM_H2_INV_NORM);
132 | #elif NLM_WMODE == 1
133 |         // #if defined(NLM_WMODE_BISQUARE_A)
134 |         const float val = fdimf(1.0f, sum * NLM_H2_INV_NORM);
135 | #elif NLM_WMODE == 2
136 |         // #if defined(NLM_WMODE_BISQUARE_B)
137 |         const float val = powf(fdimf(1.0f, sum * NLM_H2_INV_NORM), 2.0f);
138 | #elif NLM_WMODE == 3
139 |         // #if defined(NLM_WMODE_BISQUARE_C)
140 |         const float val = powf(fdimf(1.0f, sum * NLM_H2_INV_NORM), 8.0f);
141 | #endif
142 | 
143 |         U4a[(y + i * VRT_BLOCK_Y) * VI_DIM_X + x] = val; // (y + i * VRT_BLOCK_Y) >= 0
144 |     }
145 | }
146 | 
147 | extern "C" __global__
148 | void nlmAccumulation(const float * __restrict__ U1a, float * __restrict__ U2a,
149 |     float * __restrict__ U2b, const float * __restrict__ U4a, float * __restrict__ U5,
150 |     const int qx, const int qy) {
151 | 
152 |     const int x = blockIdx.x * blockDim.x + threadIdx.x;
153 |     const int y = blockIdx.y * blockDim.y + threadIdx.y;
154 | 
155 |     if (x >= VI_DIM_X || y >= VI_DIM_Y)
156 |         return;
157 | 
158 |     const int gidx = y * VI_DIM_X + x;
159 | 
160 |     const float u4 = U4a[gidx];
161 |     const float u4_mq = U4a[CLAMPY(y - qy) * VI_DIM_X + CLAMPX(x - qx)];
162 
| U5[gidx] = fmaxf(u4, fmaxf(u4_mq, U5[gidx])); 163 | 164 | // #if (NLM_CHANNELS == 1) 165 | const float u1_pq = U1a[CLAMPY(y + qy) * VI_DIM_X + CLAMPX(x + qx)]; 166 | const float u1_mq = U1a[CLAMPY(y - qy) * VI_DIM_X + CLAMPX(x - qx)]; 167 | 168 | U2a[gidx] += (u4 * u1_pq) + (u4_mq * u1_mq); 169 | U2b[gidx] += (u4 + u4_mq); 170 | // #endif 171 | } 172 | 173 | extern "C" __global__ 174 | void nlmFinish(const float * __restrict__ U1a, float * __restrict__ U1z, 175 | const float * __restrict__ U2a, const float * __restrict__ U2b, 176 | const float * __restrict__ U5) { 177 | 178 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 179 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 180 | 181 | if (x >= VI_DIM_X || y >= VI_DIM_Y) 182 | return; 183 | 184 | const int gidx = y * VI_DIM_X + x; 185 | const float m = NLM_WREF * U5[gidx]; 186 | 187 | // #if (NLM_CHANNELS == 1) 188 | const float u1 = U1a[gidx]; 189 | const float u2a = U2a[gidx]; 190 | const float u2b = U2b[gidx]; 191 | 192 | const float den = m + u2b; 193 | const float val = (u1 * m + u2a) / den; 194 | 195 | U1z[gidx] = val; 196 | // #endif 197 | } 198 | -------------------------------------------------------------------------------- /Collections/examples/KNLMeasCL_cupy/knlm_cupy.vpy: -------------------------------------------------------------------------------- 1 | """Reimplementation of KNLMeansCL(Non-local Means) in CuPy""" 2 | 3 | from string import Template 4 | 5 | import cupy as cp 6 | import vapoursynth as vs 7 | from vapoursynth import core 8 | import muvsfunc_numpy as mufnp 9 | 10 | 11 | # Load source clip. Only GRAYS is supported 12 | src = core.std.BlankClip(format=vs.GRAYS) 13 | 14 | # params of KNLMeansCL. Documentation: https://github.com/Khanattila/KNLMeansCL/wiki/Filter-description 15 | # d = 0 # only spatial processing is implemented. 
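# For reference, knlm.cu stores 3 * (u1[p] - u1[q])^2 per pixel in nlmDistance,
# box-sums it over the (2*s+1)^2 patch window with the separable
# horizontal/vertical passes, and converts the sum into a Welsch weight (wmode=0):
#     w(p, q) = exp(-sum * 255^2 / (3 * h^2 * (2*s+1)^2))
# where the 255^2 and 3 factors mirror NLM_NORM and NLM_LEGACY in the kernel;
# wmode=1/2/3 substitute bisquare-style falloffs for the exponential.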
16 | a = 2 17 | s = 4 18 | h = 1.2 19 | channels = 'Y' # only grayscale filtering is implemented 20 | wmode = 0 21 | wref = 1.0 22 | rclip = None # not implemented 23 | ocl_x = 16 # local work group width of the separable convolution kernel 24 | ocl_y = 8 # local work group height of the separable convolution kernel 25 | ocl_r = 3 # number of processed pixel for work-item 26 | 27 | # whether to enable '--use_fast_math' in NVRTC runtime compilation 28 | # to make use of fast math operations 29 | fast = False 30 | 31 | 32 | # pre-processing 33 | if src.format.id != vs.GRAYS: 34 | raise TypeError('Only 32-bit float grayscale input is supported!') 35 | 36 | # CUDA kernel execution configuration 37 | dst_block = (32, 32, 1) # used by 'nlmDistance' 38 | hrz_block = (ocl_x, ocl_y, 1) # used by 'nlmHorizontal' 39 | vrt_block = (ocl_x, ocl_y, 1) # used by 'nlmVertical' 40 | work_block = (32, 32, 1) # used by 'nlmAccumulation' and 'nlmFinish' 41 | 42 | # load CUDA kernel 43 | with open('knlm.cu', 'r') as f: 44 | kernel_source_code = f.read() 45 | 46 | kernel_source_code = Template(kernel_source_code) 47 | kernel_source_code = kernel_source_code.substitute( 48 | width=src.width, height=src.height, s=s, h=h, wmode=wmode, wref=wref, 49 | hrz_block_x=ocl_x, hrz_block_y=ocl_y, hrz_result=ocl_r, 50 | vrt_block_x=ocl_x, vrt_block_y=ocl_y, vrt_result=ocl_r) 51 | 52 | if fast: 53 | nlmDistance = cp.RawKernel(kernel_source_code, 'nlmDistance', 54 | options=('--use_fast_math', )) 55 | nlmHorizontal = cp.RawKernel(kernel_source_code, 'nlmHorizontal', 56 | options=('--use_fast_math', )) 57 | nlmVertical = cp.RawKernel(kernel_source_code, 'nlmVertical', 58 | options=('--use_fast_math', )) 59 | nlmAccumulation = cp.RawKernel(kernel_source_code, 'nlmAccumulation', 60 | options=('--use_fast_math', )) 61 | nlmFinish = cp.RawKernel(kernel_source_code, 'nlmFinish', 62 | options=('--use_fast_math', )) 63 | else: 64 | nlmDistance = cp.RawKernel(kernel_source_code, 'nlmDistance') 65 | nlmHorizontal = cp.RawKernel(kernel_source_code, 'nlmHorizontal') 66 | nlmVertical = cp.RawKernel(kernel_source_code, 'nlmVertical') 67 | nlmAccumulation = cp.RawKernel(kernel_source_code, 'nlmAccumulation') 68 | nlmFinish = cp.RawKernel(kernel_source_code, 'nlmFinish') 69 | 70 | 71 | # create NumPy function 72 | def nlm_core(h_img, a, nlmDistance, nlmHorizontal, nlmVertical, nlmAccumulation, nlmFinish): 73 | U1a = cp.asarray(h_img) 74 | h, w = U1a.shape 75 | 76 | U2a = cp.zeros_like(U1a) 77 | U2b = cp.zeros_like(U1a) 78 | U1z = cp.empty_like(U1a) 79 | U4a = cp.empty_like(U1a) 80 | U4b = cp.empty_like(U1a) 81 | U5 = cp.full_like(U1a, fill_value=1.1920928955078125e-7) # CL_FLT_EPSILON 82 | 83 | # Spatial processing 84 | for j in range(-a, a+1): 85 | for i in range(-a, a+1): 86 | if (j * (2 * a + 1) + i < 0): 87 | nlmDistance(((w + dst_block[0] - 1) // dst_block[0], (h + dst_block[1] - 1) // dst_block[1], 1), dst_block, (U1a, U4a, i, j)) 88 | nlmHorizontal(((w + hrz_block[0] - 1) // hrz_block[0], (h + hrz_block[1] - 1) // hrz_block[1], 1), hrz_block, (U4a, U4b)) 89 | nlmVertical(((w + vrt_block[0] - 1) // vrt_block[0], (h + vrt_block[1] - 1) // vrt_block[1]), vrt_block, (U4b, U4a)) 90 | nlmAccumulation(((w + work_block[0] - 1) // work_block[0], (h + work_block[1] - 1) // work_block[1]), work_block, (U1a, U2a, U2b, U4a, U5, i, j)) 91 | 92 | nlmFinish(((w + work_block[0] - 1) // work_block[0], (h + work_block[1] - 1) // work_block[1]), work_block, (U1a, U1z, U2a, U2b, U5)) 93 | 94 | h_out = cp.asnumpy(U1z) 95 | 96 | return h_out 97 | 98 | 99 | # 
100 | res = mufnp.numpy_process(
101 | src, nlm_core, a=a,
102 | nlmDistance=nlmDistance, nlmHorizontal=nlmHorizontal,
103 | nlmVertical=nlmVertical, nlmAccumulation=nlmAccumulation,
104 | nlmFinish=nlmFinish)
105 |
106 | """
107 | res = core.knlm.KNLMeansCL(
108 | src, d=0, a=a, s=s, h=h, channels='Y', wmode=wmode, rclip=None,
109 | device_type='GPU', ocl_x=ocl_x, ocl_y=ocl_y, ocl_r=ocl_r, info=False)
110 | """
111 |
112 | res.set_output()
113 |
--------------------------------------------------------------------------------
/Collections/examples/KNLMeasCL_cupy/knlm_mem_inefficient.cu:
--------------------------------------------------------------------------------
1 | // original OpenCL implementation: https://github.com/Khanattila/KNLMeansCL/blob/27f95992e2344586b745d013eafa010764c78979/KNLMeansCL/NLMKernel.cpp#L67-L406
2 |
3 | #define WIDTH ${width}
4 | #define HEIGHT ${height}
5 |
6 | #define NLM_A ${a}
7 | #define NLM_S ${s}
8 | #define NLM_H ((float) ${h})
9 | #define NLM_WMODE ${wmode}
10 | #define NLM_WREF ((float) ${wref})
11 |
12 | #define NLM_NORM (255.0f * 255.0f)
13 | #define NLM_LEGACY 3.0f
14 | #define NLM_S_SIZE ((2 * NLM_S + 1) * (2 * NLM_S + 1))
15 | #define NLM_H2_INV_NORM (NLM_NORM / (NLM_LEGACY * NLM_H * NLM_H * NLM_S_SIZE))
16 |
17 | #define HRZ_BLOCK_X ${hrz_block_x}
18 | #define HRZ_BLOCK_Y ${hrz_block_y}
19 | #define HRZ_RESULT ${hrz_result}
20 | #define VRT_BLOCK_X ${vrt_block_x}
21 | #define VRT_BLOCK_Y ${vrt_block_y}
22 | #define VRT_RESULT ${vrt_result}
23 |
24 | #ifndef MIN
25 | #define MIN(a,b) (((a)<(b))?(a):(b))
26 | #endif
27 |
28 | #ifndef MAX
29 | #define MAX(a,b) (((a)>(b))?(a):(b))
30 | #endif
31 |
32 | #define CLAMPX(x) (MIN(MAX(x, 0), WIDTH - 1))
33 | #define CLAMPY(y) (MIN(MAX(y, 0), HEIGHT - 1))
34 |
35 | #if __CUDACC_VER_MAJOR__ >= 9 // CUDA 9.0 or later
36 | #include <cooperative_groups.h>
37 | namespace cg = cooperative_groups;
38 | #endif
39 |
40 | extern "C" __global__
41 | void nlmDistance(const float U1[HEIGHT][WIDTH], float U4a[NLM_A*2+1][NLM_A*2+1][HEIGHT][WIDTH]) {
42 |
43 | int x = blockIdx.x * blockDim.x + threadIdx.x;
44 | int y = blockIdx.y * blockDim.y + threadIdx.y;
45 |
46 | if (x >= WIDTH || y >= HEIGHT)
47 | return;
48 |
49 | for (int qy = -NLM_A; qy <= NLM_A; qy++)
50 | for (int qx = -NLM_A; qx <= NLM_A; qx++)
51 | if (qy * (2 * NLM_A + 1) + qx < 0) {
52 | // #if defined(NLM_CLIP_REF_LUMA)
53 | float diff = U1[y][x] - U1[CLAMPY(y + qy)][CLAMPX(x + qx)];
54 |
55 | float val = 3.0f * diff * diff;
56 | // #endif
57 |
58 | U4a[qy+NLM_A][qx+NLM_A][y][x] = val;
59 | }
60 | }
61 |
62 | extern "C" __global__
63 | void nlmHorizontal(const float U4a[NLM_A*2+1][NLM_A*2+1][HEIGHT][WIDTH], float U4b[NLM_A*2+1][NLM_A*2+1][HEIGHT][WIDTH]) {
64 |
65 | __shared__ float buffer[HRZ_BLOCK_Y][(HRZ_RESULT + 2) * HRZ_BLOCK_X];
66 |
67 | const int x = (blockIdx.x * HRZ_RESULT - 1) * HRZ_BLOCK_X + threadIdx.x;
68 | const int y = blockIdx.y * blockDim.y + threadIdx.y;
69 |
70 | cg::thread_block cta = cg::this_thread_block();
71 |
72 | for (int qy = -NLM_A; qy <= NLM_A; qy++)
73 | for (int qx = -NLM_A; qx <= NLM_A; qx++) {
74 | if (qy * (2 * NLM_A + 1) + qx < 0) {
75 | for (int i = 0; i <= 1 + HRZ_RESULT; i++)
76 | buffer[threadIdx.y][threadIdx.x + i * HRZ_BLOCK_X] =
77 | U4a[qy+NLM_A][qx+NLM_A][y][CLAMPX(x + i * HRZ_BLOCK_X)];
78 | }
79 |
80 | cta.sync();
81 |
82 | if (qy * (2 * NLM_A + 1) + qx < 0) {
83 | for (int i = 1; i <= HRZ_RESULT; i++) {
84 | if ((x + i * HRZ_BLOCK_X < WIDTH) && y < HEIGHT) {
85 | float sum = 0.0f;
86 |
87 | for (int j = -NLM_S; j <= NLM_S; j++)
88 | sum += buffer[threadIdx.y][threadIdx.x + i * HRZ_BLOCK_X + j];
89 |
90 | U4b[qy+NLM_A][qx+NLM_A][y][x + i * HRZ_BLOCK_X] = sum; // (x + i * HRZ_BLOCK_X) >= 0
91 | }
92 | }
93 | }
94 | }
95 | }
96 |
97 | extern "C" __global__
98 | void nlmVertical(const float U4b[NLM_A*2+1][NLM_A*2+1][HEIGHT][WIDTH], float U4a[NLM_A*2+1][NLM_A*2+1][HEIGHT][WIDTH]) {
99 |
100 | __shared__ float buffer[VRT_BLOCK_X][(VRT_RESULT + 2) * VRT_BLOCK_Y + 1];
101 |
102 | const int x = blockIdx.x * blockDim.x + threadIdx.x;
103 | const int y = (blockIdx.y * VRT_RESULT - 1) * VRT_BLOCK_Y + threadIdx.y;
104 |
105 |
106 | cg::thread_block cta = cg::this_thread_block();
107 |
108 | for (int qy = -NLM_A; qy <= NLM_A; qy++)
109 | for (int qx = -NLM_A; qx <= NLM_A; qx++) {
110 | if (qy * (2 * NLM_A + 1) + qx < 0) {
111 | for (int i = 0; i <= 1 + VRT_RESULT; i++)
112 | buffer[threadIdx.x][threadIdx.y + i * VRT_BLOCK_Y] =
113 | U4b[qy+NLM_A][qx+NLM_A][CLAMPY(y + i * VRT_BLOCK_Y)][x];
114 | }
115 |
116 | cta.sync();
117 |
118 | if (qy * (2 * NLM_A + 1) + qx < 0) {
119 | for (int i = 1; i <= VRT_RESULT; i++) {
120 | if (x < WIDTH && (y + i * VRT_BLOCK_Y) < HEIGHT) {
121 | float sum = 0.0f;
122 |
123 | for (int j = -NLM_S; j <= NLM_S; j++)
124 | sum += buffer[threadIdx.x][threadIdx.y + i * VRT_BLOCK_Y + j];
125 |
126 | #if NLM_WMODE == 0
127 | // #if defined(NLM_WMODE_WELSCH)
128 | const float val = expf(-sum * NLM_H2_INV_NORM);
129 | #elif NLM_WMODE == 1
130 | // #if defined(NLM_WMODE_BISQUARE_A)
131 | const float val = fdimf(1.0f, sum * NLM_H2_INV_NORM);
132 | #elif NLM_WMODE == 2
133 | // #if defined(NLM_WMODE_BISQUARE_B)
134 | const float val = powf(fdimf(1.0f, sum * NLM_H2_INV_NORM), 2.0f);
135 | #elif NLM_WMODE == 3
136 | // #if defined(NLM_WMODE_BISQUARE_C)
137 | const float val = powf(fdimf(1.0f, sum * NLM_H2_INV_NORM), 8.0f);
138 | #endif
139 |
140 | U4a[qy+NLM_A][qx+NLM_A][y + i * VRT_BLOCK_Y][x] = val; // (y + i * VRT_BLOCK_Y) >= 0
141 | }
142 | }
143 | }
144 | }
145 | }
146 |
147 | extern "C" __global__
148 | void nlmAccumulation_Finish(const float U1a[HEIGHT][WIDTH], float U1z[HEIGHT][WIDTH],
149 | const float U4a[NLM_A*2+1][NLM_A*2+1][HEIGHT][WIDTH]) {
150 |
151 | const int x = blockIdx.x * blockDim.x + threadIdx.x;
152 | const int y = blockIdx.y * blockDim.y + threadIdx.y;
153 |
154 | if (x >= WIDTH || y >= HEIGHT)
155 | return;
156 |
157 | float u5 = 1.1920928955078125e-7f; // CL_FLT_EPSILON
158 | float u2a = 0.f;
159 | float u2b = 0.f;
160 |
161 | for (int qy = -NLM_A; qy <= NLM_A; qy++)
162 | for (int qx = -NLM_A; qx <= NLM_A; qx++)
163 | if (qy * (2 * NLM_A + 1) + qx < 0) {
164 | float u4 = U4a[qy+NLM_A][qx+NLM_A][y][x];
165 | float u4_mq = U4a[qy+NLM_A][qx+NLM_A][CLAMPY(y - qy)][CLAMPX(x - qx)];
166 | u5 = fmaxf(u4, fmaxf(u4_mq, u5));
167 |
168 | // #if (NLM_CHANNELS == 1)
169 | float u1_pq = U1a[CLAMPY(y + qy)][CLAMPX(x + qx)];
170 | float u1_mq = U1a[CLAMPY(y - qy)][CLAMPX(x - qx)];
171 |
172 | u2a += (u4 * u1_pq) + (u4_mq * u1_mq);
173 | u2b += (u4 + u4_mq);
174 | // #endif
175 | }
176 |
177 | float m = NLM_WREF * u5;
178 | float den = m + u2b;
179 |
180 | U1z[y][x] = (U1a[y][x] * m + u2a) / den;
181 | }
182 |
--------------------------------------------------------------------------------
/Collections/examples/KNLMeasCL_cupy/knlm_mem_inefficient_cupy.vpy:
--------------------------------------------------------------------------------
1 | """Reimplementation of KNLMeansCL (Non-local Means) in CuPy"""
2 |
3 | from string import Template
4 |
5 | import cupy as cp
6 | import vapoursynth as vs
7 | from vapoursynth import core
8 | import muvsfunc_numpy as mufnp
9 |
10 |
11 | # Load source clip. Only GRAYS is supported
12 | src = core.std.BlankClip(format=vs.GRAYS)
13 | src = core.imwri.Read(r"C:\Users\KurtWu\Desktop\1\300_gray.png").fmtc.bitdepth(bits=32)
14 |
15 | # params of KNLMeansCL. Documentation: https://github.com/Khanattila/KNLMeansCL/wiki/Filter-description
16 | # d = 0 # only spatial processing is implemented.
17 | a = 2
18 | s = 4
19 | h = 1.2
20 | channels = 'Y' # only grayscale filtering is implemented
21 | wmode = 0
22 | wref = 1.0
23 | rclip = None # not implemented
24 | ocl_x = 16 # local work group width of the separable convolution kernel
25 | ocl_y = 8 # local work group height of the separable convolution kernel
26 | ocl_r = 3 # number of processed pixels per work-item
27 |
28 | # whether to enable '--use_fast_math' in NVRTC runtime compilation
29 | # to make use of fast math operations
30 | fast = False
31 |
32 |
33 | # pre-processing
34 | if src.format.id != vs.GRAYS:
35 | raise TypeError('Only 32-bit float grayscale input is supported!')
36 |
37 | # CUDA kernel execution configuration
38 | dst_block = (32, 32, 1) # used by 'nlmDistance'
39 | hrz_block = (ocl_x, ocl_y, 1) # used by 'nlmHorizontal'
40 | vrt_block = (ocl_x, ocl_y, 1) # used by 'nlmVertical'
41 | work_block = (32, 32, 1) # used by 'nlmAccumulation_Finish'
42 |
43 | # load CUDA kernel
44 | with open('knlm_mem_inefficient.cu', 'r') as f:
45 | kernel_source_code = f.read()
46 |
47 | kernel_source_code = Template(kernel_source_code)
48 | kernel_source_code = kernel_source_code.substitute(
49 | width=src.width, height=src.height, a=a, s=s, h=h, wmode=wmode, wref=wref,
50 | hrz_block_x=ocl_x, hrz_block_y=ocl_y, hrz_result=ocl_r,
51 | vrt_block_x=ocl_x, vrt_block_y=ocl_y, vrt_result=ocl_r)
52 |
53 | if fast:
54 | nlmDistance = cp.RawKernel(kernel_source_code, 'nlmDistance',
55 | options=('--use_fast_math', ))
56 | nlmHorizontal = cp.RawKernel(kernel_source_code, 'nlmHorizontal',
57 | options=('--use_fast_math', ))
58 | nlmVertical = cp.RawKernel(kernel_source_code, 'nlmVertical',
59 | options=('--use_fast_math', ))
60 | nlmAccumulation_Finish = cp.RawKernel(kernel_source_code, 'nlmAccumulation_Finish',
61 | options=('--use_fast_math', ))
62 | else:
63 | nlmDistance = cp.RawKernel(kernel_source_code, 'nlmDistance')
64 | nlmHorizontal = cp.RawKernel(kernel_source_code, 'nlmHorizontal')
65 | nlmVertical = cp.RawKernel(kernel_source_code, 'nlmVertical')
66 | nlmAccumulation_Finish = cp.RawKernel(kernel_source_code, 'nlmAccumulation_Finish')
67 |
68 |
69 | # create NumPy function
70 | def nlm_core(h_img, a, nlmDistance, nlmHorizontal, nlmVertical, nlmAccumulation_Finish):
71 | U1a = cp.asarray(h_img)
72 | h, w = U1a.shape
73 |
74 | U4a = cp.empty((2*a+1, 2*a+1, h, w), dtype=U1a.dtype) # dtype must match the 'float' buffers declared in the kernel
75 | U4b = cp.empty((2*a+1, 2*a+1, h, w), dtype=U1a.dtype)
76 | U1z = cp.empty_like(U1a)
77 |
78 | # Spatial processing
79 | nlmDistance(((w + dst_block[0] - 1) // dst_block[0], (h + dst_block[1] - 1) // dst_block[1], 1), dst_block, (U1a, U4a))
80 | nlmHorizontal(((w + hrz_block[0] - 1) // hrz_block[0], (h + hrz_block[1] - 1) // hrz_block[1], 1), hrz_block, (U4a, U4b))
81 | nlmVertical(((w + vrt_block[0] - 1) // vrt_block[0], (h + vrt_block[1] - 1) // vrt_block[1]), vrt_block, (U4b, U4a))
82 | nlmAccumulation_Finish(((w + work_block[0] - 1) // work_block[0], (h + work_block[1] - 1) // work_block[1]), work_block, (U1a, U1z, U4a))
83 |
84 | h_out = cp.asnumpy(U1z)
85 |
86 | return h_out
87 |
88 |
89 | # process
90 | res = mufnp.numpy_process(
91 | src, 
nlm_core, a=a, 92 | nlmDistance=nlmDistance, nlmHorizontal=nlmHorizontal, 93 | nlmVertical=nlmVertical, 94 | nlmAccumulation_Finish=nlmAccumulation_Finish) 95 | 96 | """ 97 | res = core.knlm.KNLMeansCL( 98 | src, d=0, a=a, s=s, h=h, channels='Y', wmode=wmode, rclip=None, 99 | device_type='GPU', ocl_x=ocl_x, ocl_y=ocl_y, ocl_r=ocl_r, info=False) 100 | """ 101 | 102 | res.set_output() 103 | -------------------------------------------------------------------------------- /Collections/examples/NLH_cupy/NLH_cupy.vpy: -------------------------------------------------------------------------------- 1 | """ 2 | Implementation of NLH (NLH: A Blind Pixel-level Non-local Method for Real-world Image Denoising) in CuPy 3 | 4 | Ref: 5 | [1] Hou, Y., Xu, J., Liu, M., Liu, G., Liu, L., Zhu, F., & Shao, L. (2019). 6 | NLH: A Blind Pixel-level Non-local Method for Real-world Image Denoising. 7 | arXiv preprint arXiv:1906.06834. 8 | 9 | """ 10 | 11 | from string import Template 12 | 13 | import cupy as cp 14 | import vapoursynth as vs 15 | from vapoursynth import core 16 | 17 | import muvsfunc_numpy as mufnp 18 | 19 | 20 | # Load source clip. Only GRAYS is supported 21 | src = core.std.BlankClip(format=vs.GRAYS) 22 | 23 | # params of NLH 24 | # d = 0 # only spatial processing is implemented. 25 | a = 2 26 | s = 4 27 | h = 1.6 28 | h2 = 1.6 29 | 30 | # whether to enable '--use_fast_math' in NVRTC runtime compilation 31 | # to make use of fast math operations 32 | fast = False 33 | 34 | # CUDA kernel execution configuration 35 | work_block = (16, 16, 1) 36 | 37 | 38 | # pre-processing 39 | if src.format.id != vs.GRAYS: 40 | raise TypeError('Only 32-bit float grayscale input is supported!') 41 | 42 | 43 | # load CUDA kernel 44 | with open('kernel.cu', 'r') as f: 45 | kernel_source_code = f.read() 46 | 47 | kernel_source_code = Template(kernel_source_code) 48 | kernel_source_code = kernel_source_code.substitute( 49 | width=src.width, height=src.height, a=a, s=s, h=h, h2=h2) 50 | 51 | if fast: 52 | compute = cp.RawKernel(kernel_source_code, 'compute', options=('--use_fast_math', )) 53 | else: 54 | compute = cp.RawKernel(kernel_source_code, 'compute') 55 | 56 | 57 | # create NumPy function 58 | def nlm_core(h_src, compute): 59 | d_src = cp.asarray(h_src) 60 | h, w = h_src.shape 61 | 62 | d_dst = cp.empty_like(d_src) 63 | 64 | compute(((w + work_block[0] - 1) // work_block[0], (h + work_block[1] - 1) // work_block[1]), work_block, (d_src, d_dst)) 65 | 66 | h_out = cp.asnumpy(d_dst) 67 | 68 | return h_out 69 | 70 | # process 71 | res = mufnp.numpy_process(src, nlm_core, compute=compute) 72 | 73 | # feisty2's CPU implementation (https://github.com/IFeelBloated/NLMeans-PM/tree/f2539968e3ded41588cc18b3c2a984f42b79e4a9) 74 | # "ref" is not currently implemented on the CUDA version 75 | # res = core.test.Test(src, a=a, s=s, h=h, h2=h2) 76 | 77 | 78 | res.set_output() 79 | -------------------------------------------------------------------------------- /Collections/examples/NLH_cupy/kernel.cu: -------------------------------------------------------------------------------- 1 | #define WIDTH ${width} 2 | #define HEIGHT ${height} 3 | #define NLM_A ${a} 4 | #define NLM_S ${s} 5 | #define NLM_H ((float) (${h} / 79.636080791869483631941455867052)) 6 | #define NLM_H2 ((float) (${h2} / 79.636080791869483631941455867052)) 7 | 8 | 9 | #define GET(pointer, y0, x0) pointer[max(min((y0), HEIGHT-1), 0) * WIDTH + max(min((x0), WIDTH-1), 0)] 10 | #define PatchMatrix(y0, x0) GET(srcp, y-NLM_A-NLM_S + (y0) / (2*NLM_A+1) + (x0) / 
(2*NLM_S+1), x-NLM_A-NLM_S + (y0) % (2*NLM_A+1) + (x0) % (2*NLM_S+1)) 11 | #define Square(x) ((x) * (x)) 12 | 13 | #define PatchSize Square(2 * NLM_S + 1) 14 | #define SearchSize Square(2 * NLM_A + 1) 15 | 16 | extern "C" __global__ 17 | void compute(const float * __restrict__ srcp, float * __restrict__ dstp) { 18 | int x = blockDim.x * blockIdx.x + threadIdx.x; 19 | int y = blockDim.y * blockIdx.y + threadIdx.y; 20 | 21 | if (x >= WIDTH || y >= HEIGHT) 22 | return; 23 | 24 | float PatchWeights[SearchSize]; 25 | 26 | // CalculatePatchWeights 27 | float NormalizingConstant = 0.f; 28 | for (int i = 0; i < SearchSize; i++) { 29 | float SSE = 0.f; 30 | for (int j = 0; j < PatchSize; j++) 31 | SSE += Square(PatchMatrix(i, j) - PatchMatrix(SearchSize / 2, j)); 32 | float Weight = expf(-SSE / Square(NLM_H)); 33 | PatchWeights[i] = Weight; 34 | NormalizingConstant += Weight; 35 | } 36 | 37 | for (int i = 0; i < SearchSize; i++) { 38 | PatchWeights[i] /= NormalizingConstant; 39 | } 40 | 41 | // CalculatePositionWeights & Aggregate 42 | float Result = 0.f; 43 | NormalizingConstant = 0.f; 44 | for (int j = 0; j < PatchSize; j++) { 45 | float SSE = 0.f; 46 | for (int i = 0; i < SearchSize; i++) 47 | SSE += PatchWeights[i] * Square(PatchMatrix(i, j) - PatchMatrix(i, PatchSize / 2)); 48 | float Weight = expf(-SSE / Square(NLM_H2)); 49 | Result += Weight * PatchMatrix(SearchSize / 2, j); 50 | NormalizingConstant += Weight; 51 | } 52 | 53 | GET(dstp, y, x) = Result / NormalizingConstant; 54 | } 55 | -------------------------------------------------------------------------------- /Collections/examples/SigmaFilter_cupy/sigma_filter.cu: -------------------------------------------------------------------------------- 1 | #define WIDTH ${width} 2 | #define HEIGHT ${height} 3 | 4 | #define RADIUS ${radius} 5 | #define THRESHOLD ((float) ${threshold}) 6 | 7 | #ifndef MIN 8 | #define MIN(a,b) (((a)<(b))?(a):(b)) 9 | #endif 10 | 11 | #ifndef MAX 12 | #define MAX(a,b) (((a)>(b))?(a):(b)) 13 | #endif 14 | 15 | #define CLAMPX(x) (MIN(MAX(x, 0), WIDTH - 1)) 16 | #define CLAMPY(y) (MIN(MAX(y, 0), HEIGHT - 1)) 17 | 18 | extern "C" __global__ 19 | void sigmaFilter(const float * __restrict__ src, float * __restrict__ dst) { 20 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 21 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 22 | 23 | if (x >= WIDTH || y >= HEIGHT) 24 | return; 25 | 26 | const float center = src[y * WIDTH + x]; 27 | 28 | float sum = 0.0f; 29 | int count = 0; 30 | 31 | for (int j = -RADIUS; j <= RADIUS; j++) 32 | for (int i = -RADIUS; i <= RADIUS; i++) { 33 | const float val = src[CLAMPY(y + j) * WIDTH + CLAMPX(x + i)]; 34 | 35 | if (fabsf(val - center) < THRESHOLD) { 36 | sum += val; 37 | count += 1; 38 | } 39 | } 40 | 41 | dst[y * WIDTH + x] = sum / count; 42 | } 43 | -------------------------------------------------------------------------------- /Collections/examples/SigmaFilter_cupy/sigma_filter_cupy.vpy: -------------------------------------------------------------------------------- 1 | """Sigma Filter in CuPy""" 2 | 3 | from string import Template 4 | 5 | import cupy as cp 6 | import vapoursynth as vs 7 | from vapoursynth import core 8 | import muvsfunc_numpy as mufnp 9 | 10 | 11 | # Load source clip. 
Only GRAYS is supported 12 | src = core.std.BlankClip(format=vs.GRAYS) 13 | 14 | # params of mufnp.SigmaFilter() 15 | radius = 3 16 | thr = 0.01 17 | 18 | # whether to enable '--use_fast_math' in NVRTC runtime compilation 19 | # to make use of fast math operations 20 | fast = False 21 | 22 | 23 | # pre-processing 24 | if src.format.id != vs.GRAYS: 25 | raise TypeError('Only 32-bit float grayscale input is supported!') 26 | 27 | # CUDA kernel execution configuration 28 | blksize = (16, 8, 1) 29 | 30 | # load CUDA kernel 31 | with open('sigma_filter.cu', 'r') as f: 32 | kernel_source_code = f.read() 33 | 34 | kernel_source_code = Template(kernel_source_code) 35 | kernel_source_code = kernel_source_code.substitute( 36 | width=src.width, height=src.height, radius=radius, threshold=thr) 37 | 38 | if fast: 39 | kernel = cp.RawKernel(kernel_source_code, 'sigmaFilter', 40 | options=('--use_fast_math', )) 41 | else: 42 | kernel = cp.RawKernel(kernel_source_code, 'sigmaFilter') 43 | 44 | # create NumPy function 45 | def sigma_filter_core(h_img): 46 | d_img = cp.asarray(h_img) 47 | h, w = d_img.shape 48 | 49 | d_out = cp.empty_like(d_img) 50 | 51 | kernel(((w + blksize[0] - 1)//blksize[0], (h + blksize[1] - 1)//blksize[1]), blksize, (d_img, d_out)) 52 | 53 | h_out = cp.asnumpy(d_out) 54 | 55 | return h_out 56 | 57 | 58 | # process 59 | res = mufnp.numpy_process(src, sigma_filter_core) 60 | 61 | # res = mufnp.SigmaFilter(src, radius=radius, thr=thr) 62 | 63 | res.set_output() -------------------------------------------------------------------------------- /Collections/examples/SigmaFilter_cython/setup.py: -------------------------------------------------------------------------------- 1 | # To build, run `python setup.py build_ext --inplace` 2 | 3 | from distutils.core import setup 4 | from distutils.extension import Extension 5 | from Cython.Build import cythonize 6 | 7 | 8 | ext_modules = [Extension(name="sigma_filter", sources=["sigma_filter.pyx"])] 9 | 10 | setup(name='sigma_filter', ext_modules=cythonize(module_list=ext_modules, language_level=3)) 11 | -------------------------------------------------------------------------------- /Collections/examples/SigmaFilter_cython/sigma_filter.pyx: -------------------------------------------------------------------------------- 1 | # cython: boundscheck=False, initializedcheck=False, language_level=3, nonecheck=False, overflowcheck=False, wraparound=False 2 | 3 | cimport cython 4 | from cython cimport view 5 | 6 | 7 | cdef Py_ssize_t clamp(const Py_ssize_t val, const Py_ssize_t low, const Py_ssize_t high) nogil: 8 | return min(max(val, low), high) 9 | 10 | 11 | cpdef void sigma_filter( 12 | const float [:, ::view.contiguous] src, float [:, ::view.contiguous] dst, 13 | const int radius, const float threshold): 14 | """Sigma filter""" 15 | 16 | cdef Py_ssize_t height = src.shape[0] 17 | cdef Py_ssize_t width = src.shape[1] 18 | 19 | cdef float center, val, acc 20 | cdef int count, x, y, i, j 21 | 22 | with nogil: 23 | for y in range(height): 24 | for x in range(width): 25 | center = src[y, x] 26 | 27 | acc = 0. 
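# sigma filter: average all neighbors whose difference from the center is below `threshold`;
# assuming threshold > 0, `count` always includes the center pixel itself, so the division
# at the end of the loop cannot be by zero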
28 | count = 0 29 | 30 | for j in range(-radius, radius + 1): 31 | for i in range(-radius, radius + 1): 32 | val = src[clamp(y + j, 0, height - 1), clamp(x + i, 0, width - 1)] 33 | 34 | if abs(center - val) < threshold: 35 | acc += val 36 | count += 1 37 | 38 | dst[y, x] = acc / count 39 | -------------------------------------------------------------------------------- /Collections/examples/SigmaFilter_cython/sigma_filter_cython.vpy: -------------------------------------------------------------------------------- 1 | """Sigma Filter in Cython""" 2 | 3 | import vapoursynth as vs 4 | from vapoursynth import core 5 | from functools import partial 6 | 7 | # To build, run `python setup.py build_ext --inplace` 8 | from sigma_filter import sigma_filter 9 | 10 | is_api4: bool = hasattr(vs, "__api_version__") and vs.__api_version__.api_major == 4 11 | 12 | def get_array(frame, plane, read=True): 13 | if not read and frame.readonly: 14 | raise ValueError("Frame is readonly") 15 | 16 | if is_api4: 17 | return frame[plane] 18 | else: 19 | if read: 20 | return frame.get_read_array(plane) 21 | else: 22 | return frame.get_write_array(plane) 23 | 24 | 25 | # Load source clip. Only GRAYS is supported 26 | src = core.std.BlankClip(format=vs.GRAYS) 27 | 28 | # params of mufnp.SigmaFilter() 29 | radius = 3 30 | thr = 0.01 31 | 32 | parallel = True 33 | 34 | 35 | # pre-processing 36 | if src.format.id != vs.GRAYS: 37 | raise TypeError('Only 32-bit float grayscale input is supported!') 38 | 39 | 40 | def executor(n, f, radius, thr): 41 | fout = f.copy() 42 | 43 | src = get_array(f, 0) 44 | dst = get_array(fout, 0, read=False) 45 | 46 | sigma_filter(src, dst, radius, thr) 47 | 48 | return fout 49 | 50 | selector = partial(executor, radius=radius, thr=thr) 51 | # process 52 | if parallel: 53 | res = core.std.FrameEval(src, lambda n: core.std.ModifyFrame(src, src, selector)) 54 | else: 55 | res = core.std.ModifyFrame(src, src, selector) 56 | 57 | # res = mufnp.SigmaFilter(src, radius=radius, thr=thr) 58 | 59 | res.set_output() 60 | -------------------------------------------------------------------------------- /Collections/examples/Super-xBR_cupy/super-xbr.cu: -------------------------------------------------------------------------------- 1 | /* 2 | CUDA port of Super-xBR image upscaling algorithm by WolframRhodium 3 | 4 | The algorithm is modified for data parallelism 5 | 6 | Source: https://pastebin.com/cbH8ZQQT 7 | 8 | ******* Super XBR Scaler ******* 9 | 10 | Copyright (c) 2016 Hyllian - sergiogdb@gmail.com 11 | 12 | Permission is hereby granted, free of charge, to any person obtaining a copy 13 | of this software and associated documentation files (the "Software"), to deal 14 | in the Software without restriction, including without limitation the rights 15 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 16 | copies of the Software, and to permit persons to whom the Software is 17 | furnished to do so, subject to the following conditions: 18 | 19 | The above copyright notice and this permission notice shall be included in 20 | all copies or substantial portions of the Software. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 25 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 27 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 28 | THE SOFTWARE. 29 | */ 30 | 31 | #define IWIDTH (${width}) 32 | #define IHEIGHT (${height}) 33 | #define OWIDTH (IWIDTH * 2) 34 | #define OHEIGHT (IHEIGHT * 2) 35 | 36 | #define WGT1 ((float) ${wgt1}) // 0.129633f 37 | #define WGT2 ((float) ${wgt2}) // 0.175068f 38 | #define W1 (-WGT1) 39 | #define W2 (WGT1 + 0.5f) 40 | #define W3 (-WGT2) 41 | #define W4 (WGT2 + 0.5f) 42 | 43 | __device__ __forceinline__ int clamp(int x, int floor, int ceil) { 44 | return max(floor, min(x, ceil)); 45 | } 46 | 47 | __device__ __forceinline__ float3 min4(float3 a, float3 b, float3 c, float3 d) { 48 | return make_float3( 49 | fminf(fminf(a.x, b.x), fminf(c.x, d.x)), 50 | fminf(fminf(a.y, b.y), fminf(c.y, d.y)), 51 | fminf(fminf(a.z, b.z), fminf(c.z, d.z))); 52 | } 53 | 54 | __device__ __forceinline__ float3 max4(float3 a, float3 b, float3 c, float3 d) { 55 | return make_float3( 56 | fmaxf(fmaxf(a.x, b.x), fmaxf(c.x, d.x)), 57 | fmaxf(fmaxf(a.y, b.y), fmaxf(c.y, d.y)), 58 | fmaxf(fmaxf(a.z, b.z), fmaxf(c.z, d.z))); 59 | } 60 | 61 | __device__ __forceinline__ float3 clamp(float3 x, float3 floor, float3 ceil) { 62 | return make_float3( 63 | fmaxf(floor.x, fminf(x.x, ceil.x)), 64 | fmaxf(floor.y, fminf(x.y, ceil.y)), 65 | fmaxf(floor.z, fminf(x.z, ceil.z))); 66 | } 67 | 68 | __device__ __forceinline__ float df(float a, float b) { 69 | return fabsf(a - b); 70 | } 71 | 72 | __device__ __forceinline__ float3 operator+(float3 a, float3 b) 73 | { 74 | return make_float3( 75 | a.x + b.x, 76 | a.y + b.y, 77 | a.z + b.z); 78 | } 79 | 80 | __device__ __forceinline__ float3 operator*(float a, float3 b) 81 | { 82 | return make_float3( 83 | a * b.x, 84 | a * b.y, 85 | a * b.z); 86 | } 87 | 88 | __device__ __forceinline__ float diagonal_edge(const float mat[][4], const float *wp) { 89 | float dw1 = wp[0]*(df(mat[0][2], mat[1][1]) + df(mat[1][1], mat[2][0]) + df(mat[1][3], mat[2][2]) + df(mat[2][2], mat[3][1])) + \ 90 | wp[1]*(df(mat[0][3], mat[1][2]) + df(mat[2][1], mat[3][0])) + \ 91 | wp[2]*(df(mat[0][3], mat[2][1]) + df(mat[1][2], mat[3][0])) + \ 92 | wp[3]*(df(mat[1][2], mat[2][1])) + \ 93 | wp[4]*(df(mat[0][2], mat[2][0]) + df(mat[1][3], mat[3][1])) + \ 94 | wp[5]*(df(mat[0][1], mat[1][0]) + df(mat[2][3], mat[3][2])); 95 | 96 | float dw2 = wp[0]*(df(mat[0][1], mat[1][2]) + df(mat[1][2], mat[2][3]) + df(mat[1][0], mat[2][1]) + df(mat[2][1], mat[3][2])) + \ 97 | wp[1]*(df(mat[0][0], mat[1][1]) + df(mat[2][2], mat[3][3])) + \ 98 | wp[2]*(df(mat[0][0], mat[2][2]) + df(mat[1][1], mat[3][3])) + \ 99 | wp[3]*df(mat[1][1], mat[2][2]) + \ 100 | wp[4]*(df(mat[1][0], mat[3][2]) + df(mat[0][1], mat[2][3])) + \ 101 | wp[5]*(df(mat[0][2], mat[1][3]) + df(mat[2][0], mat[3][1])); 102 | 103 | return (dw1 - dw2); 104 | } 105 | 106 | extern "C" 107 | __global__ void super_xbr_pass1(const float3 * __restrict__ src, float3 * __restrict__ dst) { 108 | // src: W x H, dst: 2W x 2H 109 | // x: 0:W:1, y: 0:H:1 110 | 111 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 112 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 113 | 114 | if (x >= IWIDTH || y >= IHEIGHT) 115 | return; 116 | 117 | // copy pixels to output image 118 | dst[((y * 2) * OWIDTH) + (x * 2)] = src[y * IWIDTH + x]; 119 | dst[((y * 2) * OWIDTH) + (x * 2 + 1)] = src[y * IWIDTH + x]; 120 | dst[((y * 2 + 1) * 
OWIDTH) + (x * 2)] = src[y * IWIDTH + x]; 121 | 122 | // init 123 | constexpr float wp[6] = { 2.0f, 1.0f, -1.0f, 4.0f, -1.0f, 1.0f }; 124 | 125 | float3 rgb_data[4][4]; 126 | float y_data[4][4]; 127 | 128 | // sample supporting pixels in original image 129 | for (int sy = -1; sy <= 2; ++sy) { 130 | const int csy = clamp(y + sy, 0, IHEIGHT - 1); 131 | 132 | for (int sx = -1; sx <= 2; ++sx) { 133 | // clamp pixel locations 134 | const int csx = clamp(x + sx, 0, IWIDTH - 1); 135 | 136 | // sample & add weighted components 137 | rgb_data[sy + 1][sx + 1] = src[csy * IWIDTH + csx]; 138 | 139 | y_data[sy + 1][sx + 1] = 0.2126f * rgb_data[sy + 1][sx + 1].x + 0.7152f * rgb_data[sy + 1][sx + 1].y + \ 140 | 0.0722f * rgb_data[sy + 1][sx + 1].z; 141 | } 142 | } 143 | 144 | const float3 min_sample = min4(rgb_data[1][1], rgb_data[2][1], rgb_data[1][2], rgb_data[2][2]); 145 | const float3 max_sample = max4(rgb_data[1][1], rgb_data[2][1], rgb_data[1][2], rgb_data[2][2]); 146 | 147 | const float d_edge = diagonal_edge(y_data, wp); 148 | 149 | const float3 rgb1 = W1 * (rgb_data[0][3] + rgb_data[3][0]) + W2 * (rgb_data[1][2] + rgb_data[2][1]); 150 | const float3 rgb2 = W1 * (rgb_data[0][0] + rgb_data[3][3]) + W2 * (rgb_data[1][1] + rgb_data[2][2]); 151 | 152 | // generate and write result 153 | float3 rgbf = (d_edge <= 0.0f) ? rgb1 : rgb2; 154 | 155 | // anti-ringing, clamp 156 | rgbf = clamp(rgbf, min_sample, max_sample); 157 | 158 | // output 159 | dst[((y * 2 + 1) * OWIDTH) + (x * 2 + 1)] = rgbf; 160 | } 161 | 162 | extern "C" 163 | __global__ void super_xbr_pass2(const float3 * __restrict__ src, float3 * __restrict__ dst) { 164 | // src: 2W x 2H, dst: 2W x 2H 165 | // x: 0:W:1, y: 0:H:1 166 | 167 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 168 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 169 | 170 | if (x >= IWIDTH || y >= IHEIGHT) 171 | return; 172 | 173 | // copy pixels to output image 174 | dst[((y * 2) * OWIDTH) + (x * 2)] = src[((y * 2) * OWIDTH) + (x * 2)]; 175 | dst[((y * 2 + 1) * OWIDTH) + (x * 2 + 1)] = src[((y * 2 + 1) * OWIDTH) + (x * 2 + 1)]; 176 | 177 | // init 178 | constexpr float wp[6] = { 2.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; 179 | 180 | float3 rgb_data[4][4]; 181 | float y_data[4][4]; 182 | 183 | // output: dst[((y * 2) * OWIDTH) + (x * 2 + 1)] 184 | { 185 | // sample supporting pixels in original image 186 | for (int sy = -1; sy <= 2; ++sy) { 187 | for (int sx = -1; sx <= 2; ++sx) { 188 | // clamp pixel locations 189 | const int csy = clamp((y * 2) + sx - sy, 0, OHEIGHT - 1); 190 | const int csx = clamp((x * 2) + sx + sy, 0, OWIDTH - 1); 191 | 192 | // sample & add weighted components 193 | rgb_data[sy + 1][sx + 1] = src[(csy * OWIDTH + csx)]; 194 | 195 | y_data[sy + 1][sx + 1] = 0.2126f * rgb_data[sy + 1][sx + 1].x + 0.7152f * rgb_data[sy + 1][sx + 1].y + \ 196 | 0.0722f * rgb_data[sy + 1][sx + 1].z; 197 | } 198 | } 199 | 200 | const float3 min_sample = min4(rgb_data[1][1], rgb_data[2][1], rgb_data[1][2], rgb_data[2][2]); 201 | const float3 max_sample = max4(rgb_data[1][1], rgb_data[2][1], rgb_data[1][2], rgb_data[2][2]); 202 | 203 | const float d_edge = diagonal_edge(y_data, wp); 204 | 205 | const float3 rgb1 = W3 * (rgb_data[0][3] + rgb_data[3][0]) + W4 * (rgb_data[1][2] + rgb_data[2][1]); 206 | const float3 rgb2 = W3 * (rgb_data[0][0] + rgb_data[3][3]) + W4 * (rgb_data[1][1] + rgb_data[2][2]); 207 | 208 | // generate and write result 209 | float3 rgbf = (d_edge <= 0.0f) ? 
rgb1 : rgb2; 210 | 211 | // anti-ringing, clamp 212 | rgbf = clamp(rgbf, min_sample, max_sample); 213 | 214 | // output 215 | dst[((y * 2) * OWIDTH) + (x * 2 + 1)] = rgbf; 216 | } 217 | 218 | // output: dst[((y * 2 + 1) * OWIDTH) + (x * 2)] 219 | { 220 | // sample supporting pixels in original image 221 | for (int sy = -1; sy <= 2; ++sy) { 222 | for (int sx = -1; sx <= 2; ++sx) { 223 | // clamp pixel locations 224 | const int csy = clamp((y * 2) + sx - sy + 1, 0, OHEIGHT - 1); 225 | const int csx = clamp((x * 2) + sx + sy - 1, 0, OWIDTH - 1); 226 | 227 | // sample & add weighted components 228 | rgb_data[sy + 1][sx + 1] = src[csy * OWIDTH + csx]; 229 | 230 | y_data[sy + 1][sx + 1] = 0.2126f * rgb_data[sy + 1][sx + 1].x + 0.7152f * rgb_data[sy + 1][sx + 1].y + \ 231 | 0.0722f * rgb_data[sy + 1][sx + 1].z; 232 | } 233 | } 234 | 235 | const float3 min_sample = min4(rgb_data[1][1], rgb_data[2][1], rgb_data[1][2], rgb_data[2][2]); 236 | const float3 max_sample = max4(rgb_data[1][1], rgb_data[2][1], rgb_data[1][2], rgb_data[2][2]); 237 | 238 | const float d_edge = diagonal_edge(y_data, wp); 239 | 240 | const float3 rgb1 = W3 * (rgb_data[0][3] + rgb_data[3][0]) + W4 * (rgb_data[1][2] + rgb_data[2][1]); 241 | const float3 rgb2 = W3 * (rgb_data[0][0] + rgb_data[3][3]) + W4 * (rgb_data[1][1] + rgb_data[2][2]); 242 | 243 | // generate and write result 244 | float3 rgbf = (d_edge <= 0.0f) ? rgb1 : rgb2; 245 | 246 | // anti-ringing, clamp 247 | rgbf = clamp(rgbf, min_sample, max_sample); 248 | 249 | // output 250 | dst[((y * 2 + 1) * OWIDTH) + (x * 2)] = rgbf; 251 | } 252 | } 253 | 254 | extern "C" 255 | __global__ void super_xbr_pass3(const float3 * __restrict__ src, float3 * __restrict__ dst) { 256 | // src: 2W x 2H, dst: 2W x 2H 257 | // x: 0:2W:1, y: 0:2H:1 258 | 259 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 260 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 261 | 262 | if (x >= OWIDTH || y >= OHEIGHT) 263 | return; 264 | 265 | // init 266 | constexpr float wp[6] = { 2.0f, 1.0f, -1.0f, 4.0f, -1.0f, 1.0f }; 267 | 268 | float3 rgb_data[4][4]; 269 | float y_data[4][4]; 270 | 271 | // sample supporting pixels in original image 272 | for (int sy = -2; sy <= 1; ++sy) { 273 | const int csy = clamp(y + sy, 0, OHEIGHT - 1); 274 | 275 | for (int sx = -2; sx <= 1; ++sx) { 276 | // clamp pixel locations 277 | const int csx = clamp(x + sx, 0, OWIDTH - 1); 278 | 279 | // sample & add weighted components 280 | rgb_data[sy + 2][sx + 2] = src[csy * OWIDTH + csx]; 281 | 282 | y_data[sy + 2][sx + 2] = 0.2126f * rgb_data[sy + 2][sx + 2].x + 0.7152f * rgb_data[sy + 2][sx + 2].y + \ 283 | 0.0722f * rgb_data[sy + 2][sx + 2].z; 284 | } 285 | } 286 | 287 | const float3 min_sample = min4(rgb_data[1][1], rgb_data[2][1], rgb_data[1][2], rgb_data[2][2]); 288 | const float3 max_sample = max4(rgb_data[1][1], rgb_data[2][1], rgb_data[1][2], rgb_data[2][2]); 289 | 290 | const float d_edge = diagonal_edge(y_data, wp); 291 | 292 | const float3 rgb1 = W1 * (rgb_data[0][3] + rgb_data[3][0]) + W2 * (rgb_data[1][2] + rgb_data[2][1]); 293 | const float3 rgb2 = W1 * (rgb_data[0][0] + rgb_data[3][3]) + W2 * (rgb_data[1][1] + rgb_data[2][2]); 294 | 295 | // generate and write result 296 | float3 rgbf = (d_edge <= 0.0f) ? 
rgb1 : rgb2;
297 |
298 | // anti-ringing, clamp
299 | rgbf = clamp(rgbf, min_sample, max_sample);
300 |
301 | // output
302 | dst[y * OWIDTH + x] = rgbf;
303 | }
--------------------------------------------------------------------------------
/Collections/examples/Super-xBR_cupy/super-xbr_cupy.vpy:
--------------------------------------------------------------------------------
1 | """Super-xBR in CuPy"""
2 |
3 | from string import Template
4 |
5 | import cupy as cp
6 | import vapoursynth as vs
7 | from vapoursynth import core
8 | import muvsfunc_numpy as mufnp
9 |
10 |
11 | # Load source clip. Only RGBS is supported
12 | src = core.std.BlankClip(format=vs.RGBS)
13 |
14 |
15 | # params of Super-xBR
16 | # Super-xBR upscales an image by a factor of 2
17 | wgt1 = 0.129633
18 | wgt2 = 0.175068
19 |
20 | # whether to enable '--use_fast_math' in NVRTC runtime compilation
21 | # to make use of fast math operations
22 | fast = False
23 |
24 | # CUDA kernel execution configuration
25 | blk_size = (8, 8)
26 |
27 | # pre-processing
28 | if src.format.id != vs.RGBS:
29 | raise vs.Error("Super-xBR: Only 32-bit float RGB is supported!")
30 |
31 |
32 | # load CUDA kernel
33 | with open('super-xbr.cu', 'r') as f:
34 | kernel_source_code = f.read()
35 |
36 | kernel_source_code = Template(kernel_source_code)
37 | kernel_source_code = kernel_source_code.substitute(
38 | width=src.width, height=src.height,
39 | wgt1=wgt1, wgt2=wgt2)
40 |
41 |
42 | if fast:
43 | pass1 = cp.RawKernel(code=kernel_source_code, name='super_xbr_pass1',
44 | options=('--use_fast_math', '--std=c++11'))
45 | pass2 = cp.RawKernel(code=kernel_source_code, name='super_xbr_pass2',
46 | options=('--use_fast_math', '--std=c++11'))
47 | pass3 = cp.RawKernel(code=kernel_source_code, name='super_xbr_pass3',
48 | options=('--use_fast_math', '--std=c++11'))
49 | else:
50 | pass1 = cp.RawKernel(code=kernel_source_code, name='super_xbr_pass1',
51 | options=('--std=c++11', ))
52 | pass2 = cp.RawKernel(code=kernel_source_code, name='super_xbr_pass2',
53 | options=('--std=c++11', ))
54 | pass3 = cp.RawKernel(code=kernel_source_code, name='super_xbr_pass3',
55 | options=('--std=c++11', ))
56 |
57 |
58 | # create NumPy function
59 | def superxbr_core(h_input, pass1, pass2, pass3):
60 | h, w, _ = h_input.shape
61 | d_input = cp.asarray(h_input)
62 |
63 | d_output = cp.zeros((h * 2, w * 2, 3), dtype=h_input.dtype)
64 | d_tmp = cp.zeros((h * 2, w * 2, 3), dtype=h_input.dtype)
65 |
66 | pass1(((w + blk_size[0] - 1) // blk_size[0], (h + blk_size[1] - 1) // blk_size[1]), blk_size, (d_input, d_output))
67 | pass2(((w + blk_size[0] - 1) // blk_size[0], (h + blk_size[1] - 1) // blk_size[1]), blk_size, (d_output, d_tmp))
68 | pass3(((w * 2 + blk_size[0] - 1) // blk_size[0], (h * 2 + blk_size[1] - 1) // blk_size[1]), blk_size, (d_tmp, d_output))
69 |
70 | h_out = cp.asnumpy(d_output)
71 |
72 | return h_out
73 |
74 |
75 | # process
76 | res = mufnp.numpy_process(
77 | [core.std.BlankClip(src, width=src.width*2, height=src.height*2), src],
78 | superxbr_core, pass1=pass1, pass2=pass2, pass3=pass3,
79 | input_per_plane=False, output_per_plane=False, channels_last=True,
80 | omit_first_clip=True)
81 |
82 |
83 | res.set_output()
--------------------------------------------------------------------------------
/Collections/examples/sigma_filter_numba.vpy:
--------------------------------------------------------------------------------
1 | """Sigma Filter in Numba"""
2 |
3 | from numba import jit, prange
4 |
5 | import vapoursynth as vs
6 | from vapoursynth import core
7 | import numpy as np
8 | from functools import partial
9 |
10 |
11 | # Load source clip. Only GRAYS is supported
12 | src = core.std.BlankClip(format=vs.GRAYS)
13 |
14 | # params of mufnp.SigmaFilter()
15 | radius = 3
16 | thr = 0.01
17 |
18 |
19 | # pre-processing
20 | if src.format.id != vs.GRAYS:
21 | raise TypeError('Only 32-bit float grayscale input is supported!')
22 |
23 |
24 | _is_api4: bool = hasattr(vs, "__api_version__") and vs.__api_version__.api_major == 4
25 |
26 |
27 | def _get_array(frame, plane, read=True):
28 | if not read and frame.readonly:
29 | raise ValueError("Frame is readonly")
30 |
31 | if _is_api4:
32 | return frame[plane]
33 | else:
34 | if read:
35 | return frame.get_read_array(plane)
36 | else:
37 | return frame.get_write_array(plane)
38 |
39 |
40 | def erase_module(func):
41 | """Erase the '__module__' attribute of a user-defined function, which otherwise breaks numba"""
42 |
43 | if hasattr(func, '__module__') and func.__module__ == '__vapoursynth__':
44 | func.__module__ = None
45 |
46 | return func
47 |
48 |
49 | @jit(nopython=True, nogil=True)
50 | @erase_module
51 | def clamp(val, low, high):
52 | return min(max(val, low), high)
53 |
54 |
55 | @jit(nopython=True, nogil=True, fastmath=True, parallel=False)
56 | @erase_module
57 | def sigma_filter(src, dst, radius, threshold):
58 | height = src.shape[0]
59 | width = src.shape[1]
60 |
61 | for y in prange(height):
62 | for x in range(width):
63 | center = src[y, x]
64 | acc = 0.
65 | count = 0
66 |
67 | for j in range(-radius, radius + 1):
68 | for i in range(-radius, radius + 1):
69 | val = src[clamp(y + j, 0, height - 1), clamp(x + i, 0, width - 1)]
70 |
71 | if abs(center - val) < threshold:
72 | acc += val
73 | count += 1
74 |
75 | dst[y, x] = acc / count
76 |
77 | return
78 |
79 |
80 | def executor(n, f, radius, thr):
81 | fout = f.copy()
82 |
83 | src = np.asarray(_get_array(f, 0, read=True))
84 | dst = np.asarray(_get_array(fout, 0, read=False))
85 |
86 | sigma_filter(src, dst, radius, thr)
87 |
88 | return fout
89 |
90 |
91 | # process
92 | res = core.std.ModifyFrame(src, src, partial(executor, radius=radius, thr=thr))
93 |
94 | # res = mufnp.SigmaFilter(src, radius=radius, thr=thr)
95 |
96 | res.set_output()
97 |
--------------------------------------------------------------------------------
/Collections/examples/super_resolution_mxnet.vpy:
--------------------------------------------------------------------------------
1 | import os
2 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' # 0: False 1: Default 2: Full
3 |
4 | import mxnet as mx
5 |
6 | import vapoursynth as vs
7 | from vapoursynth import core
8 | import muvsfunc as muf
9 | import muvsfunc_numpy as mufnp
10 |
11 | # super resolution using MXNet
12 |
13 |
14 | # global params
15 | src = core.std.BlankClip(width=720, height=480, length=1000, format=vs.RGBS) # can be RGB/YUV/GRAY
16 | sr_algorithm = 0 # 0: waifu2x, 1: VDSR (faster to slower)
17 | device_id = 0 # -1: CPU, 0, 1, ...: GPU
18 |
19 |
20 | # params of the algos
21 | # (download link for models: https://github.com/WolframRhodium/Super-Resolution-Zoo )
22 | # use the information provided in "info.md" in the model's folder to set the parameter "sr_args"
23 | if sr_algorithm == 0:
24 | sr_args = dict(model_filename=r'waifu2x\upconv_7_anime_style_art_rgb\scale2.0x_model',
25 | device_id=device_id, block_w=128, block_h=128,
26 | up_scale=2)
27 |
28 |
29 | # advanced I: padded upsampling to reduce blocking artifacts when small patch size is used
30 |
31 | r"""
32 | # both NumPy and C++ versions are available
33 | sr_args = dict(model_filename=r'waifu2x\upconv_7_anime_style_art_rgb\scale2.0x_model',
34 | device_id=device_id, block_w=128, block_h=128,
35 | up_scale=2, pre_upscale=False, pad=(0,5,0,5), crop=(0,10,0,10))
36 | """
37 |
38 | r"""
39 | # only NumPy version is available
40 | sr_args = dict(model_filename=r'waifu2x\upconv_7_anime_style_art_rgb\scale2.0x_model',
41 | device_id=device_id, block_w=128, block_h=128,
42 | up_scale=2, pre_upscale=False, pad=(5,5,5,5), crop=(10,10,10,10))
43 | """
44 |
45 | """ explanation:
46 | Suppose we want to upsample a 48x48 patch in an image. Let's denote such a patch as X.
47 | Patch X is so small that blocking artifacts are very likely to appear in the upsampled output.
48 | Thus we may want to feed the network a 58x58 patch Y, with X located at the center of Y.
49 | After processing by the network, we can then crop the output to obtain an upsampled version of X with fewer blocking artifacts.
50 |
51 | Such a procedure can be denoted as "pad=(5, 5, 5, 5), crop=(10, 10, 10, 10)".
52 | The value 5 is obtained by (58-48)/2=5, and the value 10 is obtained by 5*2=10, where 2 is the upsampling factor.
53 | Note that if pre_upscale is "True", the upsampling factor is always 1, regardless of the value of "up_scale".
54 | """
55 |
56 |
57 | # advanced II: multi-GPU data parallelism
58 |
59 | r"""
60 | # only C++ version is available
61 |
62 | # 2 GPUs
63 | sr_args = dict(model_filename=r'waifu2x\upconv_7_anime_style_art_rgb\scale2.0x_model',
64 | device_id=[0, 1], block_w=128, block_h=128,
65 | up_scale=2)
66 |
67 | # 2 Queues
68 | sr_args = dict(model_filename=r'waifu2x\upconv_7_anime_style_art_rgb\scale2.0x_model',
69 | device_id=[0, 0], block_w=128, block_h=128,
70 | up_scale=2)
71 |
72 | # 4 Queues 2 GPUs
73 | sr_args = dict(model_filename=r'waifu2x\upconv_7_anime_style_art_rgb\scale2.0x_model',
74 | device_id=[0, 1, 0, 1], block_w=128, block_h=128,
75 | up_scale=2)
76 | """
77 |
78 | # VDSR
79 | elif sr_algorithm == 1:
80 | sr_args = dict(model_filename=r'VDSR\pytorch-vdsr@twtygqyy\VDSR',
81 | device_id=device_id, block_w=128, block_h=128,
82 | up_scale=2, is_rgb_model=False, pre_upscale=True)
83 |
84 |
85 | # sr = mufnp.super_resolution(src, **sr_args) # using NumPy
86 | sr = muf.super_resolution(src, **sr_args) # using C++ plugin
87 |
88 | sr.set_output()
--------------------------------------------------------------------------------
/Collections/examples/super_resolution_opencv.vpy:
--------------------------------------------------------------------------------
1 | # super resolution using OpenCV
2 | # note: the input image to the network is not cropped, which might trigger an out-of-memory error.
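# (a possible workaround, not part of the original script: run the network on tiles cropped
# with core.std.CropAbs and stitch the results back with core.std.StackHorizontal /
# core.std.StackVertical, similar to the block_w/block_h tiling in the MXNet example above)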
3 |
4 | import os
5 |
6 | # Set OpenCL device in format `<Platform>:<CPU|GPU>:<DeviceID>`
7 | # examples: 'AMD:GPU:', ':GPU:1', 'Intel:CPU:',
8 | # https://github.com/opencv/opencv/wiki/OpenCL-optimizations#opencv-opencl-configuration-options
9 | os.environ['OPENCV_OPENCL_DEVICE'] = 'NVIDIA:GPU:' # use GPU to accelerate processing
10 |
11 |
12 | import vapoursynth as vs
13 | from vapoursynth import core
14 | import cv2
15 | import mvsfunc as mvf
16 | import muvsfunc_numpy as mufnp
17 |
18 |
19 | # global params
20 | src = core.std.BlankClip(width=640, height=360, length=1000, format=vs.RGBS) # can be RGB/YUV/GRAY
21 | sr_algorithm = 0 # 0: waifu2x, 1: IDN, 2: TSCN, 3: VDSR, 4: DBPN (faster to slower)
22 |
23 | if 'GPU' in os.environ['OPENCV_OPENCL_DEVICE']:
24 | if cv2.ocl.haveOpenCL() and cv2.ocl.useOpenCL():
25 | backend = cv2.dnn.DNN_BACKEND_OPENCV
26 | target = cv2.dnn.DNN_TARGET_OPENCL # available on NVIDIA GPU since OpenCV 4.0.1, but only works on Intel GPU before OpenCV 3.4.2
27 | else:
28 | backend = cv2.dnn.DNN_BACKEND_DEFAULT
29 | target = cv2.dnn.DNN_TARGET_CPU
30 |
31 |
32 | # params of the algos
33 | if sr_algorithm == 0:
34 | # https://github.com/php-opencv/php-opencv-examples/tree/master/models/waifu2x
35 | # other models can be found at
36 | # https://github.com/HomeOfVapourSynthEvolution/VapourSynth-Waifu2x-caffe/tree/master/Waifu2x-caffe/models
37 | sr_args = dict(prototxt=r'scale2.0x_model.prototxt',
38 | caffe_model=r'scale2.0x_model.caffemodel', up_scale=2, is_rgb_model=True, pad=(7,7,7,7))
39 |
40 | elif sr_algorithm == 1:
41 | # https://github.com/Zheng222/IDN-Caffe/tree/master/test/caffemodel
42 | sr_args = dict(prototxt=r'IDN_x2_deploy.prototxt',
43 | caffe_model=r'IDN_x2.caffemodel', up_scale=2, is_rgb_model=False, pad=(1,1,1,1), crop=(1,2,1,2),
44 | upscale_uv=False, merge_residual=True)
45 |
46 | elif sr_algorithm == 2:
47 | # https://github.com/Zheng222/TSCN/tree/master/test
48 | sr_args = dict(prototxt=r'TSCN_x2_deploy.prototxt',
49 | caffe_model=r'TSCN_x2.caffemodel', up_scale=2, is_rgb_model=False)
50 |
51 | elif sr_algorithm == 3:
52 | # https://github.com/huangzehao/caffe-vdsr/tree/master/Train
53 | sr_args = dict(prototxt=r'VDSR_net_deploy.prototxt',
54 | caffe_model=r'VDSR_Adam.caffemodel', up_scale=2, is_rgb_model=False, pre_upscale=True, upscale_uv=False)
55 |
56 | elif sr_algorithm == 4:
57 | # https://github.com/alterzero/DBPN-caffe
58 | # https://drive.google.com/drive/folders/1ahbeoEHkjxoo4NV1wReOmpoRWbl448z-?usp=sharing
59 | sr_args = dict(prototxt=r'DBPN_mat_2x.prototxt',
60 | caffe_model=r'DBPN_2x.caffemodel', up_scale=2, is_rgb_model=True)
61 |
62 |
63 | # internal functions
64 | def channel_last(arr):
65 | """Convert a CHW array to HWC."""
66 | ndim = arr.ndim
67 | return arr.swapaxes(ndim - 3, ndim - 2).swapaxes(ndim - 2, ndim - 1)
68 |
69 |
70 | def super_resolution_core(img, net, pad=None, crop=None):
71 | if pad is not None:
72 | img = cv2.copyMakeBorder(img, *pad, cv2.BORDER_REPLICATE)
73 |
74 | blob = cv2.dnn.blobFromImage(img)
75 |
76 | net.setInput(blob, '')
77 |
78 | super_res = net.forward()
79 |
80 | if img.ndim == 2:
81 | if crop is not None:
82 | return super_res[0, 0, crop[0]:-crop[1], crop[2]:-crop[3]]
83 | else:
84 | return super_res[0, 0, :, :]
85 | else:
86 | # the output is BGR rather than RGB so channel reversal is needed
87 | if crop is not None:
88 | return channel_last(super_res[0, ::-1, crop[0]:-crop[1], crop[2]:-crop[3]])
89 | else:
90 | return channel_last(super_res[0, ::-1, :, :])
91 |
92 |
93 | def run_super_resolution(clip, prototxt, caffe_model, up_scale=2, is_rgb_model=True, pad=None, crop=None, backend=None, target=None):
94 | """ Super-Resolution without color family handling
95 | """
96 |
97 | net = cv2.dnn.readNetFromCaffe(prototxt, caffe_model)
98 |
99 | if backend is not None:
100 | net.setPreferableBackend(backend)
101 |
102 | if target is not None:
103 | net.setPreferableTarget(target)
104 |
105 | if up_scale != 1:
106 | blank = core.std.BlankClip(clip, width=clip.width*up_scale, height=clip.height*up_scale)
107 | super_res = mufnp.numpy_process([blank, clip], super_resolution_core, net=net,
108 | input_per_plane=(not is_rgb_model), output_per_plane=(not is_rgb_model), pad=pad, crop=crop,
109 | omit_first_clip=True)
110 | else:
111 | super_res = mufnp.numpy_process(clip, super_resolution_core, net=net,
112 | input_per_plane=(not is_rgb_model), output_per_plane=(not is_rgb_model), pad=pad, crop=crop)
113 |
114 | return super_res
115 |
116 |
117 | def super_resolution(clip, prototxt, caffe_model, up_scale=2, is_rgb_model=True, pad=None, crop=None, backend=None, target=None, pre_upscale=False, upscale_uv=False, merge_residual=False):
118 | """ Super-Resolution with color family handling
119 |
120 | The color space of the output depends on the algorithm.
121 | """
122 |
123 | isGray = clip.format.color_family == vs.GRAY
124 | isRGB = clip.format.color_family == vs.RGB
125 |
126 | if is_rgb_model and not isRGB:
127 | clip = mvf.ToRGB(clip, depth=32)
128 |
129 | elif not is_rgb_model:
130 | if isRGB:
131 | clip = mvf.ToYUV(clip, depth=32)
132 |
133 | if not isGray and not upscale_uv: # isYUV/RGB and only upscale Y
134 | clip = mvf.GetPlane(clip)
135 |
136 | clip = mvf.Depth(clip, depth=32)
137 |
138 | if pre_upscale:
139 | clip = core.resize.Bicubic(clip, clip.width*up_scale, clip.height*up_scale, filter_param_a=0, filter_param_b=0.5)
140 | up_scale = 1
141 |
142 | super_res = run_super_resolution(clip, prototxt=prototxt, caffe_model=caffe_model,
143 | up_scale=up_scale, is_rgb_model=is_rgb_model, pad=pad, crop=crop, backend=backend, target=target)
144 |
145 | if merge_residual:
146 | low_res = core.resize.Bicubic(clip, super_res.width, super_res.height, filter_param_a=0, filter_param_b=0.5)
147 | super_res = core.std.Expr([super_res, low_res], ['x y +'])
148 |
149 | return super_res
150 |
151 | sr = super_resolution(src, **sr_args, backend=backend, target=target)
152 |
153 | # sr = core.caffe.Waifu2x(src, noise=-1, scale=2, cudnn=True, model=3)
154 |
155 | sr.set_output()
--------------------------------------------------------------------------------
/Collections/muvsfunc_misc.py:
--------------------------------------------------------------------------------
1 | """
2 | Miscellaneous functions:
3 | GPS
4 | gauss
5 | freq_merge
6 | band_merge
7 | detail_enhancement
8 | SSR
9 | Wiener2
10 | tv
11 | BernsteinFilter
12 | GPA
13 | XDoG
14 | sbr_detail
15 | fade
16 | fast_mandelbrot
17 | """
18 |
19 | import functools
20 | import math
21 | import vapoursynth as vs
22 | from vapoursynth import core
23 | import muvsfunc as muf
24 | import mvsfunc as mvf
25 | import typing
26 |
27 | _is_api4: bool = hasattr(vs, "__api_version__") and vs.__api_version__.api_major == 4
28 |
29 | def _get_array(frame, plane, read=True):
30 | if not read and frame.readonly:
31 | raise ValueError("Frame is readonly")
32 |
33 | if _is_api4:
34 | return frame[plane]
35 | else:
36 | if read:
37 | return frame.get_read_array(plane)
38 | else:
39 | return frame.get_write_array(plane)
40 |
41 | def GPS(clip, gamma=None):
42 | """Get Power Spectrum
43 |
44 | Args:
45 | gamma: It enables viewing small-valued responses in the spectral display.
46 |
47 | """
48 |
49 | w = clip.width
50 | h = clip.height
51 | max_w_h = max(w, h)
52 |
53 | clip = core.std.AddBorders(clip, right=max_w_h - w, bottom=max_w_h - h)
54 | clip = core.vcfreq.F2Quiver(clip, test=1, frad=16, fspec=[1,2,0,1,7], gamma=gamma)
55 | clip = core.std.CropRel(clip, 0, max_w_h // 2).resize.Bicubic(w, h)
56 | return clip
57 |
58 |
59 | def gauss(clip, sigma=None, algo=0):
60 | """Gaussian filter using tcanny
61 | Borrowed from https://github.com/IFeelBloated/Oyster
62 |
63 | Args:
64 | sigma: Standard deviation of gaussian.
65 |
66 | algo: (int) Algorithm. 0:auto, 1:tcanny.TCanny(mode=-1), 2:bilateral.Gaussian()
67 |
68 | """
69 |
70 | if (algo == 0 and sigma is not None and sigma >= 10) or algo == 2:
71 | return core.bilateral.Gaussian(clip, sigma=sigma)
72 | else: # algo == 1 or (algo == 0 and (sigma is None or sigma < 10))
73 | return core.tcanny.TCanny(clip, sigma=sigma, mode=-1)
74 |
75 |
76 | def freq_merge(src, flt, fun=None, **fun_args):
77 | """Replace high freq component in "src" with high freq component in "flt"
78 | Borrowed from https://github.com/IFeelBloated/Oyster
79 |
80 | Args:
81 | src, flt: Input.
82 |
83 | fun: (function) A low-pass filter. Default is gaussian.
84 | """
85 |
86 | if fun is None or not callable(fun):
87 | fun = gauss
88 |
89 | low_src = fun(src, **fun_args)
90 | low_flt = fun(flt, **fun_args)
91 | return core.std.Expr([low_src, flt, low_flt], ['y z - x +'])
92 |
93 |
94 | def band_merge(src, flt, fun=None, fun_args1=None, fun_args2=None, cascade=True):
95 | """Replace frequencies within a certain range in "src" with frequencies within a certain range in "flt"
96 |
97 | Args:
98 | src, flt: Input.
99 |
100 | fun: (function) A low-pass filter. Default is gaussian.
101 |
102 | cascade: (bool) Whether to cascade functions. Default is True.
103 |
104 | """
105 |
106 | if fun is None or not callable(fun):
107 | fun = gauss
108 |
109 | if fun_args1 is None:
110 | fun_args1 = {}
111 |
112 | if fun_args2 is None:
113 | fun_args2 = {}
114 |
115 | low_src1 = fun(src, **fun_args1)
116 | low_src2 = fun(low_src1 if cascade else src, **fun_args2)
117 | low_flt1 = fun(flt, **fun_args1)
118 | low_flt2 = fun(low_flt1 if cascade else flt, **fun_args2)
119 | return core.std.Expr([low_flt1, low_flt2, src, low_src1, low_src2], ['x y - b + a - z +'])
120 |
121 |
122 | def detail_enhancement(clip, guidance=None, iter=3, radius=4, regulation=0.0005, fast=False, **args):
123 | """Novel detail enhancement filter using guided filter and defilter
124 |
125 | Args:
126 | clip: Grayscale clip.
127 | guidance: Guidance clip.
128 |
129 | """
130 |
131 | return muf.DeFilter(clip, muf.GuidedFilter, guidance=guidance, radius=radius, regulation=regulation, fast=fast, iteration=iter, **args)
132 |
133 |
134 | def SSR(clip, sigma=50, full=None, **args):
135 | """Single-scale Retinex
136 |
137 | Args:
138 | clip: Input. Only the first plane will be processed.
139 |
140 | sigma: (int) Standard deviation of gaussian blur. Default is 50.
141 |
142 | full: (bool) Whether input clip is of full range. Default is None.
143 |
144 | Ref:
145 | [1] Jobson, D. J., Rahman, Z. U., & Woodell, G. A. (1997). Properties and performance of a center/surround retinex. IEEE transactions on image processing, 6(3), 451-462.
146 |
147 | """
148 |
149 | bits = clip.format.bits_per_sample
150 | sampleType = clip.format.sample_type
151 | isGray = clip.format.color_family == vs.GRAY
152 |
153 | if not isGray:
154 | clip_src = clip
155 | clip = mvf.GetPlane(clip)
156 |
157 | lowFre = gauss(clip, sigma=sigma, **args)
158 |
159 | clip = mvf.Depth(clip, 32, fulls=full)
160 | lowFre = mvf.Depth(lowFre, 32, fulls=full) # core.bilateral.Gaussian() doesn't support float input.
161 |
162 | expr = 'x 1 + log y 1 + log -'
163 | clip = core.std.Expr([clip, lowFre], [expr])
164 |
165 | stats = core.std.PlaneStats(clip, plane=[0])
166 |
167 | # Dynamic range stretching
168 | def Stretch(n, f, clip, core):
169 | alpha = f.props['PlaneStatsMax'] - f.props['PlaneStatsMin']
170 | beta = f.props['PlaneStatsMin']
171 |
172 | expr = 'x {beta} - {alpha} /'.format(beta=beta, alpha=alpha)
173 | return core.std.Expr([clip], [expr])
174 |
175 | clip = core.std.FrameEval(clip, functools.partial(Stretch, clip=clip, core=core), prop_src=stats)
176 |
177 | clip = mvf.Depth(clip, depth=bits, sample=sampleType, fulld=full)
178 |
179 | if not isGray:
180 | clip = core.std.ShufflePlanes([clip, clip_src], list(range(clip_src.format.num_planes)), clip_src.format.color_family)
181 |
182 | return clip
183 |
184 |
185 | def Wiener2(input, radius_v=3, radius_h=None, noise=None, **depth_args):
186 | """2-D adaptive noise-removal filtering. (wiener2 from MATLAB)
187 |
188 | Wiener2 lowpass filters an intensity image that has been degraded by constant power additive noise.
189 | Wiener2 uses a pixel-wise adaptive Wiener method based on statistics estimated from a local neighborhood of each pixel: output = localMean + max(localVar - noise, 0) / max(localVar, noise) * (input - localMean).
190 |
191 | The estimate of the additive noise power is not returned.
192 |
193 | Args:
194 | input: Input clip. Only the first plane will be processed.
195 |
196 | radius_v, radius_h: (int) Size of neighborhoods to estimate the local image mean and standard deviation. The size is (radius_v*2-1) * (radius_h*2-1).
197 | If "radius_h" is None, it will be set to "radius_v".
198 | Default is 3.
199 |
200 | noise: (float) Variance of additive noise. If it is not given, the average of all locally estimated variances will be used.
201 | Default is None.
202 |
203 | depth_args: (dict) Additional arguments passed to mvf.Depth() in the form of keyword arguments.
204 | Default is {}.
205 |
206 | Ref:
207 | [1] Lim, J. S. (1990). Two-dimensional signal and image processing. Englewood Cliffs, NJ, Prentice Hall, 1990, 710 p, p. 538, equations 9.26, 9.27, and 9.29.
208 | [2] 2-D adaptive noise-removal filtering - MATLAB wiener2 - MathWorks (https://www.mathworks.com/help/images/ref/wiener2.html) 209 | 210 | """ 211 | 212 | funcName = 'Wiener2' 213 | 214 | if not isinstance(input, vs.VideoNode) or input.format.num_planes > 1: 215 | raise TypeError(funcName + ': \"input\" must be a gray-scale/single channel clip!') 216 | 217 | bits = input.format.bits_per_sample 218 | sampleType = input.format.sample_type 219 | 220 | if radius_h is None: 221 | radius_h = radius_v 222 | 223 | input32 = mvf.Depth(input, depth=32, sample=vs.FLOAT, **depth_args) 224 | 225 | localMean = muf.BoxFilter(input32, radius_h+1, radius_v+1) 226 | localVar = muf.BoxFilter(core.std.Expr([input32], ['x dup *']), radius_h+1, radius_v+1) 227 | localVar = core.std.Expr([localVar, localMean], ['x y dup * -']) 228 | 229 | if noise is None: 230 | localVarStats = core.std.PlaneStats(localVar, plane=[0]) 231 | 232 | def FLT(n, f, clip, core, localMean, localVar): 233 | noise = f.props['PlaneStatsAverage'] 234 | 235 | return core.std.Expr([clip, localMean, localVar], ['y z {noise} - 0 max z {noise} max / x y - * +'.format(noise=noise)]) 236 | 237 | flt = core.std.FrameEval(input32, functools.partial(FLT, clip=input32, core=core, localMean=localMean, localVar=localVar), prop_src=[localVarStats]) 238 | else: 239 | flt = core.std.Expr([input32, localMean, localVar], ['y z {noise} - 0 max z {noise} max / x y - * +'.format(noise=noise)]) 240 | 241 | return mvf.Depth(flt, depth=bits, sample=sampleType, **depth_args) 242 | 243 | 244 | def tv(I, iter=5, dt=None, ep=1, lam=0, I0=None, C=0): 245 | """Total Variation Denoising 246 | 247 | Args: 248 | I: Input. A floating point clip is recommended. 249 | 250 | iter: (int) Num of iterations. Default is 5. 251 | 252 | dt: (float) Time step. Default is ep/5. 253 | 254 | ep: (float) Epsilon (of gradient regularization). Default is 1. 255 | 256 | lam: (float) Fidelity term lambda. Default is 0. 257 | 258 | I0: (clip) Input (noisy) image. Default is "I". 259 | C: (float) Constant added in the fidelity term "lam * (I0 - I + C)". Default is 0. 260 | 261 | Ref: 262 | [1] Rudin, L. I., Osher, S., & Fatemi, E. (1992). Nonlinear total variation based noise removal algorithms. Physica D: Nonlinear Phenomena, 60(1-4), 259-268.
263 | [2] Total Variation Denoising : http://visl.technion.ac.il/~gilboa/PDE-filt/tv_denoising.html 264 | 265 | """ 266 | 267 | if dt is None: 268 | dt = ep / 5 269 | 270 | if I0 is None: 271 | I0 = I 272 | 273 | ep2 = ep * ep 274 | 275 | isFloat = I.format.sample_type == vs.FLOAT 276 | neutral = 0 if isFloat else muf.scale(128, I.format.bits_per_sample) 277 | 278 | for i in range(iter): 279 | I_x = core.std.Convolution(I, [-1, 0, 1], divisor=2, bias=neutral, mode='h') # correct 280 | I_y = core.std.Convolution(I, [-1, 0, 1], divisor=2, bias=neutral, mode='v') # correct 281 | I_xx = core.std.Convolution(I, [1, -2, 1], divisor=1 if isFloat else 4, bias=neutral, mode='h') # x4 282 | I_yy = core.std.Convolution(I, [1, -2, 1], divisor=1 if isFloat else 4, bias=neutral, mode='v') # x4 283 | Dp = core.std.Convolution(I, [1, 0, 0, 0, 0, 0, 0, 0, 1], divisor=2) 284 | Dm = core.std.Convolution(I, [0, 0, 1, 0, 0, 0, 1, 0, 0], divisor=2) 285 | I_xy = core.std.Expr([Dp, Dm], ['x y - 2 / {} +'.format(neutral)]) # correct 286 | 287 | if isFloat: 288 | expr = 'x {dt} a {ep2} z dup * + * 2 y * z * b * - c {ep2} y dup * + * + {ep2} y dup * + z dup * + 1.5 pow / {lam} d x - {C} + * + * +'.format(dt=dt, ep2=ep2, lam=lam, C=C) 289 | else: # isInteger 290 | expr = 'x {dt} a {neutral} - 4 * {ep2} z {neutral} - dup * + * 2 y {neutral} - * z {neutral} - * b {neutral} - * - c {neutral} - 4 * {ep2} y {neutral} - dup * + * + {ep2} y {neutral} - dup * + z {neutral} - dup * + 1.5 pow / {lam} d x - {C} + * + * +'.format(dt=dt, neutral=neutral, ep2=ep2, lam=lam, C=C) 291 | 292 | I = core.std.Expr([I, I_x, I_y, I_xx, I_xy, I_yy, I0], [expr]) 293 | 294 | return I 295 | 296 | 297 | def BernsteinFilter(clip, iter=30, **depth_args): 298 | """Bernstein Filter 299 | 300 | Bernstein filter is an efficient filter solver, which can implicitly minimize the mean curvature. 301 | 302 | Internal precision is always float. 303 | 304 | Args: 305 | clip: Input. 306 | 307 | iter: (int) Num of iterations. Default is 30. 308 | 309 | depth_args: (dict) Additional arguments passed to mvf.Depth() in the form of keyword arguments. 310 | Default is {}. 311 | 312 | Ref: 313 | [1] Gong, Y. (2016, March). Bernstein filter: A new solver for mean curvature regularized models. In Acoustics, Speech and Signal Processing (ICASSP), 2016 IEEE International Conference on (pp. 1701-1705). IEEE. 314 | 315 | """ 316 | 317 | bits = clip.format.bits_per_sample 318 | sample = clip.format.sample_type 319 | 320 | clip = mvf.Depth(clip, depth=32, sample=vs.FLOAT, **depth_args) 321 | 322 | for i in range(iter): 323 | d1 = core.std.Convolution(clip, [1, -2, 1], divisor=2, mode='h') 324 | d2 = core.std.Convolution(clip, [1, -2, 1], divisor=2, mode='v') 325 | clip = core.std.Expr([clip, d1, d2], ['y abs z abs < x y + x z + ?']) 326 | 327 | return mvf.Depth(clip, depth=bits, sample=sample, **depth_args) 328 | 329 | 330 | def GPA(clip, sigmaS=3, sigmaR=0.15, mode=0, iteration=0, eps=1e-3, **depth_args): 331 | """Fast and Accurate Bilateral Filtering using Gaussian-Polynomial Approximation 332 | 333 | This filter approximates the bilateral filter when the range kernel is Gaussian. 334 | The exponential function in the weight of the bilateral filter is approximated, 335 | and the bilateral filter is therefore decomposed into a series of spatial convolutions. 336 | 337 | The number of iterations depends on the value of "sigmaR", and increases as "sigmaR" decreases. 338 | A small value of "sigmaR" may lead to precision problems.
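        A rough sketch of the idea from Ref. [1]: with s and t denoting two pixel values shifted by T and scaled by sigmaR, the Gaussian range kernel factors as

            exp(-(s - t)^2 / 2) = exp(-s^2 / 2) * exp(-t^2 / 2) * sum_{n>=0} (s * t)^n / n!

        so truncating the series at order "iteration" reduces the bilateral filter to that many spatial (Gaussian or box) convolutions.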
339 | 340 | All the internal calculations are done at 32-bit float. 341 | 342 | Part of the description of the bilateral filter is copied from 343 | https://github.com/HomeOfVapourSynthEvolution/VapourSynth-Bilateral 344 | 345 | Args: 346 | clip: Input clip. 347 | 348 | sigmaS: (float) Sigma of Gaussian function to calculate spatial weight. 349 | The scale of this parameter is equivalent to pixel distance. 350 | Larger sigmaS results in larger filtering radius as well as stronger smoothing. 351 | Default is 3. 352 | 353 | sigmaR: (float) Sigma of Gaussian function to calculate range weight. 354 | The scale of this parameter is the same as pixel value ranging in [0,1]. 355 | Smaller sigmaR preserves edges better, but may also lead to weaker smoothing. 356 | It should be pointed out that a small "sigmaR" results in more iterations and higher error. 357 | Default is 0.15. 358 | 359 | mode: (0 or 1) 0: Gaussian bilateral filter, 1: Box bilateral filter 360 | Default is 0. 361 | 362 | iteration: (int) Number of iterations, i.e. the order of the approximation. 363 | If it is 0, it is calculated automatically according to "sigmaR" and "eps". 364 | Default is 0. 365 | 366 | eps: (float) Filtering Accuracy. 367 | Default is 1e-3. 368 | 369 | depth_args: (dict) Additional arguments passed to mvf.Depth(). 370 | Default is {}. 371 | 372 | Ref: 373 | [1] Chaudhury, K. N., & Dabhade, S. D. (2016). Fast and provably accurate bilateral filtering. IEEE Transactions on Image Processing, 25(6), 2519-2528. 374 | [2] http://www.mathworks.com/matlabcentral/fileexchange/56158 375 | 376 | """ 377 | 378 | def estimate_iteration(sigmaR, T, eps): 379 | if sigmaR > 70: 380 | return 10 381 | elif sigmaR < 5: 382 | return 800 383 | else: 384 | lam = (T / sigmaR) ** 2 385 | p = 1 + math.log(lam) 386 | q = -lam - math.log(eps) 387 | t = q / math.e / lam 388 | W = t - t ** 2 + 1.5 * t ** 3 - (8 / 3) * t ** 4 389 | N = min(max(q / W, 10), 300) 390 | 391 | if sigmaR < 30: 392 | for i in range(5): 393 | N -= (N * math.log(N) - p * N - q) / (math.log(N) + 1 - p) 394 | 395 | return math.ceil(N) 396 | 397 | T = 0.5 398 | bits = clip.format.bits_per_sample 399 | sampleType = clip.format.sample_type 400 | 401 | if mode == 0: # Gaussian bilateral filter 402 | Filter = functools.partial(core.tcanny.TCanny, sigma=sigmaS, mode=-1) 403 | else: # Box bilateral filter 404 | Filter = functools.partial(muf.BoxFilter, radius=sigmaS + 1) 405 | 406 | if iteration == 0: 407 | iteration = estimate_iteration(sigmaR * 255, T, eps) 408 | 409 | clip = mvf.Depth(clip, depth=32, sample=vs.FLOAT, **depth_args) 410 | 411 | H = core.std.Expr(clip, f'x {T} - {sigmaR} /') 412 | F = core.std.Expr(H, '-0.5 x dup * * exp') 413 | G = core.std.BlankClip(clip, color=[1] * clip.format.num_planes) 414 | P = core.std.BlankClip(clip, color=[0] * clip.format.num_planes) 415 | Q = core.std.BlankClip(clip, color=[0] * clip.format.num_planes) 416 | Fbar = Filter(F) 417 | 418 | for i in range(1, iteration+1): 419 | sqrt_i = math.sqrt(i) 420 | inv_sqrt_i = 1 / sqrt_i 421 | Q = core.std.Expr([Q, G, Fbar], 'x y z * +') 422 | F = core.std.Expr([H, F], f'x y * {inv_sqrt_i} *') 423 | Fbar = Filter(F) 424 | P = core.std.Expr([P, G, Fbar], f'x y z * {sqrt_i} * +') 425 | G = core.std.Expr([H, G], f'x y * {inv_sqrt_i} *') 426 | 427 | res = core.std.Expr([P, Q], f'x {sigmaR} * y 1e-5 + / {T} +') 428 | 429 | return mvf.Depth(res, depth=bits, sample=sampleType, **depth_args) 430 | 431 | 432 | def XDoG(clip, sigma=1.0, k=1.6, p=20, epsilon=0.7, lamda=0.01): 433 | """XDoG - An eXtended
difference-of-Gaussian filter 434 | 435 | Args: 436 | clip: Input clip. 437 | 438 | sigma: (float) Strength of gaussian filter. 439 | Default is 1. 440 | 441 | k: (float) Amplifier of "sigma" for second gaussian filtering. 442 | Default is 1.6. 443 | 444 | p: (float) Amplifier of difference of gaussian. 445 | Default is 20. 446 | 447 | epsilon: (float, 0~1) Threshold of DoG response. Scaled automatically. 448 | Default is 0.7. 449 | 450 | lamda: (float) Multiplier in the thresholding function. 451 | Default is 0.01. 452 | 453 | Ref: 454 | [1] Winnemöller, H., Kyprianidis, J. E., & Olsen, S. C. (2012). XDoG: an extended difference-of-Gaussians compendium including advanced image stylization. Computers & Graphics, 36(6), 740-753. 455 | 456 | """ 457 | 458 | bits = clip.format.bits_per_sample 459 | peak = (1 << bits) - 1 460 | epsilon = muf.scale(epsilon, bits) 461 | 462 | f1 = core.tcanny.TCanny(clip, sigma=sigma, mode=-1) 463 | f2 = core.tcanny.TCanny(clip, sigma=sigma * k, mode=-1) 464 | 465 | return core.std.Expr([f1, f2], f'x y - {p} * x + {epsilon} >= 1 2 2 2 x y - {p} * x + {epsilon} - {lamda} * * exp 1 + / - ? {peak} *') 466 | 467 | 468 | def sbr_detail(clip, r=1, planes=None, mode=1): 469 | """sbr() inspired detail detection algorithm 470 | 471 | Code is modified from sbr() in https://github.com/HomeOfVapourSynthEvolution/havsfunc/blob/master/havsfunc.py. 472 | 473 | Args: 474 | clip: RGB/YUV/Gray, 8..16 bit integer, 16..32 bit float. 475 | 476 | r: (int) Radius in pixels of the smoothing filter. 477 | Default is 1. 478 | 479 | planes: (int []) Whether to process the corresponding plane. 480 | By default, every plane will be processed. 481 | The unprocessed planes will be copied from "clip". 482 | 483 | mode: (int, 0~2) Detail detection method, ranging from insensitive to sensitive. 484 | The result of mode 2 is a combination of mode 0 and mode 1. 485 | Default is 1. 486 | """ 487 | 488 | funcName = 'sbr_detail' 489 | 490 | if not isinstance(clip, vs.VideoNode): 491 | raise TypeError(funcName + ': This is not a clip') 492 | 493 | if planes is None: 494 | planes = list(range(clip.format.num_planes)) 495 | elif isinstance(planes, int): 496 | planes = [planes] 497 | 498 | if clip.format.sample_type == vs.INTEGER: 499 | neutral = 1 << (clip.format.bits_per_sample - 1) 500 | peak = (1 << clip.format.bits_per_sample) - 1 501 | else: # clip.format.sample_type == vs.FLOAT 502 | neutral = 0.5 503 | peak = 1.0 504 | 505 | matrix1 = [1, 2, 1, 2, 4, 2, 1, 2, 1] # RemoveGrain(11) 506 | matrix2 = [1, 1, 1, 1, 1, 1, 1, 1, 1] # RemoveGrain(20) 507 | 508 | RG11 = core.std.Convolution(clip, matrix=matrix1, planes=planes) 509 | for i in range(r - 1): 510 | RG11 = core.std.Convolution(RG11, matrix=matrix2, planes=planes) 511 | 512 | RG11D = core.std.MakeDiff(clip, RG11, planes=planes) 513 | 514 | RG11DS = core.std.Convolution(RG11D, matrix=matrix1, planes=planes) 515 | for i in range(r - 1): 516 | RG11DS = core.std.Convolution(RG11DS, matrix=matrix2, planes=planes) 517 | 518 | if mode == 0: 519 | expr = f'x y - x {neutral} - * 0 < {peak} 0 ?' 520 | elif mode == 1: 521 | expr = f'x y - abs x {neutral} - abs < {peak} 0 ?' 522 | elif mode == 2: 523 | expr = f'x y - x {neutral} - * 0 < x y - abs x {neutral} - abs < or {peak} 0 ?'
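    # Note on the exprs above: with x = RG11D (clip minus its blur, stored around
    # "neutral") and y = RG11DS (a blurred copy of that difference), mode 0 marks
    # pixels where (x - y) and (x - neutral) have opposite signs, mode 1 marks
    # pixels where abs(x - y) < abs(x - neutral), and mode 2 is the "or" of both
    # tests; marked pixels are set to "peak", all others to 0.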
524 | 525 | detail_mask = core.std.Expr([RG11D, RG11DS], [expr if i in planes else '' for i in range(clip.format.num_planes)]) 526 | 527 | return detail_mask 528 | 529 | 530 | def fade(clip, start=0, end=None, mode='in', base=None): 531 | """Fade-in/out effect implementation 532 | 533 | Args: 534 | clip: RGB/YUV/Gray, 8..16 bit integer, 16..32 bit float. 535 | 536 | start: (int) Frame number of the first frame. 537 | Default is 0. 538 | 539 | end: (int) Frame number of the last frame. 540 | Default is the last frame of the clip. 541 | 542 | mode: ("in" or "out") Fade mode. 543 | Default is "in". 544 | 545 | base: (clip) Base clip of the fade effect. 546 | Default is a black picture. 547 | """ 548 | 549 | funcName = 'fade' 550 | 551 | if not isinstance(clip, vs.VideoNode): 552 | raise TypeError(funcName + ': This is not a clip') 553 | 554 | if end is None: 555 | end = clip.num_frames - 1 556 | 557 | def fade_core(n, clip, start=None, end=None, mode=None, base=None): 558 | if n < start or n > end or end - start <= 0: 559 | return clip 560 | else: 561 | length = end - start 562 | 563 | if mode == 'in': 564 | i = (n - start) / length 565 | elif mode == 'out': 566 | i = (end - n) / length 567 | else: 568 | raise ValueError('Unknown fading mode.') 569 | 570 | if base is None: 571 | y_expr = 'x {} *'.format(i) 572 | 573 | if clip.format.color_family != vs.YUV or clip.format.sample_type == vs.FLOAT: 574 | return core.std.Expr([clip], [y_expr]) 575 | else: 576 | neutral = 1 << (clip.format.bits_per_sample - 1) 577 | uv_expr = 'x {} * {} +'.format(i, (1 - i) * neutral) 578 | return core.std.Expr([clip], [y_expr, uv_expr]) 579 | else: 580 | return core.std.Expr([clip, base], ['x {} * y {} * +'.format(i, 1 - i)]) 581 | 582 | return core.std.FrameEval(clip, functools.partial(fade_core, clip=clip, start=start, end=end, mode=mode, base=base)) 583 | 584 | 585 | def fast_mandelbrot(width=1920, height=1280, iterations=50, 586 | real_range=(-2, 1), imag_range=(-1, 1), c=0+0j, julia_set=False, backend=None): 587 | 588 | import array 589 | 590 | def meshgrid_core(n, f, low, high, horizontal): 591 | assert low < high, f"{low} < {high}" 592 | 593 | f = f.copy() 594 | mem_view = _get_array(f, plane=0, read=False) 595 | height, width = mem_view.shape 596 | 597 | if horizontal: 598 | data = array.array('f', (((high - low) * j / (width - 1) + low) for j in range(width))) 599 | 600 | for i in range(height): 601 | if _is_api4: 602 | for j in range(width): 603 | mem_view[i, j] = data[j] 604 | else: 605 | mem_view[i, :] = data 606 | else: 607 | for i in range(height): 608 | if _is_api4: 609 | for j in range(width): 610 | mem_view[i, j] = (low - high) * i / (height - 1) + high 611 | else: 612 | mem_view[i, :] = array.array('f', [(low - high) * i / (height - 1) + high]) * width 613 | 614 | return f 615 | 616 | c = complex(c) 617 | 618 | ones = core.std.BlankClip(format=vs.GRAYS, width=width, height=height, length=1, color=1) 619 | 620 | if hasattr(core, "akarin"): 621 | features = core.akarin.Version()["expr_features"] 622 | 623 | if b"X" in features and b"width" in features: 624 | z_real = core.akarin.Expr([ones], f"{real_range[1] - real_range[0]} X * width 1 - / {real_range[0]} +") 625 | else: 626 | z_real = core.std.ModifyFrame( 627 | ones, ones, 628 | functools.partial(meshgrid_core, horizontal=True, low=real_range[0], high=real_range[1])) 629 | 630 | if b"Y" in features and b"height" in features: 631 | z_imag = core.akarin.Expr([ones], f"{imag_range[0] - imag_range[1]} Y * height 1 - / {imag_range[1]} +") 632 | else: 633 | z_imag =
core.std.ModifyFrame( 634 | ones, ones, 635 | functools.partial(meshgrid_core, horizontal=False, low=imag_range[0], high=imag_range[1])) 636 | 637 | if julia_set: 638 | inner = ( 639 | f"dup2 dup2 * dup0 + {c.imag} + " # new z_imag 640 | f"dup3 dup0 * dup3 dup0 * - {c.real} + " # new z_real 641 | "dup1 dup0 * dup1 dup0 * + 4 < " # mask 642 | "swap1 dup1 swap6 ? " # update z_real 643 | "swap4 swap1 dup1 swap4 ? " # update z_imag 644 | f"swap2 swap1 dup0 {1/iterations} - swap1 ? " # update counter 645 | ) 646 | 647 | expr = f"x y z {inner * iterations} 1 swap2 ? 1 swap2 ?" 648 | 649 | else: 650 | inner = ( 651 | "dup2 dup2 * dup0 + y + " # new z_imag 652 | "dup3 dup0 * dup3 dup0 * - x + " # new z_real 653 | "dup1 dup0 * dup1 dup0 * + 4 < " # mask 654 | "swap1 dup1 swap6 ? " # update z_real 655 | "swap4 swap1 dup1 swap4 ? " # update z_imag 656 | f"swap2 swap1 dup0 {1/iterations} - swap1 ? " # update counter 657 | ) 658 | 659 | expr = f"{c.real} {c.imag} z {inner * iterations} 1 swap2 ? 1 swap2 ?" 660 | 661 | if backend is None: 662 | if hasattr(core, "akarin"): 663 | return core.akarin.Expr([z_real, z_imag, ones], expr) 664 | else: 665 | return core.std.Expr([z_real, z_imag, ones], expr) 666 | else: 667 | return backend([z_real, z_imag, ones], expr) 668 | 669 | -------------------------------------------------------------------------------- /Collections/net_interp.py: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/xinntao/ESRGAN/blob/50fbd2de1d80a014e1c0e1c165913a128f5a8384/net_interp.py 2 | 3 | import argparse 4 | import caffe_pb2 5 | import numpy as np 6 | 7 | 8 | r""" 9 | Network interpolator for waifu2x-caffe 10 | 11 | It applies linear interpolation in the parameter space of two waifu2x-caffe models of the same architecture, 12 | which allows continuous imagery effect transition, e.g. adjusting the denoising strength. 13 | 14 | caffe_pb2 is required, see the "protobuf" part of 15 | https://mxnet.incubator.apache.org/versions/master/faq/caffe.html#how-to-build 16 | 17 | usage: 18 | net_interp.py -m1 ".\upconv_7_anime_style_art_rgb\scale2.0x_model.json.caffemodel" -m2 ".\upconv_7_anime_style_art_rgb\noise0_scale2.0x_model.json.caffemodel" --weight 0.5 19 | 20 | ref: 21 | [1] X. Wang, K. Yu, C. Dong, et al. Deep Network Interpolation for Continuous Imagery Effect Transition. CVPR 2019.
22 | [2] https://github.com/xinntao/DNI 23 | """ 24 | 25 | # parsing parameters 26 | parser = argparse.ArgumentParser(description="Network interpolator for waifu2x-caffe") 27 | parser.add_argument("-m1", "--model_1", type=str, required=True, help="the first model to interpolate (*.caffemodel)") 28 | parser.add_argument("-m2", "--model_2", type=str, required=True, help="the second model to interpolate (*.caffemodel)") 29 | parser.add_argument("-w", "--weight", type=float, required=True, help="weight used for interpolation [0-1]") 30 | parser.add_argument("-o", "--output", type=str, default="interpolated.caffemodel", help="model output file name") 31 | parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity") # type=bool would treat any non-empty string as True 32 | 33 | args = parser.parse_args() 34 | 35 | model_1_filename = args.model_1 36 | model_2_filename = args.model_2 37 | weight = args.weight 38 | output_filename = args.output 39 | verbose = args.verbose 40 | 41 | # process 42 | print(f"Loading {model_1_filename}\n") 43 | proto_1 = caffe_pb2.NetParameter() 44 | with open(model_1_filename, "rb") as f: 45 | proto_1.ParseFromString(f.read()) 46 | 47 | print(f"Loading {model_2_filename}\n") 48 | proto_2 = caffe_pb2.NetParameter() 49 | with open(model_2_filename, "rb") as f: 50 | proto_2.ParseFromString(f.read()) 51 | 52 | 53 | print(f"Start interpolation with weight={weight}:") 54 | for idx, layer in enumerate(proto_2.layer): 55 | if len(layer.blobs) > 0: 56 | for i in range(len(layer.blobs)): 57 | tmp_1_data = np.asarray(proto_1.layer[idx].blobs[i].data) 58 | tmp_2_data = np.asarray(proto_2.layer[idx].blobs[i].data) 59 | 60 | assert tmp_1_data.shape == tmp_2_data.shape 61 | 62 | if verbose: 63 | print(f'Interpolating layer "{layer.name}": {layer.type}, size={tmp_1_data.shape}') 64 | 65 | proto_1.layer[idx].blobs[i].data[:] = (1.0 - weight) * tmp_1_data + weight * tmp_2_data 66 | else: 67 | if verbose: 68 | print(f'Skipping layer "{layer.name}": {layer.type}') 69 | 70 | print(f"\nSaving interpolated model to {output_filename}") 71 | 72 | with open(output_filename, "wb") as f: 73 | f.write(proto_1.SerializeToString()) 74 | -------------------------------------------------------------------------------- /Collections/resize.py: -------------------------------------------------------------------------------- 1 | def resize(clip, w=None, h=None, sx=0, sy=0, sw=None, sh=None, kernel="spline36", a1=None, a2=None, mpeg2_cplace=True): 2 | """Experimental wrapper function for vszimg resizer in a fmtconv-like API""" 3 | 4 | assert core.version_number() >= 44 5 | 6 | def _expand(shift, num_planes): 7 | if isinstance(shift, (int, float)): 8 | return [shift for _ in range(num_planes)] 9 | elif len(shift) > 0: 10 | _shift = list(shift) 11 | while len(_shift) < num_planes: 12 | _shift.append(_shift[-1]) 13 | return _shift 14 | 15 | if w is None: 16 | w = clip.width 17 | if h is None: 18 | h = clip.height 19 | if sw is None: 20 | sw = clip.width 21 | if sh is None: 22 | sh = clip.height 23 | 24 | kernel = kernel.capitalize() 25 | if kernel == "Bicubic": 26 | a1, a2 = (a1 if a1 is not None else 0), (a2 if a2 is not None else 0.5) # keep user-supplied kernel params 27 | elif kernel == "Lanczos": 28 | a1 = a1 if a1 is not None else 3 29 | 30 | num_planes = clip.format.num_planes 31 | sx = _expand(sx, num_planes) 32 | sy = _expand(sy, num_planes) 33 | sw = _expand(sw, num_planes) 34 | sh = _expand(sh, num_planes) 35 | 36 | if num_planes == 1: 37 | res = getattr(core.resize, kernel)(clip, w, h, src_left=sx[0], src_top=sy[0], src_width=sw[0], src_height=sh[0], filter_param_a=a1, filter_param_b=a2) 38 | else: 39 | # copied from
nnedi3_resample.py 40 | hSubS = 1 << clip.format.subsampling_w 41 | hCPlace = 0.5 - hSubS / 2 if mpeg2_cplace else 0 42 | hScale = w / clip.width 43 | 44 | vSubS = 1 << clip.format.subsampling_h 45 | vCPlace = 0 46 | vScale = h / clip.height 47 | 48 | planes = [core.std.ShufflePlanes(clip, i, vs.GRAY) for i in range(num_planes)] 49 | for i in range(num_planes): 50 | if i == 0: 51 | planes[i] = getattr(core.resize, kernel)(planes[0], w, h, src_left=sx[0], src_top=sy[0], src_width=sw[0], src_height=sh[0], filter_param_a=a1, filter_param_b=a2) 52 | else: 53 | planes[i] = getattr(core.resize, kernel)(planes[i], w // (1 << clip.format.subsampling_w), h // (1 << clip.format.subsampling_h), src_left=((sx[i]-hCPlace) * hScale + hCPlace) / hScale / hSubS, src_top=((sy[i]-vCPlace) * vScale + vCPlace) / vScale / vSubS, src_width=sw[i] // (1 << clip.format.subsampling_w), src_height=sh[i] // (1 << clip.format.subsampling_h), filter_param_a=a1, filter_param_b=a2) 54 | res = core.std.ShufflePlanes(planes, [0] * num_planes, clip.format.color_family) 55 | 56 | return res 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # muvsfunc 2 | Muonium's VapourSynth functions 3 | 4 | ## Dependencies 5 | [VapourSynth](https://github.com/vapoursynth/vapoursynth) R39-R57 6 | 7 | ### Scripts 8 | - [mvsfunc](https://github.com/HomeOfVapourSynthEvolution/mvsfunc) 9 | 10 | - [nnedi3_resample](https://github.com/HomeOfVapourSynthEvolution/nnedi3_resample) 11 | 12 | and their dependencies. 13 | 14 | ### Plugins 15 | - [AWarpSharp2](https://github.com/dubhater/vapoursynth-awarpsharp2) 16 | 17 | - [Bilateral](https://github.com/HomeOfVapourSynthEvolution/VapourSynth-Bilateral) 18 | 19 | - [CAS](https://github.com/HomeOfVapourSynthEvolution/VapourSynth-CAS) 20 | 21 | - [CTMF](https://github.com/HomeOfVapourSynthEvolution/VapourSynth-CTMF) 22 | 23 | - [descale](https://github.com/Irrational-Encoding-Wizardry/descale) 24 | 25 | - [DFTTest](https://github.com/HomeOfVapourSynthEvolution/VapourSynth-DFTTest) 26 | 27 | - [EEDI2](https://github.com/HomeOfVapourSynthEvolution/VapourSynth-EEDI2) 28 | 29 | - [fmtconv](https://github.com/EleonoreMizo/fmtconv) 30 | 31 | - [misc](https://github.com/vapoursynth/vs-miscfilters-obsolete) (required by VS R55 and later) 32 | 33 | - [MVTools](https://github.com/dubhater/vapoursynth-mvtools) 34 | 35 | - [nnedi3](https://github.com/dubhater/vapoursynth-nnedi3) 36 | 37 | - [RemoveGrain](https://github.com/vapoursynth/vs-removegrain) (required by VS R55 and later) 38 | 39 | - [SangNom](https://bitbucket.org/James1201/vapoursynth-sangnom) 40 | 41 | - [TCanny](https://github.com/HomeOfVapourSynthEvolution/VapourSynth-TCanny) 42 | 43 | - [TemporalMedian](https://github.com/dubhater/vapoursynth-temporalmedian) 44 | 45 | - [VSFilter](https://github.com/HomeOfVapourSynthEvolution/VSFilter) (only required by `TextSub16()`) 46 | 47 | - [VSFilterMod](https://github.com/sorayuki/VSFilterMod) (only required by `TextSub16()`) 48 | 49 | - [vs_mxnet](https://github.com/kice/vs_mxnet) (only required by `super_resolution()`) 50 | 51 | ### Python Packages 52 | - [matplotlib](https://github.com/matplotlib/matplotlib) (only required by `getnative()`) 53 | 54 | - [MXNet](https://github.com/apache/incubator-mxnet) (only required by `super_resolution()`) 55 | 56 | ### Optional dependencies 57 | - [Akarin's Expr](https://github.com/AkarinVS/vapoursynth-plugin) (performance
optimizations) 58 | 59 | 60 | ## Files 61 | `muvsfunc.py` is the main script. It contains some algorithms like `GradFun3`, `GuidedFilter`, `TextSub16`, some helper functions like `MergeChroma`, and some ideas that I develop like `LDMerge`, `AnimeMask`. 62 | 63 | `muvsfunc_misc.py` is a complement of the previous script, containing some outdated algorithms like `SSR` (Single-scale Retinex), some helper functions like `gauss`, `band_merge`, and also one of my ideas, named `detail_enhancement`. It may or may not be merged into the main script someday. 64 | 65 | `muvsfunc_numpy.py` contains algorithms that are processed in `numpy.ndarray` rather than C/C++. Due to the low performance, they are mainly for research. This is where my current interest lies. 66 | 67 | `LUM.py` and `SuperRes.py` (it's not the SuperRes in madVR or MPDN) are the dross of history. You won't need to use them. 68 | 69 | ## Resources 70 | 71 | #### **_[OpenCV for VapourSynth](https://github.com/WolframRhodium/muvsfunc/wiki/OpenCV-Python-for-VapourSynth)_** 72 | 73 | #### [muvs tutorial](https://github.com/WolframRhodium/muvsfunc/wiki/muvs-tutorial) 74 | -------------------------------------------------------------------------------- /muvs.py: -------------------------------------------------------------------------------- 1 | """ 2 | An interface to VapourSynth 3 | 4 | *** DO NOT PUBLISH MODULES THAT DEPEND ON THIS *** 5 | 6 | objects: 7 | core (resembles vapoursynth.core) 8 | 9 | functions: 10 | pollute (poisons foreign modules) 11 | expr (switch for arithmetic expression) 12 | Expr (resembles core.std.Expr(), but with infix expression) 13 | record (computational graph recorder, resembles open()) 14 | Recorder (base class for recorder) 15 | 16 | functions for arithmetic expression: 17 | Abs, Exp, Not, And, Or, Xor, Log, 18 | Sqrt, Min, Max, Conditional 19 | 20 | """ 21 | 22 | from abc import ABC, abstractmethod, abstractstaticmethod 23 | from collections import OrderedDict 24 | import collections.abc 25 | from contextlib import contextmanager 26 | import functools 27 | import inspect 28 | import itertools 29 | import math 30 | import numbers 31 | import operator as op 32 | from typing import Callable, Dict, List, MutableMapping, MutableSet 33 | from typing import Optional, Sequence, Union 34 | import weakref 35 | 36 | import vapoursynth as vs 37 | from vapoursynth import core as _vscore 38 | 39 | 40 | __all__ = [ 41 | "core", "expr", "pollute", "Expr", "record", "Recorder", 42 | "Abs", "Exp", "Not", "And", "Or", "Xor", "Log", "Sqrt", 43 | "Min", "Max", "Conditional"] 44 | 45 | 46 | _is_api4: bool = hasattr(vs, "__api_version__") and vs.__api_version__.api_major == 4 47 | 48 | class _Core: 49 | def __init__(self): 50 | self._registered_funcs = {} # type: Dict[str, Callable[..., '_VideoNode']] 51 | 52 | def __setattr__(self, name, value): 53 | if name in ["num_threads", "max_cache_size"]: 54 | setattr(_vscore, name, value) 55 | else: 56 | if callable(value): 57 | if name[0].isupper() and not hasattr(_vscore, name): 58 | self._registered_funcs[name] = value 59 | else: 60 | raise AttributeError("Attribute name should be capitalized") 61 | else: 62 | vars(self)[name] = value 63 | 64 | def __getattr__(self, name): 65 | try: 66 | attr = getattr(_vscore, name) 67 | except AttributeError as e: 68 | if name in self._registered_funcs: 69 | return self._registered_funcs[name] 70 | else: 71 | raise e 72 | else: 73 | if isinstance(attr, vs.Plugin): 74 | return _Plugin(attr) 75 | else: 76 | return attr 77 | 78 | def __dir__(self) -> List[str]: 79
| return dir(_vscore) + sorted(list(self._registered_funcs.keys())) 80 | 81 | def register_functions(self, **kwargs: Dict[str, Callable[..., '_VideoNode']]): 82 | if all((name[0].isupper() and not hasattr(_vscore, name)) 83 | for name in kwargs.keys()): 84 | 85 | self._registered_funcs.update(kwargs) 86 | else: 87 | raise ValueError("Registration error.") 88 | 89 | core = _Core() 90 | 91 | 92 | arithmetic_expr : bool = False 93 | 94 | @contextmanager 95 | def expr(): 96 | global arithmetic_expr 97 | prev_expr = arithmetic_expr 98 | 99 | arithmetic_expr = True 100 | 101 | try: 102 | yield None 103 | finally: 104 | arithmetic_expr = prev_expr 105 | 106 | 107 | class Recorder: 108 | _live_recorders : MutableSet["Recorder"] = weakref.WeakSet() 109 | 110 | def __init__(self): 111 | self.buffer : List[str] = [] 112 | self.is_recording : bool = False 113 | Recorder._live_recorders.add(self) 114 | 115 | def start_recording(self, include_header=False): 116 | self.is_recording = True 117 | 118 | if include_header: 119 | self.buffer.append( 120 | "import vapoursynth as vs\n" 121 | "from vapoursynth import core\n" 122 | "\n" 123 | f"core.num_threads = {core.num_threads}\n" 124 | f"core.max_cache_size = {core.max_cache_size}\n" 125 | "\n") 126 | 127 | def end_recording(self, filename_or_stream, mode='a', **open_kwargs): 128 | self.is_recording = False 129 | 130 | if self.buffer: 131 | if isinstance(filename_or_stream, str): 132 | with open(filename_or_stream, mode=mode, **open_kwargs) as f: 133 | f.writelines(self.buffer) 134 | else: 135 | stream = filename_or_stream 136 | stream.writelines(self.buffer) 137 | 138 | self.buffer.clear() 139 | 140 | def write(self, text): 141 | assert isinstance(text, str) 142 | self.buffer.append(text) 143 | 144 | 145 | @contextmanager 146 | def record(filename_or_stream, mode='a', include_header=False, **open_kwargs): 147 | recorder = Recorder() 148 | 149 | recorder.start_recording(include_header) 150 | 151 | try: 152 | yield recorder 153 | finally: 154 | recorder.end_recording(filename_or_stream=filename_or_stream, mode=mode, **open_kwargs) 155 | 156 | 157 | def _build_repr() -> Callable[..., str]: 158 | _clip_name_mapping = weakref.WeakKeyDictionary() # type: MutableMapping[vs.VideoNode, str] 159 | counter = 0 160 | 161 | def closure(obj, default_prefix="unknown") -> str: 162 | if isinstance(obj, vs.VideoNode): 163 | if obj in _clip_name_mapping: 164 | return _clip_name_mapping[obj] 165 | 166 | else: 167 | nonlocal counter 168 | name = f"{default_prefix}{counter}" 169 | _clip_name_mapping[obj] = name 170 | counter += 1 171 | return name 172 | 173 | elif isinstance(obj, _VideoNode): 174 | return closure(obj._node, default_prefix) 175 | 176 | elif isinstance(obj, collections.abc.Sequence) and not isinstance(obj, (str, bytes, bytearray)): 177 | return f"[{', '.join(closure(elem, default_prefix) for elem in obj)}]" 178 | 179 | elif isinstance(obj, ( 180 | vs.ColorFamily, vs.SampleType, 181 | getattr(vs, "PresetFormat", getattr(vs, "PresetVideoFormat", None)) 182 | )): 183 | return f"vs.{obj.name}" 184 | 185 | elif isinstance(obj, (vs.VideoFormat if _is_api4 else vs.Format)): 186 | arg_str = ', '.join(f"{k}={closure(v)}" for k, v in obj._as_dict().items()) 187 | return f"core.query_video_format({arg_str})" if _is_api4 else f"core.register_format({arg_str})" 188 | 189 | else: 190 | return repr(obj) 191 | 192 | return closure 193 | 194 | _repr = _build_repr() 195 | 196 | 197 | class _Plugin: 198 | def __init__(self, plugin: vs.Plugin, injected_clip: Optional[vs.VideoNode] = 
None): 199 | if isinstance(plugin, vs.Plugin): 200 | self._plugin = plugin 201 | else: 202 | raise TypeError(f"{type(self).__name__!r}: Unknown plugin ({type(plugin)})") 203 | 204 | if injected_clip is None or isinstance(injected_clip, vs.VideoNode): 205 | self._injected_clip = injected_clip 206 | else: 207 | raise TypeError(f"{type(self).__name__!r}: Unknown injected clip ({type(injected_clip)})") 208 | 209 | def __getattr__(self, function_name): 210 | attr = getattr(self._plugin, function_name) 211 | 212 | if isinstance(attr, vs.Function): 213 | func = attr 214 | 215 | @functools.wraps(func) 216 | def closure(*args, **kwargs): 217 | if self._injected_clip is not None: 218 | args = (self._injected_clip, ) + args 219 | 220 | def get_node(obj): 221 | if isinstance(obj, vs.VideoNode): 222 | return obj 223 | elif isinstance(obj, _VideoNode): 224 | return obj._node 225 | elif isinstance(obj, _ArithmeticExpr): 226 | return obj.compute()._node 227 | elif ( 228 | isinstance(obj, collections.abc.Sequence) and 229 | not isinstance(obj, (str, bytes, bytearray)) 230 | ): 231 | return type(obj)(get_node(item) for item in obj) 232 | elif callable(obj): 233 | class _remove_wrap: 234 | """Fixes callables that return VideoNode""" 235 | def __init__(self, func): 236 | self.func = func 237 | 238 | def __call__(self, *args, **kwargs): 239 | output = self.func(*args, **kwargs) 240 | if isinstance(output, _VideoNode): 241 | output = output._node 242 | return output 243 | 244 | def __repr__(self): 245 | return repr(self.func) 246 | 247 | return _remove_wrap(obj) 248 | else: 249 | return obj 250 | 251 | def get_key(key): 252 | if key.startswith('_'): 253 | return key[1:] 254 | else: 255 | return key 256 | 257 | args = get_node(args) 258 | kwargs = dict((get_key(key), get_node(value)) for key, value in kwargs.items()) 259 | 260 | func_arg_names = ( 261 | key[:key.find(':')] 262 | for key in func.signature.split(';') 263 | if key != '') 264 | 265 | for _, arg_name in zip(args, func_arg_names): 266 | if arg_name in kwargs: 267 | raise TypeError( 268 | f"{func.plugin.namespace}.{func.name}() " 269 | f"got multiple values for argument \'{arg_name}\'") 270 | 271 | # process 272 | output = func(*args, **kwargs) 273 | 274 | if isinstance(output, vs.VideoNode): 275 | _ = _repr(output, default_prefix="clip") # register output 276 | 277 | for recorder in Recorder._live_recorders: 278 | if recorder.is_recording: 279 | recorder.buffer.append(self._get_str(func, args, kwargs, output) + '\n') 280 | 281 | return _VideoNode(output) 282 | elif isinstance(output, list) and len(output) > 0 and isinstance(output[0], vs.VideoNode): 283 | for item in output: 284 | _ = _repr(item, default_prefix="clip") # register output 285 | 286 | for recorder in Recorder._live_recorders: 287 | if recorder.is_recording: 288 | recorder.buffer.append(self._get_str(func, args, kwargs, output, check_output=False) + '\n') 289 | 290 | return list(_VideoNode(item) for item in output) 291 | else: 292 | return output 293 | 294 | return closure 295 | 296 | else: 297 | return attr 298 | 299 | def __hash__(self): 300 | return hash(self._plugin) 301 | 302 | def __dir__(self): 303 | return dir(self._plugin) 304 | 305 | @staticmethod 306 | def _get_str(func: vs.Function, args, kwargs, output, check_output=True): 307 | output_str = "" 308 | 309 | if check_output: 310 | def diff_str(clip1: vs.VideoNode, clip2: vs.VideoNode): 311 | """Compare two clips and output a string of their difference""" 312 | res = [] 313 | for attr in ["width", "height", "num_frames"]: 314 |
if getattr(clip1, attr) != getattr(clip2, attr): 315 | res.append(f"{attr}: {getattr(clip1, attr)} -> {getattr(clip2, attr)}") 316 | if clip1.format.name != clip2.format.name: 317 | res.append(f"format: {clip1.format.name} -> {clip2.format.name}") 318 | if clip1.fps != clip2.fps: 319 | res.append(f"fps: {clip1.fps_num}/{clip1.fps_den} -> {clip2.fps_num}/{clip2.fps_den}") 320 | return ', '.join(res) 321 | 322 | if len(args) > 0 and isinstance(args[0], vs.VideoNode): 323 | if diff_str(args[0], output) != "": 324 | output_str += f"# {diff_str(args[0], output)}\n" 325 | elif kwargs.get("clip", None): 326 | if diff_str(kwargs["clip"], output) != "": 327 | output_str += f"# {diff_str(kwargs['clip'], output)}\n" 328 | else: 329 | output_str += (f"# output: {output.width} x {output.height}, {output.format.name}, " 330 | f"{output.num_frames} frames, {output.fps_num}/{output.fps_den} fps\n") 331 | 332 | args_dict = inspect.signature(func).bind(*args, **kwargs).arguments 333 | 334 | # replace clip in args_dict.values() with name of clip 335 | call_args = ', '.join(f"{k}={_repr(v)}" for k, v in args_dict.items() if v is not None) 336 | call_str = f"core.{func.plugin.namespace}.{func.name}({call_args})" 337 | 338 | output_str += f"{_repr(output, default_prefix='clip')} = {call_str}\n" 339 | 340 | return output_str 341 | 342 | 343 | ########################## Expr IR Start ########################## 344 | class ExprIR(ABC): 345 | """ AST-style expression """ 346 | 347 | @abstractmethod 348 | def __eq__(self, other): 349 | pass 350 | 351 | @abstractmethod 352 | def __repr__(self): 353 | """ Infix and function call style """ 354 | pass 355 | 356 | @abstractmethod 357 | def __str__(self): 358 | """ Postfix style """ 359 | pass 360 | 361 | class DupN(ExprIR): 362 | def __eq__(self, other): 363 | return isinstance(other, DupN) 364 | 365 | def __repr__(self): 366 | return "DupN()" 367 | 368 | def __str__(self): 369 | return "dup" 370 | dup = DupN() 371 | 372 | class UnaryBaseOp(ExprIR): 373 | @abstractstaticmethod 374 | def cast(x): 375 | pass 376 | 377 | def __init__(self, x): 378 | self.x = self.cast(x) 379 | 380 | def __eq__(self, other): 381 | return isinstance(other, type(self)) and self.x == other.x 382 | 383 | def __repr__(self): 384 | return f"{type(self).__name__}({self.x!r})" 385 | 386 | def __str__(self): 387 | return f"{self.x!s} {type(self).__name__.lower()}" 388 | 389 | class ConstantN(UnaryBaseOp): 390 | def __str__(self): 391 | return f"{self.x!s}" 392 | 393 | @staticmethod 394 | def cast(x): 395 | assert isinstance(x, numbers.Real) 396 | return x 397 | ConstantN_0 = ConstantN(0) 398 | ConstantN_1 = ConstantN(1) 399 | 400 | class VarN(UnaryBaseOp): 401 | def __eq__(self, other): 402 | return isinstance(other, VarN) and hash(self.x) == hash(other.x) 403 | 404 | def __str__(self): 405 | return f"{self.x!s}" 406 | 407 | @staticmethod 408 | def cast(x): 409 | assert isinstance(x, _VideoNode) 410 | return x 411 | 412 | def Cast(x): 413 | if isinstance(x, ExprIR): 414 | return x 415 | elif isinstance(x, numbers.Real): 416 | return ConstantN(x) 417 | elif isinstance(x, _VideoNode): 418 | return VarN(x) 419 | elif isinstance(x, vs.VideoNode): 420 | return VarN(_VideoNode(x)) 421 | else: 422 | raise TypeError(f"Unknown input ({type(x)})") 423 | 424 | class UnaryOp(UnaryBaseOp): 425 | @abstractstaticmethod 426 | def compute(x): 427 | pass 428 | 429 | def __str__(self): 430 | return f"{self.x!s} {self.op_name}" 431 | 432 | @staticmethod 433 | def cast(x): 434 | return Cast(x) 435 | 436 | class
NotN(UnaryOp): 437 | op_name = "not" 438 | compute = op.not_ 439 | 440 | class AbsN(UnaryOp): 441 | op_name = "abs" 442 | compute = abs 443 | 444 | class SqrtN(UnaryOp): 445 | op_name = "sqrt" 446 | compute = math.sqrt 447 | 448 | class LogN(UnaryOp): 449 | op_name = "log" 450 | compute = math.log 451 | 452 | class ExpN(UnaryOp): 453 | op_name = "exp" 454 | compute = math.exp 455 | 456 | class BinaryOp(ExprIR): 457 | @abstractstaticmethod 458 | def compute(x, y): 459 | pass 460 | 461 | def __init__(self, x, y): 462 | self.x, self.y = self.cast(x, y) 463 | 464 | def __eq__(self, other): 465 | return ( 466 | isinstance(other, type(self)) and 467 | self.x == other.x and 468 | self.y == other.y 469 | ) 470 | 471 | def __repr__(self): 472 | return f"{type(self).__name__}({self.x!r}, {self.y!r})" 473 | 474 | def __str__(self): 475 | return f"{self.x!s} {self.y!s} {self.op_name}" 476 | 477 | @staticmethod 478 | def cast(x, y): 479 | return Cast(x), Cast(y) 480 | 481 | class AddN(BinaryOp): 482 | op_name = "+" 483 | compute = op.add 484 | 485 | class SubN(BinaryOp): 486 | op_name = "-" 487 | compute = op.sub 488 | 489 | class MulN(BinaryOp): 490 | op_name = "*" 491 | compute = op.mul 492 | 493 | class DivN(BinaryOp): 494 | op_name = "/" 495 | compute = op.truediv 496 | 497 | class PowN(BinaryOp): 498 | op_name = "pow" 499 | compute = op.pow 500 | 501 | class AndN(BinaryOp): 502 | op_name = "and" 503 | compute = op.and_ 504 | 505 | class OrN(BinaryOp): 506 | op_name = "or" 507 | compute = op.or_ 508 | 509 | class XorN(BinaryOp): 510 | op_name = "xor" 511 | compute = op.xor 512 | 513 | class LtN(BinaryOp): 514 | op_name = "<" 515 | compute = op.lt 516 | 517 | class LeN(BinaryOp): 518 | op_name = "<=" 519 | compute = op.le 520 | 521 | class EqN(BinaryOp): 522 | op_name = "=" 523 | compute = op.eq 524 | 525 | class NeN(BinaryOp): 526 | op_name = "= not" 527 | compute = op.ne 528 | 529 | class GeN(BinaryOp): 530 | op_name = ">=" 531 | compute = op.ge 532 | 533 | class GtN(BinaryOp): 534 | op_name = ">" 535 | compute = op.gt 536 | 537 | class MaxN(BinaryOp): 538 | op_name = "max" 539 | compute = max 540 | 541 | class MinN(BinaryOp): 542 | op_name = "min" 543 | compute = min 544 | 545 | class ConditionalN(ExprIR): 546 | def __init__(self, x, y, z): 547 | self.x, self.y, self.z = self.cast(x, y, z) 548 | 549 | def __eq__(self, other): 550 | return ( 551 | isinstance(other, ConditionalN) and 552 | self.x == other.x and 553 | self.y == other.y and 554 | self.z == other.z 555 | ) 556 | 557 | def __repr__(self): 558 | return f"ConditionalN({self.x!r}, {self.y!r}, {self.z!r})" 559 | 560 | def __str__(self): 561 | return f"{self.x!s} {self.y!s} {self.z!s} ?" 
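    # For example: str(ConditionalN(1, 2, 3)) == "1 2 3 ?" (operands are Cast to
    # ConstantN on construction), while repr() gives the function-call form
    # "ConditionalN(ConstantN(1), ConstantN(2), ConstantN(3))".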
562 | 563 | @staticmethod 564 | def cast(x, y, z): 565 | return Cast(x), Cast(y), Cast(z) 566 | 567 | @staticmethod 568 | def compute(x, y, z): 569 | return y if x else z 570 | 571 | def _simplify(expr: ExprIR) -> ExprIR: 572 | assert isinstance(expr, ExprIR) 573 | 574 | while True: 575 | prev_expr = expr 576 | 577 | # early skipping 578 | if isinstance(expr, (DupN, ConstantN, VarN)): 579 | return expr 580 | # constant foldings and universal eliminations 581 | elif isinstance(expr, UnaryOp) and isinstance(expr.x, ConstantN): 582 | # num op -> op(num) 583 | return ConstantN(expr.compute(expr.x.x)) 584 | elif isinstance(expr, BinaryOp): 585 | if isinstance(expr.x, ConstantN): 586 | if isinstance(expr.y, ConstantN): 587 | # num1 num2 op -> op(num1, num2) 588 | return ConstantN(expr.compute(expr.x.x, expr.y.x)) 589 | elif isinstance(expr.y, DupN): 590 | # num dup op -> op(num, num) 591 | return ConstantN(expr.compute(expr.x.x, expr.x.x)) 592 | elif expr.x == expr.y: 593 | # x x op -> x dup op 594 | expr = type(expr)(expr.x, dup) 595 | 596 | # operator specific simplifications 597 | if isinstance(expr, SqrtN): 598 | x = _simplify(expr.x) 599 | if isinstance(x, MulN) and isinstance(x.y, DupN): 600 | # x dup * sqrt -> x abs 601 | expr = AbsN(x.x) 602 | else: 603 | expr = SqrtN(x) 604 | elif isinstance(expr, LogN): 605 | x = _simplify(expr.x) 606 | if isinstance(x, ExpN): 607 | # x exp log -> x 608 | expr = x.x 609 | else: 610 | expr = LogN(x) 611 | elif isinstance(expr, ExpN): 612 | x = _simplify(expr.x) 613 | if isinstance(x, LogN): 614 | # x log exp -> x 615 | expr = x.x 616 | else: 617 | expr = ExpN(x) 618 | elif isinstance(expr, AddN): 619 | if expr.x == ConstantN_0: 620 | # 0 x + -> x 621 | expr = expr.y 622 | elif expr.y == ConstantN_0: 623 | # x 0 + -> x 624 | expr = expr.x 625 | elif isinstance(expr, SubN): 626 | if isinstance(expr.y, DupN): 627 | # x dup - -> 0 628 | return ConstantN_0 629 | elif expr.y == ConstantN_0: 630 | # x 0 - -> x 631 | expr = expr.x 632 | elif isinstance(expr, MulN): 633 | if expr.x == ConstantN_1: 634 | # 1 x * -> x 635 | expr = expr.y 636 | elif expr.y == ConstantN_1: 637 | # x 1 * -> x 638 | expr = expr.x 639 | elif isinstance(expr, DivN): 640 | if isinstance(expr.y, DupN): 641 | # x dup / -> 1 642 | return ConstantN_1 643 | elif expr.y == ConstantN_1: 644 | # x 1 / -> x 645 | expr = expr.x 646 | elif isinstance(expr, PowN): 647 | if isinstance(expr.x, ConstantN): 648 | if expr.x == ConstantN_0: 649 | # 0 x pow -> 0 650 | expr = ConstantN_0 651 | elif expr.x == ConstantN_1: 652 | # 1 x pow -> 1 653 | expr = ConstantN_1 654 | elif expr.x == ConstantN(math.e): 655 | # math.e x pow -> x exp 656 | expr = ExpN(expr.y) 657 | elif isinstance(expr.y, ConstantN): 658 | if expr.y == ConstantN_0: 659 | # x 0 pow -> 1 660 | expr = ConstantN_1 661 | elif expr.y == ConstantN_1: 662 | # x 1 pow -> x 663 | expr = expr.x 664 | elif expr.y == ConstantN(2): 665 | # x 2 pow -> x dup * 666 | expr = MulN(expr.x, dup) 667 | elif expr.y == ConstantN(0.5): 668 | # x 0.5 pow -> x sqrt 669 | expr = SqrtN(expr.x) 670 | elif expr.y == ConstantN(-0.5): 671 | # x -0.5 pow -> x dup sqrt / 672 | expr = DivN(expr.x, SqrtN(dup)) 673 | elif isinstance(expr, (MaxN, MinN)) and isinstance(expr.y, DupN): 674 | # x dup {max/min} -> x 675 | expr = expr.x 676 | elif isinstance(expr, ConditionalN): 677 | if isinstance(expr.x, ConstantN): 678 | # num x y ? -> (num ? x : y) 679 | expr = expr.y if expr.x.x else expr.z 680 | elif expr.y == expr.z: 681 | # _ x x ?
-> x 682 | expr = expr.y 683 | 684 | # non-local simplification of binary operations 685 | if isinstance(expr, BinaryOp): 686 | expr = type(expr)(_simplify(expr.x), _simplify(expr.y)) 687 | 688 | if isinstance(expr.x, ConstantN): 689 | if isinstance(expr.y, UnaryOp): 690 | if isinstance(expr.y.x, DupN): 691 | # num dup op1 op2 -> num num op1 op2 692 | expr = type(expr)(expr.x, type(expr.y)(expr.x)) 693 | elif isinstance(expr.y, BinaryOp): 694 | if isinstance(expr.y.x, DupN): 695 | # num dup x op1 op2 -> num num x op1 op2 696 | expr = type(expr)(expr.x, type(expr.y)(expr.x, expr.y.y)) 697 | elif isinstance(expr.y, BinaryOp) and expr.x == expr.y.x: 698 | # x x y op1 op2 -> x dup y op1 op2 699 | expr = type(expr)(expr.x, type(expr.y)(dup, expr.y.y)) 700 | 701 | if expr == prev_expr: 702 | # no progress 703 | return expr 704 | else: 705 | prev_expr = expr 706 | # continue 707 | 708 | def postfix(expr: ExprIR, namer: Optional[Callable[[VarN], str]] = None) -> str: 709 | assert isinstance(expr, ExprIR) 710 | 711 | if isinstance(expr, ConstantN): 712 | return str(expr) 713 | elif isinstance(expr, VarN): 714 | if namer is None: 715 | return str(expr) 716 | else: 717 | return namer(expr) 718 | elif isinstance(expr, DupN): 719 | return "dup" 720 | elif isinstance(expr, UnaryOp): 721 | return f"{postfix(expr.x, namer)} {expr.op_name}" 722 | elif isinstance(expr, BinaryOp): 723 | first = postfix(expr.x, namer) 724 | return f"{first} {postfix(expr.y, namer)} {expr.op_name}" 725 | elif isinstance(expr, ConditionalN): 726 | first = postfix(expr.x, namer) 727 | second = postfix(expr.y, namer) 728 | return f"{first} {second} {postfix(expr.z, namer)} ?" 729 | else: 730 | raise TypeError(f"Unknown type {type(expr)}") 731 | 732 | 733 | def infix(expr: ExprIR, namer: Optional[Callable[[VarN], str]] = None, 734 | top: Optional[str] = None 735 | ) -> str: 736 | assert isinstance(expr, ExprIR) 737 | 738 | if isinstance(expr, ConstantN): 739 | return str(expr) 740 | elif isinstance(expr, VarN): 741 | if namer is None: 742 | return str(expr) 743 | else: 744 | return namer(expr) 745 | elif isinstance(expr, DupN): 746 | if top: 747 | return top 748 | else: 749 | raise ValueError("Empty dup node") 750 | elif isinstance(expr, UnaryOp): 751 | return f"{expr.op_name}({infix(expr.x, namer, top=top)})" 752 | elif isinstance(expr, BinaryOp): 753 | first = infix(expr.x, namer, top=top) 754 | return f"({first} {expr.op_name} {infix(expr.y, namer, top=first)})" 755 | elif isinstance(expr, ConditionalN): 756 | first = infix(expr.x, namer, top=top) 757 | second = infix(expr.y, namer, top=first) 758 | return f"({second} if {first} else {infix(expr.z, namer, top=second)})" 759 | else: 760 | raise TypeError(f"Unknown type {type(expr)}") 761 | 762 | ########################## Expr IR End ########################## 763 | 764 | def namer_factory(): 765 | alphabet = "xyzabcdefghijklmnopqrstuvw" 766 | mapping = OrderedDict() # type: MutableMapping[_VideoNode, str] 767 | 768 | def namer(obj: VarN) -> str: 769 | x = obj.x 770 | if x in mapping or len(mapping) < len(alphabet): 771 | return mapping.setdefault(x, f"{alphabet[len(mapping)]}") 772 | else: 773 | raise RuntimeError("namer: Too many nodes") 774 | 775 | return namer 776 | 777 | 778 | class _Fake_VideoNode: 779 | """ Fake VideoNode used to bypass instance check in other scripts """ 780 | pass 781 | 782 | 783 | class _ArithmeticExpr(_Fake_VideoNode): 784 | def __init__(self, obj): 785 | self._expr = Cast(obj) # type: ExprIR 786 | self._cached_clip = None # type:
Optional[_VideoNode] 787 | 788 | def __getattr__(self, name): 789 | if hasattr(_vscore, name) or hasattr(self.clips[0], name): 790 | return getattr(self.compute(), name) 791 | else: 792 | raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}") 793 | 794 | def __bool__(self): 795 | raise RuntimeError("Impossible") 796 | 797 | def __hash__(self): 798 | return hash(self.clips + (self.expr,)) 799 | 800 | def __str__(self): 801 | def namer(x: VarN): 802 | return _repr(x.x) 803 | return infix(self._expr, namer=namer).strip("()") 804 | 805 | @property 806 | def clips(self): 807 | from collections import OrderedDict 808 | 809 | clips_dict = OrderedDict() 810 | exprs = [self._expr] 811 | 812 | while exprs: 813 | expr = exprs.pop() 814 | if isinstance(expr, VarN): 815 | clips_dict.setdefault(expr.x, None) 816 | elif isinstance(expr, UnaryOp): 817 | exprs.append(expr.x) 818 | elif isinstance(expr, BinaryOp): 819 | exprs.extend([expr.y, expr.x]) 820 | elif isinstance(expr, ConditionalN): 821 | exprs.extend([expr.z, expr.y, expr.x]) 822 | 823 | return tuple(clips_dict.keys()) 824 | 825 | def get_expr(self, namer) -> str: 826 | return postfix(self._expr, namer=namer) 827 | 828 | @property 829 | def expr(self) -> str: 830 | return self.get_expr(namer=namer_factory()) 831 | 832 | @property 833 | def lut_func(self) -> Callable[..., numbers.Integral]: 834 | clips = self.clips 835 | 836 | assert len(clips) in [1, 2] 837 | 838 | func_impl = infix(self._expr, namer=namer_factory()) 839 | func_impl = f"min(max(int({func_impl} + 0.5), 0), {(2 ** clips[0].format.bits_per_sample) - 1})" # clamp 840 | 841 | if len(clips) == 1: 842 | lut_str = f"lambda x: {func_impl}" 843 | else: # len(clips) == 2 844 | lut_str = f"lambda x, y: {func_impl}" 845 | 846 | class _LambdaFunction: 847 | def __init__(self, func_str: str): 848 | self.func = eval(func_str, {"exp": math.exp, "log": math.log, "sqrt": math.sqrt}) 849 | self.func_str = func_str 850 | 851 | def __call__(self, *args, **kwargs): 852 | return self.func(*args, **kwargs) 853 | 854 | def __repr__(self): 855 | return self.func_str 856 | 857 | return _LambdaFunction(lut_str) 858 | 859 | def compute(self, planes=None, bits=None, use_lut=None, 860 | simplify: Union[bool, Callable[[ExprIR], ExprIR]] = True 861 | ) -> '_VideoNode': 862 | 863 | if arithmetic_expr: 864 | cacheable = planes is None and bits is None and use_lut is None 865 | 866 | if cacheable and self._cached_clip is not None: 867 | return self._cached_clip 868 | 869 | if simplify: 870 | if callable(simplify): 871 | self._expr = simplify(self._expr) 872 | else: 873 | self._expr = _simplify(self._expr) 874 | 875 | if len(self.clips) == 0: 876 | raise ValueError("ArithmeticExpr becomes empty") 877 | 878 | if self.expr in ['', 'x']: # empty expr 879 | return _VideoNode(self.clips[0]._node) 880 | else: 881 | clips = self.clips 882 | if len(clips) > 26: 883 | raise RuntimeError("Too many clips.") 884 | 885 | if bits is None: 886 | not_equal_bits = ( 887 | lambda clip1, clip2: 888 | clip1.format.bits_per_sample != clip2.format.bits_per_sample) 889 | 890 | if len(clips) >= 2 and any(not_equal_bits(clips[0], clip) for clip in clips[1:]): 891 | raise ValueError('"bits" must be specified.') 892 | else: 893 | bits = clips[0].format.bits_per_sample 894 | 895 | is_int = lambda clip: clip.format.sample_type == vs.INTEGER 896 | get_bits = lambda clip: clip.format.bits_per_sample 897 | lut_available = ( 898 | lambda clips: 899 | len(clips) <= 2 and all(map(is_int, clips)) and sum(map(get_bits, clips)) 
<= 20) 900 | 901 | if use_lut is None: 902 | use_lut = lut_available(clips) and len(self.expr.split()) >= 15 903 | elif use_lut and not lut_available(clips): 904 | raise ValueError("Lut computation is not available") 905 | 906 | # process 907 | if use_lut: # std.Lut() / std.Lut2() 908 | if len(clips) == 1: 909 | res = core.std.Lut(clips[0], planes=planes, bits=bits, function=self.lut_func) 910 | else: # len(clips) == 2 911 | res = core.std.Lut2( 912 | clips[0], clips[1], planes=planes, bits=bits, function=self.lut_func) 913 | 914 | else: # std.Expr() 915 | if planes is None: 916 | expr = self.expr 917 | else: 918 | if isinstance(planes, int): 919 | planes = [planes] 920 | 921 | expr = [ 922 | (self.expr if i in planes else "") 923 | for i in range(clips[0].format.num_planes)] 924 | 925 | in_format = clips[0].format 926 | 927 | if bits == in_format.bits_per_sample: 928 | out_format = None 929 | else: 930 | query_video_format = core.query_video_format if _is_api4 else core.register_format 931 | out_format = query_video_format( 932 | color_family=in_format.color_family, 933 | sample_type=vs.INTEGER if bits <= 16 else vs.FLOAT, 934 | bits_per_sample=bits, 935 | subsampling_w=in_format.subsampling_w, 936 | subsampling_h=in_format.subsampling_h 937 | ) 938 | 939 | res = core.std.Expr(clips=clips, expr=expr, format=out_format) 940 | 941 | if cacheable: 942 | self._cached_clip = res 943 | 944 | return res 945 | 946 | else: 947 | raise RuntimeError("Arithmetic expression is disabled.") 948 | 949 | # Arithmetic methods 950 | def _operate(self, 951 | op: Union[UnaryOp, BinaryOp, ConditionalN], 952 | *operands: Sequence[Union[numbers.Real, vs.VideoNode, "_VideoNode", ExprIR]] 953 | ) -> "_ArithmeticExpr": 954 | unwrap = lambda x: x._expr if isinstance(x, type(self)) else x 955 | result = op(*map(unwrap, operands)) 956 | return type(self)(result) 957 | 958 | # unary operations 959 | def __neg__(self): 960 | return self._operate(SubN, 0, self) 961 | 962 | def __pos__(self): 963 | return self 964 | 965 | def __abs__(self): 966 | return self._operate(AbsN, self) 967 | 968 | def __invert__(self): 969 | return self._operate(NotN, self) 970 | 971 | def __exp__(self): 972 | return self._operate(ExpN, self) 973 | 974 | def __log__(self): 975 | return self._operate(LogN, self) 976 | 977 | def __sqrt__(self): 978 | return self._operate(SqrtN, self) 979 | 980 | # binary operations 981 | def __lt__(self, other): 982 | return self._operate(LtN, self, other) 983 | 984 | def __le__(self, other): 985 | return self._operate(LeN, self, other) 986 | 987 | def __eq__(self, other): 988 | return self._operate(EqN, self, other) 989 | 990 | def __ne__(self, other): 991 | return self._operate(NeN, self, other) 992 | 993 | def __gt__(self, other): 994 | return self._operate(GtN, self, other) 995 | 996 | def __ge__(self, other): 997 | return self._operate(GeN, self, other) 998 | 999 | def __add__(self, other): 1000 | return self._operate(AddN, self, other) 1001 | 1002 | def __radd__(self, other): 1003 | return self._operate(AddN, other, self) 1004 | 1005 | def __sub__(self, other): 1006 | return self._operate(SubN, self, other) 1007 | 1008 | def __rsub__(self, other): 1009 | return self._operate(SubN, other, self) 1010 | 1011 | def __mul__(self, other): 1012 | return self._operate(MulN, self, other) 1013 | 1014 | def __rmul__(self, other): 1015 | return self._operate(MulN, other, self) 1016 | 1017 | def __truediv__(self, other): 1018 | return self._operate(DivN, self, other) 1019 | 1020 | def __rtruediv__(self, other): 1021 | 
return self._operate(DivN, other, self) 1022 | 1023 | def __pow__(self, other, module=None): 1024 | if module is None: 1025 | return self._operate(PowN, self, other) 1026 | else: 1027 | raise NotImplementedError 1028 | 1029 | def __rpow__(self, other): 1030 | return self._operate(PowN, other, self) 1031 | 1032 | def __and__(self, other): 1033 | return self._operate(AndN, self, other) 1034 | 1035 | def __rand__(self, other): 1036 | return self._operate(AndN, other, self) 1037 | 1038 | def __or__(self, other): 1039 | return self._operate(OrN, self, other) 1040 | 1041 | def __ror__(self, other): 1042 | return self._operate(OrN, other, self) 1043 | 1044 | def __xor__(self, other): 1045 | return self._operate(XorN, self, other) 1046 | 1047 | def __rxor__(self, other): 1048 | return self._operate(XorN, other, self) 1049 | 1050 | # custom binary operations 1051 | def __max__(self, other): 1052 | return self._operate(MaxN, self, other) 1053 | 1054 | def __rmax__(self, other): 1055 | return self._operate(MaxN, other, self) 1056 | 1057 | def __min__(self, other): 1058 | return self._operate(MinN, self, other) 1059 | 1060 | def __rmin__(self, other): 1061 | return self._operate(MinN, other, self) 1062 | 1063 | # custom ternary operation 1064 | def __conditional__(self, other_true, other_false): 1065 | return self._operate(ConditionalN, self, other_true, other_false) 1066 | 1067 | def __rconditional__(self, other_condition, other_false): 1068 | return self._operate(ConditionalN, other_condition, self, other_false) 1069 | 1070 | def __rrconditional__(self, other_condition, other_true): 1071 | return self._operate(ConditionalN, other_condition, other_true, self) 1072 | 1073 | 1074 | def _build_VideoNode(fake_vn=None): 1075 | _plane_idx_mapping = { 1076 | vs.YUV: {'Y': 0, 'U': 1, 'V': 2}, 1077 | vs.RGB: {'R': 0, 'G': 1, 'B': 2}, 1078 | vs.GRAY: {'GRAY': 0, 'Y': 0} 1079 | } 1080 | 1081 | def __init__(self, node: vs.VideoNode): 1082 | if not isinstance(node, vs.VideoNode): 1083 | raise TypeError(f"{type(self).__name__!r}: Unknown input ({type(node)})") 1084 | self._node = node 1085 | 1086 | def __getattr__(self, name): 1087 | if name[0].isupper(): # non-standard attributes 1088 | if (self.format.color_family in _plane_idx_mapping and 1089 | name in _plane_idx_mapping[self.format.color_family]): 1090 | 1091 | idx = _plane_idx_mapping[self.format.color_family][name] 1092 | return self.std.ShufflePlanes(planes=idx, colorfamily=vs.GRAY) 1093 | 1094 | elif hasattr(core, name): 1095 | func = getattr(core, name) 1096 | return functools.partial(func, self) 1097 | else: 1098 | raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}") 1099 | 1100 | elif hasattr(_vscore, name) and isinstance(getattr(_vscore, name), vs.Plugin): 1101 | plugin = getattr(_vscore, name) 1102 | return _Plugin(plugin, self._node) 1103 | else: 1104 | attr = getattr(self._node, name) 1105 | 1106 | if callable(attr): # set_output(), etc 1107 | @functools.wraps(attr) 1108 | def closure(*args, **kwargs): 1109 | for recorder in Recorder._live_recorders: 1110 | if recorder.is_recording: 1111 | args_str = ', '.join(map(_repr, args)) 1112 | kwargs_str = ', '.join(f"{k}={_repr(v)}" for k, v in kwargs.items()) 1113 | call_str = ', '.join(s for s in [args_str, kwargs_str] if s != '') 1114 | recorder.buffer.append(f"{_repr(self)}.{name}({call_str})\n") 1115 | 1116 | return attr(*args, **kwargs) 1117 | 1118 | return closure 1119 | 1120 | else: 1121 | return attr 1122 | 1123 | def __len__(self): 1124 | return self.num_frames 1125 | 1126 | def
__str__(self): 1127 | return f"muvs {self._node!s}" 1128 | 1129 | def __bool__(self): 1130 | raise RuntimeError("Impossible") 1131 | 1132 | def __dir__(self): 1133 | return dir(self._node) + list(_plane_idx_mapping[self.format.color_family].keys()) 1134 | 1135 | def __hash__(self): 1136 | return hash(self._node) 1137 | 1138 | def __iter__(self): 1139 | return (type(self)(clip) for clip in iter(self._node)) 1140 | 1141 | def __getitem__(self, val): 1142 | if isinstance(val, slice): 1143 | start, stop, step = val.indices(self.num_frames) 1144 | 1145 | if step > 0: 1146 | stop -= 1 1147 | else: # step < 0 1148 | start, stop = stop + 1, start 1149 | 1150 | ret = self 1151 | 1152 | if start != 0 or stop != ret.num_frames - 1: 1153 | ret = ret.std.Trim(first=start, last=stop) 1154 | 1155 | if step < 0: 1156 | ret = ret.std.Reverse() 1157 | 1158 | if abs(step) != 1: 1159 | ret = ret.std.SelectEvery(cycle=abs(step), offsets=[0]) 1160 | 1161 | if ret is self: # shallow copy 1162 | ret = type(self)(self._node) 1163 | 1164 | return ret 1165 | 1166 | elif isinstance(val, int): 1167 | if val < 0: 1168 | n = self.num_frames + val 1169 | else: 1170 | n = val 1171 | 1172 | if n < 0 or (self.num_frames > 0 and n >= self.num_frames): 1173 | raise IndexError("index out of range") 1174 | else: 1175 | return self.std.Trim(first=n, length=1) 1176 | else: 1177 | raise TypeError(f"indices must be integers or slices, not {type(val)}") 1178 | 1179 | methods = locals().copy() 1180 | 1181 | create_method = (lambda name: 1182 | lambda self, *args: 1183 | getattr(_ArithmeticExpr(self), name)(*args)) 1184 | 1185 | magic_methods = [ 1186 | "__neg__", "__pos__", "__abs__", "__exp__", "__log__", "__invert__", "__sqrt__", "__lt__", 1187 | "__le__", "__eq__", "__ne__", "__gt__", "__ge__", "__add__", "__radd__", "__sub__", 1188 | "__rsub__", "__mul__", "__rmul__", "__truediv__", "__rtruediv__", "__pow__", "__rpow__", 1189 | "__and__", "__rand__", "__xor__", "__rxor__", "__or__", "__ror__", "__min__", "__rmin__", 1190 | "__max__", "__rmax__", "__conditional__", "__rconditional__", "__rrconditional__" 1191 | ] 1192 | 1193 | methods.update((name, create_method(name)) for name in magic_methods) 1194 | 1195 | return type("_VideoNode", (fake_vn,) if fake_vn is not None else (), methods) 1196 | 1197 | _VideoNode = _build_VideoNode(_Fake_VideoNode) 1198 | 1199 | 1200 | def Expr(exprs, format=None, 1201 | simplify: Union[bool, Callable[[ExprIR], ExprIR]] = True 1202 | ) -> '_VideoNode': 1203 | if isinstance(exprs, _VideoNode): 1204 | exprs = [_ArithmeticExpr(exprs)] 1205 | elif isinstance(exprs, _ArithmeticExpr): 1206 | exprs = [exprs] 1207 | elif isinstance(exprs, collections.abc.Sequence): 1208 | if len(exprs) == 0: 1209 | raise ValueError("Empty expression") 1210 | 1211 | for i in range(len(exprs)): 1212 | if isinstance(exprs[i], _VideoNode): 1213 | exprs[i] = _ArithmeticExpr(exprs[i]) 1214 | elif exprs[i] is not None and not isinstance(exprs[i], (_ArithmeticExpr, numbers.Real)): 1215 | raise TypeError(f"Invalid type ({type(exprs[i])})") 1216 | 1217 | if simplify: 1218 | for i in range(len(exprs)): 1219 | if isinstance(exprs[i], _ArithmeticExpr): 1220 | if callable(simplify): 1221 | exprs[i] = _ArithmeticExpr(simplify(exprs[i]._expr)) 1222 | else: 1223 | exprs[i] = _ArithmeticExpr(_simplify(exprs[i]._expr)) 1224 | 1225 | for expr in exprs: 1226 | if isinstance(expr, _ArithmeticExpr): 1227 | num_planes = expr.clips[0].format.num_planes 1228 | 1229 | for i in range(len(exprs), num_planes): 1230 | exprs.append(exprs[-1]) 1231 | 1232 | 
break 1233 | else: 1234 | raise ValueError("No clip is given") 1235 | 1236 | namer = namer_factory() 1237 | 1238 | expr_strs = [] 1239 | for i in range(num_planes): 1240 | if exprs[i] is None: 1241 | expr_strs.append("") 1242 | elif isinstance(exprs[i], numbers.Real): 1243 | expr_strs.append(str(exprs[i])) 1244 | else: 1245 | expr_str = exprs[i].get_expr(namer=namer) 1246 | 1247 | if expr_str == 'x': 1248 | expr_strs.append('') 1249 | else: 1250 | expr_strs.append(expr_str) 1251 | 1252 | clips = ( 1253 | tuple(OrderedDict((obj, None) for obj in itertools.chain.from_iterable( 1254 | expr.clips for expr in exprs 1255 | if isinstance(expr, _ArithmeticExpr) 1256 | )).keys())) 1257 | 1258 | return core.std.Expr(clips, expr_strs, format) 1259 | 1260 | 1261 | # custom operations 1262 | Abs = abs 1263 | 1264 | def Exp(x): 1265 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1266 | return x.__exp__() 1267 | else: 1268 | return math.exp(x) 1269 | 1270 | 1271 | def Not(x): 1272 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1273 | return x.__invert__() 1274 | else: 1275 | return not x 1276 | 1277 | 1278 | def And(x, y): 1279 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1280 | return x.__and__(y) 1281 | elif isinstance(y, (_ArithmeticExpr, _VideoNode)): 1282 | return y.__rand__(x) 1283 | else: 1284 | return x and y 1285 | 1286 | 1287 | def Or(x, y): 1288 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1289 | return x.__or__(y) 1290 | elif isinstance(y, (_ArithmeticExpr, _VideoNode)): 1291 | return y.__ror__(x) 1292 | else: 1293 | return x or y 1294 | 1295 | 1296 | def Xor(x, y): 1297 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1298 | return x.__xor__(y) 1299 | elif isinstance(y, (_ArithmeticExpr, _VideoNode)): 1300 | return y.__rxor__(x) 1301 | else: 1302 | return (x and not y) or (not x and y) 1303 | 1304 | 1305 | def Log(x): 1306 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1307 | return x.__log__() 1308 | else: 1309 | return math.log(x) 1310 | 1311 | 1312 | def Sqrt(x): 1313 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1314 | return x.__sqrt__() 1315 | else: 1316 | return math.sqrt(x) 1317 | 1318 | 1319 | def Min(x, y): 1320 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1321 | return x.__min__(y) 1322 | elif isinstance(y, (_ArithmeticExpr, _VideoNode)): 1323 | return y.__rmin__(x) 1324 | else: 1325 | return min(x, y) 1326 | 1327 | 1328 | def Max(x, y): 1329 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1330 | return x.__max__(y) 1331 | elif isinstance(y, (_ArithmeticExpr, _VideoNode)): 1332 | return y.__rmax__(x) 1333 | else: 1334 | return max(x, y) 1335 | 1336 | 1337 | def Conditional(condition, condition_if_true, condition_if_false): 1338 | try: 1339 | return condition_if_true if condition else condition_if_false 1340 | except RuntimeError: 1341 | if isinstance(condition, (_ArithmeticExpr, _VideoNode)): 1342 | return condition.__conditional__(condition_if_true, condition_if_false) 1343 | elif isinstance(condition_if_true, (_ArithmeticExpr, _VideoNode)): 1344 | return condition_if_true.__rconditional__(condition, condition_if_false) 1345 | elif isinstance(condition_if_false, (_ArithmeticExpr, _VideoNode)): 1346 | return condition_if_false.__rrconditional__(condition, condition_if_true) 1347 | else: 1348 | raise TypeError(f"'Conditional': Unknown input ({type(condition)}, " 1349 | f"{type(condition_if_true)}, {type(condition_if_false)})") 1350 | 1351 | 1352 | def pollute(*modules): 1353 | class _FakeVS: 1354 | def __init__(self): 1355 | self.VideoNode = 
_Fake_VideoNode 1356 | self.core = core 1357 | self.get_core = lambda : core 1358 | 1359 | def __getattr__(self, name): 1360 | return getattr(vs, name) 1361 | 1362 | _vs = _FakeVS() 1363 | 1364 | # modify symbol table of each module 1365 | if len(modules) == 0: 1366 | import sys 1367 | for name, module in sys.modules.items(): 1368 | if ( 1369 | name not in ("__vapoursynth__", "__main__") and 1370 | getattr(module, "core", None) is not core and 1371 | ((getattr(module, "vs", None) is vs) or 1372 | (getattr(module, "core", None) is _vscore)) 1373 | ): 1374 | module.core = core 1375 | module.vs = _vs 1376 | else: 1377 | for module in modules: 1378 | module.core = core 1379 | module.vs = _vs 1380 | --------------------------------------------------------------------------------