├── Collections
│   ├── LUM.py
│   ├── SuperRes.py
│   ├── descale_verifyer.vpy
│   ├── examples
│   │   ├── BilateralGPU_cupy
│   │   │   ├── bilateral.cu
│   │   │   └── bilateral_gpu_cupy.vpy
│   │   ├── Dpid_cupy
│   │   │   ├── dpid.cu
│   │   │   └── dpid_cupy.vpy
│   │   ├── Expr_msvc
│   │   │   ├── Expr.vpy
│   │   │   └── Source_template.cpp
│   │   ├── FFDNet_TensorRT
│   │   │   ├── README.md
│   │   │   ├── benchmark.py
│   │   │   ├── build_engine.py
│   │   │   ├── ffdnet_test.vpy
│   │   │   ├── utils.py
│   │   │   └── vs_ffdnet.py
│   │   ├── KNLMeasCL_cupy
│   │   │   ├── knlm.cu
│   │   │   ├── knlm_cupy.vpy
│   │   │   ├── knlm_mem_inefficient.cu
│   │   │   └── knlm_mem_inefficient_cupy.vpy
│   │   ├── NLH_cupy
│   │   │   ├── NLH_cupy.vpy
│   │   │   └── kernel.cu
│   │   ├── SigmaFilter_cupy
│   │   │   ├── sigma_filter.cu
│   │   │   └── sigma_filter_cupy.vpy
│   │   ├── SigmaFilter_cython
│   │   │   ├── setup.py
│   │   │   ├── sigma_filter.pyx
│   │   │   └── sigma_filter_cython.vpy
│   │   ├── Super-xBR_cupy
│   │   │   ├── super-xbr.cu
│   │   │   └── super-xbr_cupy.vpy
│   │   ├── sigma_filter_numba.vpy
│   │   ├── super_resolution_mxnet.vpy
│   │   └── super_resolution_opencv.vpy
│   ├── muvsfunc_misc.py
│   ├── muvsfunc_numpy.py
│   ├── net_interp.py
│   └── resize.py
├── README.md
├── muvs.py
└── muvsfunc.py
/Collections/LUM.py:
--------------------------------------------------------------------------------
1 | """LUM filters for VapourSynth
2 | 
3 | Ref:
4 |     [1] Hardie, R. C., & Boncelet, C. (1993). LUM filters: a class of rank-order-based filters for smoothing and sharpening. IEEE transactions on signal processing, 41(3), 1061-1076.
5 | """
6 | 
7 | from vapoursynth import core
8 | 
9 | import havsfunc as haf
10 | import muvsfunc as muf
11 | 
12 | 
13 | def lum_smoother(input, k):
14 |     low = muf.Sort(input, k, mode='min')
15 |     high = muf.Sort(input, k, mode='max')
16 | 
17 |     return haf.Clamp(input, high, low)  # or just core.rgvs.RemoveGrain(input, k - 1)
18 | 
19 | 
20 | def lum_sharper(input, l=2):
21 |     if l not in range(1, 6):
22 |         raise ValueError("'l' must be in [1, 5]!")
23 | 
24 |     low1 = muf.Sort(input, l, mode='min')
25 |     high1 = muf.Sort(input, l, mode='max')
26 | 
27 |     return core.std.Expr([input, low1, high1], ['x y z + 2 / <= x y min x z max ?'])
28 | 
29 | 
30 | def lum_filter(input, k=3, l=4):
31 |     if (not isinstance(k, int)) or (not isinstance(l, int)) or (not 1 <= k <= l <= 5):
32 |         raise ValueError("'k' and 'l' must be in [1, 5] and 'k' must not be greater than 'l'!")
33 | 
34 |     low_k = muf.Sort(input, k, mode='min')
35 |     low_l = muf.Sort(input, l, mode='min')
36 |     high_l = muf.Sort(input, l, mode='max')
37 |     high_k = muf.Sort(input, k, mode='max')
38 | 
39 |     return core.std.Expr([input, low_k, low_l, high_l, high_k], ['x z a + 2 / <= x y < y x z min ? x b > b x a max ? ?', ''])
40 | 
41 | 
42 | def asymmetric_lum_filter(input, k=3, l=4, q=6, r=7):
43 |     if (not isinstance(k, int)) or (not isinstance(l, int)) or (not isinstance(q, int)) or (not isinstance(r, int)) or (not 1 <= k <= l <= q <= r <= 9):
44 |         raise ValueError("'k', 'l', 'q' and 'r' must be in [1, 9] in ascending order!")
45 | 
46 |     order_k = muf.Sort(input, k, mode='min')
47 |     order_l = muf.Sort(input, l, mode='min')
48 |     order_q = muf.Sort(input, q, mode='min')
49 |     order_r = muf.Sort(input, r, mode='min')
50 | 
51 |     return core.std.Expr([input, order_k, order_l, order_q, order_r],
52 |                          ['x z a + 2 / <= x y < y x z min ? x b > b x a max ? ?'])
--------------------------------------------------------------------------------
/Collections/SuperRes.py:
--------------------------------------------------------------------------------
1 | # SuperRes1(): Super Resolution
2 | # SuperRes2(): Super Resolution with nnedi3 upsampling
3 | 
4 | # SuperRes(): Super Resolution with NLMeans filtering and user-defined resampling
5 | """Example of using nnedi3() as the main upsampling filter:
6 | 
7 | import nnedi3_resample as nnrs
8 | from functools import partial
9 | 
10 | input = ...
11 | target_width = ...
12 | target_height = ...
13 | upsampleFilter = partial(nnrs.nnedi3_resample, target_width=target_width, target_height=target_height)
14 | superResolution = SuperRes(input, target_width, target_height, upsampleFilter1=upsampleFilter)
15 | 
16 | """
17 | 
18 | # Appears to behave naturally when used to enhance textures during upsampling, though it may introduce lots of aliasing
19 | 
20 | # A 16-bit integer clip is required
21 | 
22 | from vapoursynth import core
23 | 
24 | 
25 | # Main function
26 | def SuperRes(lowRes, width, height, fltPass=3, upsampleFilter1=None, upsampleFilter2=None, downsampleFilter=None, useNLMeans=True, **knlm_args):
27 |     if upsampleFilter1 is None:
28 |         def upsampleFilter1(input):
29 |             return core.fmtc.resample(input, width, height)
30 |     if upsampleFilter2 is None:
31 |         def upsampleFilter2(input):
32 |             return core.fmtc.resample(input, width, height)
33 |     if downsampleFilter is None:
34 |         def downsampleFilter(input):
35 |             return core.fmtc.resample(input, lowRes.width, lowRes.height)
36 | 
37 |     def computeError(input):
38 |         return core.std.MakeDiff(lowRes, downsampleFilter(input))
39 | 
40 |     highRes = upsampleFilter1(lowRes)
41 |     for i in range(fltPass):
42 |         diff = upsampleFilter2(computeError(highRes))
43 |         if useNLMeans:
44 |             diff = core.knlm.KNLMeansCL(diff, rclip=highRes, **knlm_args)
45 |         highRes = core.std.MergeDiff(highRes, diff)
46 |     return highRes
47 | 
48 | 
49 | # Wrapper functions
50 | def SuperRes1(lowRes, w, h, fltPass=3, useNLMeans=True, knlm_args=dict(), **fmtc_args):
51 |     from functools import partial
52 | 
53 |     upsampleFilter = partial(core.fmtc.resample, w=w, h=h, **fmtc_args)
54 | 
55 |     downsampleFilter = partial(core.fmtc.resample, w=lowRes.width, h=lowRes.height, **fmtc_args)
56 | 
57 |     return SuperRes(lowRes, w, h, fltPass, upsampleFilter, upsampleFilter, downsampleFilter, useNLMeans, **knlm_args)
58 | 
59 | 
60 | def SuperRes2(lowRes, w, h, fltPass=3, useNLMeans=True, nnedi3_args=dict(), knlm_args=dict(), **fmtc_args):
61 |     from functools import partial
62 |     import nnedi3_resample as nnrs
63 | 
64 |     upsampleFilter1 = partial(nnrs.nnedi3_resample, target_width=w, target_height=h, **nnedi3_args)
65 | 
66 |     upsampleFilter2 = partial(core.fmtc.resample, w=w, h=h, **fmtc_args)
67 | 
68 |     downsampleFilter = partial(core.fmtc.resample, w=lowRes.width, h=lowRes.height, **fmtc_args)
69 | 
70 |     return SuperRes(lowRes, w, h, fltPass, upsampleFilter1, upsampleFilter2, downsampleFilter, useNLMeans, **knlm_args)
--------------------------------------------------------------------------------
/Collections/descale_verifyer.vpy:
--------------------------------------------------------------------------------
1 | # Modified from https://github.com/himesaka-noa/descale-verifier/blob/master/descale_verify.py
2 | 
3 | import vapoursynth as vs
4 | import numpy as np
5 | try:
6 |     import matplotlib
7 |     import matplotlib.pyplot as plt
8 | except BaseException:
9 |     import matplotlib
10 |     matplotlib.use('Agg')
11 |     import matplotlib.pyplot as plt
12 | from datetime import datetime
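# NB: the try/except above falls back to matplotlib's non-interactive Agg
# backend, so the error plot can still be written to disk on display-less systems.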
13 | from functools import partial 14 | import sys 15 | 16 | 17 | core = vs.core 18 | 19 | # parameters 20 | src = core.lsmas.LWLibavSource(r"00004.m2ts") 21 | src = core.std.SelectEvery(src, 100, 0) 22 | src = core.resize.Point(src, format=vs.GRAYS) 23 | 24 | descaled_width = 1280 25 | descaled_height = 720 26 | kernel = "bicubic" 27 | a, b = 0, 0.5 28 | 29 | 30 | if sys.platform != "win32": 31 | save_filename = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 32 | else: 33 | save_filename = datetime.now().strftime("%y-%m-%d %H-%M-%S") 34 | 35 | 36 | # functions 37 | def calc_statistics(original, w, h, dw, dh, kernel, a, b): 38 | if kernel == 'bicubic': 39 | descaled = core.descale.Debicubic(original, dw, dh, b=a, c=b) 40 | rescaled = core.resize.Bicubic(descaled, w, h, filter_param_a=a, filter_param_b=b) 41 | elif kernel == 'bilinear': 42 | descaled = core.descale.Debilinear(original, dw, dh) 43 | rescaled = core.resize.Bilinear(descaled, w, h) 44 | elif kernel == 'lanczos': 45 | descaled = core.descale.Delanczos(original, dw, dh, taps=int(a)) 46 | rescaled = core.resize.Lanczos(descaled, w, h, filter_param_a=int(a)) 47 | elif kernel == 'spline16': 48 | descaled = core.descale.Despline16(original, dw, dh) 49 | rescaled = core.resize.Spline16(descaled, w, h) 50 | elif kernel == 'spline36': 51 | descaled = core.descale.Despline36(original, dw, dh) 52 | rescaled = core.resize.Spline36(descaled, w, h) 53 | else: 54 | raise NotImplementedError(f"Kernel {kernel} is not implemented.") 55 | 56 | return core.std.PlaneStats(original, rescaled) 57 | 58 | 59 | def create_plot(data, save_filename): 60 | fig, ax = plt.subplots() 61 | t = np.arange(data.shape[0]) 62 | ax.plot(t, data) 63 | ax.set(xlabel='frames', ylabel='relative difference', title='Descale Error') 64 | ax.grid() 65 | fig.savefig(f"{save_filename}.png") 66 | 67 | 68 | def output_statistics(clip, save_filename): 69 | values = np.zeros(clip.num_frames) 70 | remaining_frames = clip.num_frames 71 | 72 | def filter_core(n, f, clip): 73 | values[n] = f.props["PlaneStatsDiff"] 74 | 75 | nonlocal remaining_frames 76 | remaining_frames -= 1 77 | 78 | if remaining_frames == 0: 79 | create_plot(values, save_filename) 80 | 81 | return clip 82 | 83 | clip = core.std.FrameEval(clip, partial(filter_core, clip=clip), clip) 84 | return clip 85 | 86 | 87 | # process 88 | clip = calc_statistics(src, src.width, src.height, descaled_width, descaled_height, kernel, a, b) 89 | clip = output_statistics(clip, save_filename) 90 | clip.set_output() 91 | -------------------------------------------------------------------------------- /Collections/examples/BilateralGPU_cupy/bilateral.cu: -------------------------------------------------------------------------------- 1 | // naive implementation of CUDA-accelerated (NN/SNN) Bilateral filter 2 | 3 | // modified from 4 | // https://github.com/opencv/opencv_contrib/blob/82733fe56b13401519ace101dc4d724f0a83f535/modules/cudaimgproc/perf/perf_bilateral_filter.cpp 5 | 6 | 7 | #define WIDTH $width 8 | #define HEIGHT $height 9 | #define SIGMA_S ${sigma_s}f 10 | #define SIGMA_R ${sigma_r}f 11 | #define SIGMA ${sigma}f 12 | #define HALF_KERNEL_SIZE ${half_kernel_size} 13 | #define SNN ${snn} 14 | 15 | #ifndef MIN 16 | #define MIN(a,b) (((a)<(b))?(a):(b)) 17 | #endif 18 | 19 | #ifndef MAX 20 | #define MAX(a,b) (((a)>(b))?(a):(b)) 21 | #endif 22 | 23 | extern "C" 24 | __global__ void bilateral(const float * __restrict__ src, float * __restrict__ dst) { 25 | const int x = threadIdx.x + blockIdx.x * blockDim.x; 26 | const int y = 
threadIdx.y + blockIdx.y * blockDim.y;
27 | 
28 |     if (x >= WIDTH || y >= HEIGHT)
29 |         return;
30 | 
31 |     const float center = src[y * WIDTH + x];
32 | 
33 |     float sum1 = 0;
34 |     float sum2 = 0;
35 | 
36 |     for (int cy = MAX(y - HALF_KERNEL_SIZE, 0); cy <= MIN(y + HALF_KERNEL_SIZE, HEIGHT - 1); ++cy)
37 |         for (int cx = MAX(x - HALF_KERNEL_SIZE, 0); cx <= MIN(x + HALF_KERNEL_SIZE, WIDTH - 1); ++cx) {
38 |             const float space = (x - cx) * (x - cx) + (y - cy) * (y - cy);
39 | 
40 |             const float value = src[cy * WIDTH + cx];
41 | 
42 | #if SNN
43 |             const float weight = expf(space * SIGMA_S +
44 |                 fabsf((value - center) * (value - center) - SIGMA) * SIGMA_R);
45 | #else
46 |             const float weight = expf(space * SIGMA_S + (value - center) * (value - center) * SIGMA_R);
47 | #endif
48 | 
49 |             sum1 += weight * value;
50 |             sum2 += weight;
51 |         }
52 | 
53 |     dst[y * WIDTH + x] = sum1 / sum2;
54 | }
55 | 
--------------------------------------------------------------------------------
/Collections/examples/BilateralGPU_cupy/bilateral_gpu_cupy.vpy:
--------------------------------------------------------------------------------
1 | """Bilateral-GPU in VapourSynth"""
2 | 
3 | from string import Template
4 | 
5 | import cupy as cp
6 | import vapoursynth as vs
7 | from vapoursynth import core
8 | import muvsfunc_numpy as mufnp
9 | 
10 | # Load source clip. Only GRAYS is supported
11 | src = core.std.BlankClip(format=vs.GRAYS)
12 | 
13 | # params of bilateral filter. See documentation at https://github.com/HomeOfVapourSynthEvolution/VapourSynth-Bilateral
14 | sigmaS = 3.0
15 | sigmaR = 0.02
16 | # params of SNN (Statistical Nearest Neighbors) sampling strategy
17 | # ref: I. Frosio, J. Kautz, Statistical Nearest Neighbors for Image Denoising, IEEE Trans. Image Processing, 2019.
18 | sigma = 0 # 0.0003
19 | 
20 | # other params
21 | half_kernel_size = round(sigmaS * 2)
22 | blksize = (32, 8) # dimensions of the CUDA thread block
23 | 
24 | # whether to enable '--use_fast_math' in NVRTC runtime compilation
25 | # to make use of fast math operations
26 | fast = False
27 | 
28 | # pre-processing
29 | snn = int(sigma > 0) # whether to use SNN sampling strategy
30 | 
31 | if src.format.id != vs.GRAYS:
32 |     raise vs.Error("Bilateral: Only 32-bit float grayscale is supported!")
33 | 
34 | w, h = src.width, src.height
35 | 
36 | # source code of CUDA kernel
37 | with open('bilateral.cu', 'r') as f:
38 |     kernel_source_code = f.read()
39 | 
40 | kernel_source_code = Template(kernel_source_code)
41 | kernel_source_code = kernel_source_code.substitute(
42 |     width=w, height=h, sigma_s=-0.5/(sigmaS**2), sigma_r=-0.5/(sigmaR**2),
43 |     sigma=sigma, snn=snn, half_kernel_size=half_kernel_size)
44 | 
45 | 
46 | if fast:
47 |     kernel = cp.RawKernel(kernel_source_code, 'bilateral',
48 |                           options=('--use_fast_math', ))
49 | else:
50 |     kernel = cp.RawKernel(kernel_source_code, 'bilateral')
51 | 
52 | # create NumPy function
53 | def bilateral_core(h_img, kernel):
54 |     # h_img must be a 2-D image
55 | 
56 |     d_img = cp.asarray(h_img)
57 |     d_out = cp.empty_like(d_img)
58 | 
59 |     kernel(((w + blksize[0] - 1)//blksize[0], (h + blksize[1] - 1)//blksize[1]), blksize, (d_img, d_out))
60 | 
61 |     h_out = cp.asnumpy(d_out)
62 | 
63 |     return h_out
64 | 
65 | # process
66 | res = mufnp.numpy_process(src, bilateral_core, kernel=kernel)
67 | 
68 | res.set_output()
69 | 
--------------------------------------------------------------------------------
/Collections/examples/Dpid_cupy/dpid.cu:
--------------------------------------------------------------------------------
1 | // 
Copyright (c) 2016 Nicolas Weber and Sandra C. Amend / GCC / TU-Darmstadt. All rights reserved. 2 | // Use of this source code is governed by the BSD 3-Clause license that can be 3 | // found in the LICENSE file. 4 | // modified by WolframRhodium 5 | 6 | #define THREADS 128 7 | #define WSIZE 32 8 | #define TSIZE (THREADS / WSIZE) 9 | 10 | #define TX threadIdx.x 11 | #define PX (blockIdx.x * TSIZE + (TX / WSIZE)) 12 | #define PY blockIdx.y 13 | 14 | #define WTHREAD (TX % WSIZE) 15 | #define WARP (TX / WSIZE) 16 | 17 | #define LAMBDA ((float) (${lamda})) 18 | #define IWIDTH (${iwidth}) 19 | #define IHEIGHT (${iheight}) 20 | #define OWIDTH (${owidth}) 21 | #define OHEIGHT (${oheight}) 22 | #define PWIDTH ((float) (${pwidth})) 23 | #define PHEIGHT ((float) (${pheight})) 24 | 25 | #define SX (fmaxf(PX * PWIDTH, 0.0f)) 26 | #define EX (fminf((PX + 1) * PWIDTH, IWIDTH)) 27 | #define SY (fmaxf(PY * PHEIGHT, 0.0f)) 28 | #define EY (fminf((PY + 1) * PHEIGHT, IHEIGHT)) 29 | #define SXR (__float2uint_rd(SX)) 30 | #define SYR (__float2uint_rd(SY)) 31 | #define EXR (__float2uint_ru(EX)) 32 | #define EYR (__float2uint_ru(EY)) 33 | #define XCOUNT (EXR - SXR) 34 | #define YCOUNT (EYR - SYR) 35 | #define PIXELCOUNT (XCOUNT * YCOUNT) 36 | 37 | //------------------------------------------------------------------- 38 | // DEVICE 39 | //------------------------------------------------------------------- 40 | __device__ __forceinline__ void normalize(float4& var) 41 | { 42 | var.x /= var.w; 43 | var.y /= var.w; 44 | var.z /= var.w; 45 | var.w = 1.0f; 46 | } 47 | 48 | //------------------------------------------------------------------- 49 | __device__ __forceinline__ void add(float4& output, const ${dtype}3& color, const float factor) 50 | { 51 | output.x += color.x * factor; 52 | output.y += color.y * factor; 53 | output.z += color.z * factor; 54 | output.w += factor; 55 | } 56 | 57 | //------------------------------------------------------------------- 58 | __device__ __forceinline__ void add(float4& output, const float4& color) 59 | { 60 | output.x += color.x; 61 | output.y += color.y; 62 | output.z += color.z; 63 | output.w += color.w; 64 | } 65 | 66 | //------------------------------------------------------------------- 67 | __device__ __forceinline__ float lambda(const float dist) 68 | { 69 | if (LAMBDA == 0.0f) 70 | return 1.0f; 71 | else if (LAMBDA == 1.0f) 72 | return dist; 73 | 74 | return powf(dist, LAMBDA); 75 | } 76 | 77 | //------------------------------------------------------------------- 78 | __device__ __forceinline__ void operator+=(float4& output, const float4 value) 79 | { 80 | output.x += value.x; 81 | output.y += value.y; 82 | output.z += value.z; 83 | output.w += value.w; 84 | } 85 | 86 | //------------------------------------------------------------------- 87 | __device__ __forceinline__ float contribution(float f, const unsigned int x, const unsigned int y) 88 | { 89 | if (x < SX) 90 | f *= 1.0f - (SX - x); 91 | 92 | if ((x + 1.0f) > EX) 93 | f *= 1.0f - ((x + 1.0f) - EX); 94 | 95 | if (y < SY) 96 | f *= 1.0f - (SY - y); 97 | 98 | if ((y + 1.0f) > EY) 99 | f *= 1.0f - ((y + 1.0f) - EY); 100 | 101 | return f; 102 | } 103 | 104 | //------------------------------------------------------------------- 105 | // taken from: https://devblogs.nvidia.com/parallelforall/faster-parallel-reductions-kepler/ 106 | __device__ __forceinline__ float4 __shfl_down(const float4 var, const unsigned int srcLane, const unsigned int width = 32) 107 | { 108 | float4 output; 109 | 110 | #if __CUDACC_VER_MAJOR__ 
>= 9 // CUDA 9.0 or later 111 | output.x = __shfl_down_sync(0xFFFFFFFFU, var.x, srcLane, width); 112 | output.y = __shfl_down_sync(0xFFFFFFFFU, var.y, srcLane, width); 113 | output.z = __shfl_down_sync(0xFFFFFFFFU, var.z, srcLane, width); 114 | output.w = __shfl_down_sync(0xFFFFFFFFU, var.w, srcLane, width); 115 | #else 116 | output.x = __shfl_down(var.x, srcLane, width); 117 | output.y = __shfl_down(var.y, srcLane, width); 118 | output.z = __shfl_down(var.z, srcLane, width); 119 | output.w = __shfl_down(var.w, srcLane, width); 120 | #endif 121 | 122 | return output; 123 | } 124 | 125 | //------------------------------------------------------------------- 126 | __device__ __forceinline__ void reduce(float4& value) 127 | { 128 | value += __shfl_down(value, 16); 129 | value += __shfl_down(value, 8); 130 | value += __shfl_down(value, 4); 131 | value += __shfl_down(value, 2); 132 | value += __shfl_down(value, 1); 133 | } 134 | 135 | //------------------------------------------------------------------- 136 | __device__ __forceinline__ float distance(const float4& avg, const ${dtype}3& color) 137 | { 138 | const float x = avg.x - color.x; 139 | const float y = avg.y - color.y; 140 | const float z = avg.z - color.z; 141 | 142 | return sqrtf(x * x + y * y + z * z); 143 | // return sqrtf((x * x + y * y + z * z) / 3.0f) / PIXEL_MAX; // L2-Norm / sqrt(255^2 * 3) 144 | } 145 | 146 | //------------------------------------------------------------------- 147 | extern "C" 148 | __global__ void kernelGuidance(const ${dtype}3* __restrict__ input, ${dtype}3* __restrict__ patches) 149 | { 150 | if (PX >= OWIDTH || PY >= OHEIGHT) 151 | return; 152 | 153 | // init 154 | float4 color = { 0 }; 155 | 156 | // iterate pixels 157 | for (unsigned int i = WTHREAD; i < PIXELCOUNT; i += WSIZE) 158 | { 159 | const unsigned int x = SXR + (i % XCOUNT); 160 | const unsigned int y = SYR + (i / XCOUNT); 161 | 162 | const float f = contribution(1.0f, x, y); 163 | 164 | const ${dtype}3& pixel = input[x + y * IWIDTH]; 165 | 166 | add(color, make_float4(pixel.x * f, pixel.y * f, pixel.z * f, f)); 167 | } 168 | 169 | // reduce warps 170 | reduce(color); 171 | 172 | // store results 173 | if ((TX % 32) == 0) 174 | { 175 | normalize(color); 176 | patches[PX + PY * OWIDTH] = make_${dtype}3(color.x, color.y, color.z); 177 | } 178 | } 179 | 180 | //------------------------------------------------------------------- 181 | __device__ __forceinline__ float4 calcAverage(const ${dtype}3* __restrict__ patches) 182 | { 183 | const float corner = 1.0f; 184 | const float edge = 2.0f; 185 | const float center = 4.0f; 186 | 187 | // calculate average color 188 | float4 avg = { 0.f }; 189 | 190 | // TOP 191 | if (PY > 0) 192 | { 193 | if (PX > 0) 194 | add(avg, patches[(PX - 1) + (PY - 1) * OWIDTH], corner); 195 | 196 | add(avg, patches[(PX)+(PY - 1) * OWIDTH], edge); 197 | 198 | if ((PX + 1) < OWIDTH) 199 | add(avg, patches[(PX + 1) + (PY - 1) * OWIDTH], corner); 200 | } 201 | 202 | // LEFT 203 | if (PX > 0) 204 | add(avg, patches[(PX - 1) + (PY)* OWIDTH], edge); 205 | 206 | // CENTER 207 | add(avg, patches[(PX)+(PY)* OWIDTH], center); 208 | 209 | // RIGHT 210 | if ((PX + 1) < OWIDTH) 211 | add(avg, patches[(PX + 1) + (PY)* OWIDTH], edge); 212 | 213 | // BOTTOM 214 | if ((PY + 1) < OHEIGHT) 215 | { 216 | if (PX > 0) 217 | add(avg, patches[(PX - 1) + (PY + 1) * OWIDTH], corner); 218 | 219 | add(avg, patches[(PX)+(PY + 1) * OWIDTH], edge); 220 | 221 | if ((PX + 1) < OWIDTH) 222 | add(avg, patches[(PX + 1) + (PY + 1) * OWIDTH], corner); 223 | } 224 
| 225 | normalize(avg); 226 | 227 | return avg; 228 | } 229 | 230 | //------------------------------------------------------------------- 231 | extern "C" 232 | __global__ void kernelDownsampling(const ${dtype}3* __restrict__ input, const ${dtype}3* __restrict__ patches, ${dtype}3* __restrict__ output) 233 | { 234 | if (PX >= OWIDTH || PY >= OHEIGHT) return; 235 | 236 | // init 237 | const float4 avg = calcAverage(patches); 238 | 239 | float4 color = { 0.f }; 240 | 241 | // iterate pixels 242 | for (unsigned int i = WTHREAD; i < PIXELCOUNT; i += WSIZE) 243 | { 244 | const unsigned int x = SXR + (i % XCOUNT); 245 | const unsigned int y = SYR + (i / XCOUNT); 246 | 247 | const ${dtype}3& pixel = input[x + y * IWIDTH]; 248 | float f = distance(avg, pixel); 249 | 250 | f = lambda(f); 251 | f = contribution(f, x, y); 252 | 253 | add(color, pixel, f); 254 | } 255 | 256 | // reduce warp 257 | reduce(color); 258 | 259 | if (WTHREAD == 0) 260 | { 261 | ${dtype}3& ref = output[PX + PY * OWIDTH]; 262 | 263 | if (color.w == 0.0f) 264 | ref = make_${dtype}3(avg.x, avg.y, avg.z); 265 | else 266 | { 267 | normalize(color); 268 | ref = make_${dtype}3(color.x, color.y, color.z); 269 | } 270 | } 271 | } 272 | -------------------------------------------------------------------------------- /Collections/examples/Dpid_cupy/dpid_cupy.vpy: -------------------------------------------------------------------------------- 1 | """core.dpid.Dpid() in CuPy""" 2 | 3 | from string import Template 4 | 5 | import cupy as cp 6 | import vapoursynth as vs 7 | from vapoursynth import core 8 | import muvsfunc_numpy as mufnp 9 | 10 | 11 | # Load source clip. Only RGB24/RGB48/RGBS is supported 12 | src = core.std.BlankClip(format=vs.RGB24) 13 | 14 | 15 | # params of core.dpid.Dpid() 16 | width = src.width // 2 17 | height = src.height // 2 18 | _lambda = 1.0 19 | 20 | # whether to enable '--use_fast_math' in NVRTC runtime compilation 21 | # to make use of fast math operations 22 | fast = False 23 | 24 | 25 | # pre-processing 26 | if src.format.color_family != vs.RGB: 27 | raise TypeError("'src' must be a RGB clip.") 28 | 29 | 30 | if src.format.sample_type == vs.FLOAT: 31 | dtype = 'float' 32 | 33 | elif src.format.bits_per_sample == 8: 34 | dtype = 'uchar' 35 | 36 | else: 37 | dtype = 'ushort' 38 | 39 | 40 | # load CUDA kernel 41 | with open('dpid.cu', 'r') as f: 42 | kernel_source_code = f.read() 43 | 44 | kernel_source_code = Template(kernel_source_code) 45 | kernel_source_code = kernel_source_code.substitute( 46 | iwidth=src.width, iheight=src.height, owidth=width, oheight=height, 47 | pwidth=src.width / width, pheight=src.height / height, lamda=_lambda, 48 | dtype=dtype) 49 | 50 | 51 | if fast: 52 | kernelGuidance = cp.RawKernel(code=kernel_source_code, name='kernelGuidance', 53 | options=('--use_fast_math', )) 54 | kernelDownsampling = cp.RawKernel(code=kernel_source_code, name='kernelDownsampling', 55 | options=('--use_fast_math', )) 56 | else: 57 | kernelGuidance = cp.RawKernel(code=kernel_source_code, name='kernelGuidance') 58 | kernelDownsampling = cp.RawKernel(code=kernel_source_code, name='kernelDownsampling') 59 | 60 | 61 | # create NumPy function 62 | def dpid_core(h_input, width, height, kernelGuidance, kernelDownsampling): 63 | d_input = cp.asarray(h_input) 64 | d_output = cp.zeros((height, width, 3), dtype=h_input.dtype) 65 | d_guidance = cp.zeros((height, width, 3), dtype=h_input.dtype) 66 | 67 | kernelGuidance((width // 4, height, 1), (128, 1, 1), (d_input, d_guidance)) 68 | kernelDownsampling((width // 4, 
height, 1), (128, 1, 1), (d_input, d_guidance, d_output)) 69 | 70 | h_out = cp.asnumpy(d_output) 71 | 72 | return h_out 73 | 74 | 75 | # process 76 | res = mufnp.numpy_process( 77 | [core.std.BlankClip(src, width=width, height=height), src], 78 | dpid_core, width=width, height=height, 79 | kernelGuidance=kernelGuidance, kernelDownsampling=kernelDownsampling, 80 | input_per_plane=False, output_per_plane=False, omit_first_clip=True) 81 | 82 | """ 83 | if src.format.sample_type == vs.INTEGER: 84 | res = core.dpid.Dpid(src, width=width, height=height, _lambda=_lambda) 85 | 86 | else: # src.format.sample_type == vs.FLOAT 87 | res = core.dpid.Dpid(src.fmtc.bitdepth(bits=16), width=width, height=height, _lambda=_lambda) 88 | res = core.fmtc.bitdepth(res, bits=src.format.bits_per_sample) 89 | """ 90 | 91 | 92 | res.set_output() 93 | -------------------------------------------------------------------------------- /Collections/examples/Expr_msvc/Expr.vpy: -------------------------------------------------------------------------------- 1 | """ 2 | this project is inspired by https://github.com/Endilll/exprcpp 3 | it dynamically generates code for a plugin (Expr in this case) 4 | 5 | ** the code here is badly written, don't use it** 6 | """ 7 | 8 | 9 | import os 10 | import tempfile 11 | from string import Template 12 | 13 | import vapoursynth as vs 14 | from vapoursynth import core 15 | 16 | 17 | def compile_plugin(filenames, vs_include_dir): 18 | from distutils.msvccompiler import MSVCCompiler 19 | msvc = MSVCCompiler() 20 | 21 | msvc.add_include_dir(vs_include_dir) 22 | 23 | tempdir=tempfile.gettempdir() 24 | 25 | msvc.compile(filenames, extra_postargs=["/O2", "/EHsc"], output_dir=tempdir) 26 | msvc.link_shared_lib([os.path.join(tempdir, "Source.obj")], output_libname="expr", output_dir=tempdir) 27 | core.std.LoadPlugin(os.path.join(tempdir, "expr.dll")) 28 | 29 | 30 | def Expr(clips, func_impl, func_name, planes=None, vs_include_dir=r"D:\VapourSynth\sdk\include\vapoursynth"): 31 | # assertions 32 | for clip in clips[1:]: 33 | assert ( 34 | clip.format.id == clips[0].format.id and 35 | clip.width == clips[0].width and 36 | clip.height == clips[0].height and 37 | clip.num_frames == clips[0].num_frames 38 | ) 39 | 40 | # completes code from template 41 | num_inputs = len(clips) 42 | 43 | clip = clips[0] 44 | if clip.format.sample_type == vs.FLOAT: 45 | t = "float" 46 | elif clip.format.bits_per_sample == 8: 47 | t = "uint8_t" 48 | else: 49 | t = "uint16_t" 50 | inputs = ", ".join(f"srcp[{i}][x]" for i in range(num_inputs)) 51 | 52 | if planes is None: 53 | planes = list(range(clip.format.num_planes)) 54 | planes = "{" + ", ".join(("1" if i in planes else "0") for i in range(3)) + "}" 55 | 56 | with open("Source_template.cpp", 'r') as f: 57 | code_template = f.read() 58 | 59 | code = ( 60 | Template(code_template) 61 | .substitute( 62 | num_inputs=num_inputs, t=t, inputs=inputs, func_name=func_name, 63 | planes=planes, func_impl=func_impl) 64 | ) 65 | 66 | # generates final source code 67 | with open("Source.cpp", 'w') as f: 68 | f.write(code) 69 | 70 | compile_plugin(("Source.cpp",), vs_include_dir) 71 | 72 | return core.expr.Expr(clips) 73 | 74 | 75 | # test clips 76 | src1 = core.std.BlankClip(format=vs.YUV420P8, color=[0, 13, 29]) 77 | src2 = core.std.BlankClip(format=vs.YUV420P8, color=[93, 128, 247]) 78 | 79 | 80 | # usage 81 | func_impl = """ 82 | T add(T x, T y) { 83 | return x + y - (T) 128; 84 | } 85 | """ 86 | 87 | res = Expr([src1, src2], func_impl, func_name="add", planes=[0, 2]) 88 | 89 
| res.set_output()
90 | 
--------------------------------------------------------------------------------
/Collections/examples/Expr_msvc/Source_template.cpp:
--------------------------------------------------------------------------------
1 | #include <cstdlib>
2 | #include <VapourSynth.h>
3 | #include <VSHelper.h>
4 | 
5 | #define kNumInputs ${num_inputs}
6 | #define T ${t}
7 | #define kInputs ${inputs}
8 | #define kFunction ${func_name}
9 | const int kProcess[3] = ${planes};
10 | 
11 | ${func_impl}
12 | 
13 | typedef struct {
14 |     VSNodeRef *node[kNumInputs];
15 |     const VSVideoInfo *vi;
16 | } ExprData;
17 | 
18 | static void VS_CC ExprInit(VSMap *in, VSMap *out, void **instanceData, VSNode *node, VSCore *core, const VSAPI *vsapi) {
19 |     ExprData *d = (ExprData *) *instanceData;
20 |     vsapi->setVideoInfo(d->vi, 1, node);
21 | }
22 | 
23 | static const VSFrameRef *VS_CC ExprGetFrame(int n, int activationReason, void **instanceData, void **frameData, VSFrameContext *frameCtx, VSCore *core, const VSAPI *vsapi) {
24 |     ExprData *d = (ExprData *) *instanceData;
25 | 
26 |     if (activationReason == arInitial) {
27 |         for (int i = 0; i < kNumInputs; ++i)
28 |             vsapi->requestFrameFilter(n, d->node[i], frameCtx);
29 |     } else if (activationReason == arAllFramesReady) {
30 |         const VSFrameRef *src[kNumInputs] = {};
31 |         for (int i = 0; i < kNumInputs; ++i)
32 |             src[i] = vsapi->getFrameFilter(n, d->node[i], frameCtx);
33 | 
34 |         const VSFormat *fi = d->vi->format;
35 |         int height = vsapi->getFrameHeight(src[0], 0);
36 |         int width = vsapi->getFrameWidth(src[0], 0);
37 | 
38 |         int planes[3] = { 0, 1, 2 };
39 |         const VSFrameRef *srcf[3] = { kProcess[0] ? nullptr : src[0], kProcess[1] ? nullptr : src[0], kProcess[2] ? nullptr : src[0] };
40 |         VSFrameRef *dst = vsapi->newVideoFrame2(fi, width, height, srcf, planes, src[0], core);
41 | 
42 |         for (int plane = 0; plane < d->vi->format->numPlanes; plane++) {
43 |             if (!kProcess[plane])
44 |                 continue;
45 | 
46 |             const T *srcp[kNumInputs] = {};
47 |             for (int i = 0; i < kNumInputs; ++i)
48 |                 srcp[i] = (const T*) vsapi->getReadPtr(src[i], plane);
49 | 
50 |             int src_stride = vsapi->getStride(src[0], plane) / sizeof(T); // getStride() is in bytes; convert to elements
51 |             T *dstp = (T*) vsapi->getWritePtr(dst, plane);
52 |             int dst_stride = vsapi->getStride(dst, plane) / sizeof(T);
53 |             int h = vsapi->getFrameHeight(src[0], plane);
54 |             int w = vsapi->getFrameWidth(src[0], plane);
55 | 
56 |             for (int y = 0; y < h; y++) {
57 |                 for (int x = 0; x < w; x++) {
58 |                     dstp[x] = kFunction(kInputs);
59 |                 }
60 | 
61 |                 dstp += dst_stride;
62 |                 for (int i = 0; i < kNumInputs; ++i)
63 |                     srcp[i] += src_stride;
64 |             }
65 |         }
66 | 
67 |         for (int i = 0; i < kNumInputs; ++i)
68 |             vsapi->freeFrame(src[i]);
69 | 
70 |         return dst;
71 |     }
72 | 
73 |     return 0;
74 | }
75 | 
76 | static void VS_CC ExprFree(void *instanceData, VSCore *core, const VSAPI *vsapi) {
77 |     ExprData *d = (ExprData *)instanceData;
78 |     for (int i = 0; i < kNumInputs; ++i)
79 |         vsapi->freeNode(d->node[i]);
80 |     free(d);
81 | }
82 | 
83 | static void VS_CC ExprCreate(const VSMap *in, VSMap *out, void *userData, VSCore *core, const VSAPI *vsapi) {
84 |     ExprData d;
85 |     ExprData *data;
86 | 
87 |     for (int i = 0; i < kNumInputs; ++i) {
88 |         auto node = vsapi->propGetNode(in, "clips", i, 0);
89 |         auto vi = vsapi->getVideoInfo(node);
90 |         if (!isConstantFormat(vi)) {
91 |             vsapi->setError(out, "Expr: only constant format input supported");
92 |             for (int j = 0; j < i; ++j)
93 |                 vsapi->freeNode(d.node[j]);
94 |             return;
95 |         }
96 |         d.node[i] = node;
97 |     }
98 | 
99 |     d.vi = vsapi->getVideoInfo(d.node[0]);
100 | 
101 |     data = (ExprData *) malloc(sizeof(d));
102 | 
*data = d; 103 | 104 | vsapi->createFilter(in, out, "Expr", ExprInit, ExprGetFrame, ExprFree, fmParallel, 0, data, core); 105 | } 106 | 107 | 108 | VS_EXTERNAL_API(void) VapourSynthPluginInit(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin *plugin) { 109 | configFunc("exprtest", "expr", "Expr test", VAPOURSYNTH_API_VERSION, 1, plugin); 110 | registerFunc("Expr", "clips:clip[];", ExprCreate, 0, plugin); 111 | } 112 | -------------------------------------------------------------------------------- /Collections/examples/FFDNet_TensorRT/README.md: -------------------------------------------------------------------------------- 1 | # Instructions 2 | 1. Install [CUDA-Python](https://github.com/NVIDIA/cuda-python). 3 | 4 | 2. Install TensorRT Python API. [Install-guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html). PyCUDA is not required. 5 | 6 | 3. Download [FFDNet model](https://github.com/HolyWu/vs-ffdnet/blob/master/vsffdnet/ffdnet_color.pth) from HolyWu/vs-ffdnet. 7 | 8 | 4. Run `build_engine.py` to create serialized TensorRT engine. 9 | 10 | "The generated plan files are **not portable** across platforms or TensorRT versions and are specific to the exact GPU model they were built on", according to [TensorRT Developer Guide](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work). This sample further assumes that the dimensions of the video are fixed and known before engine creation. 11 | 12 | 5. (Optionally) Run `benchmark.py` or `trtexec --loadEngine="ffdnet.engine" --useCudaGraph` to test the engine's raw performance. 13 | 14 | `benchmark.py` writes a DOT file "ffdnet.dot" describing inference graph structure when `use_cuda_graph=True`. The DOT file can be visualized by running `dot -Tsvg ffdnet.dot > ffdnet.svg`. 15 | 16 | 6. Run `ffdnet_test.vpy` to test in VapourSynth. 
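
   For reference, `ffdnet_test.vpy` boils down to the following sketch (the source path is a placeholder; the clip must be RGBS and must match the exact dimensions the engine was built for):

   ```python
   import vapoursynth as vs
   from vapoursynth import core
   import vs_ffdnet

   src = core.lsmas.LWLibavSource(r"input.mkv")  # placeholder source file
   # FFDNet expects 32-bit float RGB at the engine's build resolution
   src = core.resize.Bicubic(src, 1920, 1080, format=vs.RGBS, matrix_in_s="709")
   res = vs_ffdnet.FFDNet(src, sigma=5.0, use_cuda_graph=False)
   res.set_output()
   ```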
17 | 18 | -------------------------------------------------------------------------------- /Collections/examples/FFDNet_TensorRT/benchmark.py: -------------------------------------------------------------------------------- 1 | from cuda import cuda 2 | import tensorrt as trt 3 | from utils import * 4 | 5 | 6 | def _get_bindings( 7 | context: trt.IExecutionContext, 8 | num_bindings: int 9 | ) -> List[UniqueResource]: 10 | 11 | bindings = [] 12 | for i in range(num_bindings): 13 | binding = checkError(cuda.cuMemAlloc(context.get_strides(i)[0] * 4)) 14 | binding = UniqueResource(binding, cuda.cuMemFree, binding) 15 | bindings.append(binding) 16 | return bindings 17 | 18 | 19 | def benchmark( 20 | width: int, 21 | height: int, 22 | iter: int = 5, 23 | use_cuda_graph: bool = False, 24 | logger: trt.Logger = trt.Logger(trt.Logger.VERBOSE) 25 | ) -> None: 26 | 27 | cuda_context = init_cuda() 28 | 29 | runtime = trt.Runtime(logger) 30 | 31 | with open(f"ffdnet_{width}_{height}.engine", "rb") as f: 32 | engine = runtime.deserialize_cuda_engine(f.read()) 33 | 34 | execution_context = engine.create_execution_context() 35 | 36 | _bindings = _get_bindings(execution_context, engine.num_bindings) 37 | bindings = [binding.obj for binding in _bindings] 38 | 39 | stream = checkError(cuda.cuStreamCreate(cuda.CUstream_flags.CU_STREAM_NON_BLOCKING.value)) 40 | stream = UniqueResource(stream, cuda.cuStreamDestroy, stream) 41 | 42 | start = checkError(cuda.cuEventCreate(cuda.CUevent_flags.CU_EVENT_DEFAULT.value)) 43 | start = UniqueResource(start, cuda.cuEventDestroy, start) 44 | 45 | end = checkError(cuda.cuEventCreate(cuda.CUevent_flags.CU_EVENT_DEFAULT.value)) 46 | end = UniqueResource(end, cuda.cuEventDestroy, end) 47 | 48 | def execute(): 49 | execution_context.execute_async_v2(bindings, stream_handle=stream.obj) 50 | 51 | if use_cuda_graph: 52 | checkError(cuda.cuStreamBeginCapture( 53 | stream.obj, cuda.CUstreamCaptureMode.CU_STREAM_CAPTURE_MODE_RELAXED)) 54 | 55 | execute() 56 | 57 | graph = checkError(cuda.cuStreamEndCapture(stream.obj)) 58 | graphexec, error_node = checkError(cuda.cuGraphInstantiate( 59 | graph, logBuffer=b"", bufferSize=0)) 60 | graphexec = UniqueResource(graphexec, cuda.cuGraphExecDestroy, graphexec) 61 | checkError(cuda.cuGraphDebugDotPrint( 62 | graph, b"ffdnet.dot", 63 | cuda.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE.value)) 64 | checkError(cuda.cuGraphDestroy(graph)) 65 | 66 | for _ in range(iter): 67 | checkError(cuda.cuEventRecord(start.obj, stream.obj)) 68 | 69 | if use_cuda_graph: 70 | checkError(cuda.cuGraphLaunch(graphexec.obj, stream.obj)) 71 | else: 72 | execute() 73 | 74 | checkError(cuda.cuEventRecord(end.obj, stream.obj)) 75 | checkError(cuda.cuEventSynchronize(end.obj)) 76 | 77 | duration = checkError(cuda.cuEventElapsedTime(start.obj, end.obj)) 78 | 79 | print(f"duration: {duration} ms") 80 | 81 | 82 | if __name__ == "__main__": 83 | benchmark(width=1920, height=1080, iter=10, use_cuda_graph=False) 84 | -------------------------------------------------------------------------------- /Collections/examples/FFDNet_TensorRT/build_engine.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import tensorrt as trt 3 | from utils import * 4 | 5 | 6 | def build_engine( 7 | width: int, 8 | height: int, 9 | args_dict: Dict, 10 | max_workspace_size: int = int(1.6 * 1024 ** 3), 11 | logger: trt.Logger = trt.Logger(trt.Logger.VERBOSE) 12 | ) -> None: 13 | 14 | assert width % 2 == 0 and height % 2 == 0 15 
| 
16 |     builder = trt.Builder(logger)
17 |     builder.max_batch_size = 1
18 | 
19 |     flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
20 |     network = builder.create_network(flags=flags)
21 |     network.name = "ffdnet"
22 | 
23 |     input = network.add_input(
24 |         name="input", dtype=trt.float32, shape=(1, 3, height, width))
25 |     sigma = network.add_input(
26 |         name="sigma", dtype=trt.float32, shape=(1, 1, height // 2, width // 2))
27 | 
28 |     input_down = pixel_unshuffle(network, input, downscale_factor=2)
29 |     network_input = network.add_concatenation([input_down, sigma]).get_output(0)
30 | 
31 |     conv = convolution(
32 |         network, network_input, 13, 96, 3,
33 |         kernel=args_dict["model.0.weight"].numpy(),
34 |         bias=args_dict["model.0.bias"].numpy())
35 |     relu = network.add_activation(conv, trt.ActivationType.RELU).get_output(0)
36 | 
37 |     for i in range(1, 11):
38 |         conv = convolution(
39 |             network, relu, 96, 96, 3,
40 |             kernel=args_dict[f"model.{i*2}.weight"].numpy(),
41 |             bias=args_dict[f"model.{i*2}.bias"].numpy())
42 | 
43 |         relu = network.add_activation(conv, trt.ActivationType.RELU).get_output(0)
44 | 
45 |     conv = convolution(
46 |         network, relu, 96, 12, 3,
47 |         kernel=args_dict[f"model.22.weight"].numpy(),
48 |         bias=args_dict[f"model.22.bias"].numpy())
49 | 
50 |     output = pixel_shuffle(network, conv, upscale_factor=2)
51 | 
52 |     network.mark_output(output)
53 | 
54 |     config = builder.create_builder_config()
55 |     config.max_workspace_size = max_workspace_size
56 |     # load the timing cache if present; start from an empty one otherwise
57 |     try:
58 |         with open("timing_cache.buffer", "rb") as cache_f:
59 |             cache = config.create_timing_cache(cache_f.read())
60 |     except FileNotFoundError:
61 |         cache = config.create_timing_cache(b"")
62 |     config.set_timing_cache(cache=cache, ignore_mismatch=False)
63 | 
64 |     output = builder.build_serialized_network(network, config)
65 | 
66 |     with open("timing_cache.buffer", "wb") as cache_f:
67 |         cache_f.write(cache.serialize())
68 | 
69 |     with open(f"ffdnet_{width}_{height}.engine", "wb") as f:
70 |         f.write(output)
71 | 
72 | 
73 | if __name__ == "__main__":
74 |     import torch
75 | 
76 |     # https://github.com/HolyWu/vs-ffdnet/blob/master/vsffdnet/ffdnet_color.pth
77 |     args_dict = torch.load("ffdnet_color.pth")
78 | 
79 |     build_engine(width=1920, height=1080, args_dict=args_dict)
--------------------------------------------------------------------------------
/Collections/examples/FFDNet_TensorRT/ffdnet_test.vpy:
--------------------------------------------------------------------------------
1 | import vapoursynth as vs
2 | from vapoursynth import core
3 | 
4 | import sys
5 | sys.path.append(".")
6 | import vs_ffdnet
7 | 
8 | src = core.lsmas.LWLibavSource(r'PV02.mkv')
9 | src = core.resize.Bicubic(src, 1920, 1080, format=vs.RGBS, matrix_in_s="709")
10 | res = vs_ffdnet.FFDNet(src, sigma=5.0, use_cuda_graph=False)
11 | 
12 | res.set_output()
13 | 
14 | 
--------------------------------------------------------------------------------
/Collections/examples/FFDNet_TensorRT/utils.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 | 
3 | import numpy as np
4 | from cuda import cuda
5 | import tensorrt as trt
6 | 
7 | 
8 | def checkError(args):
9 |     err, *ret = args
10 | 
11 |     if isinstance(err, cuda.CUresult):
12 |         if err != cuda.CUresult.CUDA_SUCCESS:
13 |             raise RuntimeError(f"Cuda Error: {err}")
14 |     else:
15 |         raise RuntimeError(f"Unknown error type: {err}")
16 | 
17 |     if len(ret) == 0:
18 |         return
19 |     elif len(ret) == 1:
20 |         return ret[0]
21 |     else:
22 |         return ret
23 | 
24 | 
25 | class UniqueResource:
26 |     def __init__(self, obj, func, *args, **kwargs):
27 | 
self.obj = obj
28 |         self._func = [func]
29 |         self._args = [args]
30 |         self._kwargs = [kwargs]
31 | 
32 |     def register(self, func, *args, **kwargs):
33 |         """ register a finalizer """
34 | 
35 |         self._func.append(func)
36 |         self._args.append(args)
37 |         self._kwargs.append(kwargs)
38 | 
39 |     def __del__(self):
40 |         # calls finalizers in reversed order
41 |         it = zip(reversed(self._func), reversed(self._args), reversed(self._kwargs))
42 | 
43 |         for func, args, kwargs in it:
44 |             func(*args, **kwargs)
45 | 
46 | 
47 | def init_cuda():
48 |     checkError(cuda.cuInit(0))
49 |     device = checkError(cuda.cuDeviceGet(0))
50 | 
51 |     context = checkError(cuda.cuDevicePrimaryCtxRetain(device))
52 |     context = UniqueResource(context, cuda.cuDevicePrimaryCtxRelease, device)
53 | 
54 |     checkError(cuda.cuCtxPushCurrent(context.obj))
55 |     context.register(cuda.cuCtxPopCurrent)
56 | 
57 |     return device, context
58 | 
59 | 
60 | def convolution(
61 |     network: trt.INetworkDefinition,
62 |     input: trt.ITensor,
63 |     in_channels: int,
64 |     out_channels: int,
65 |     kernel_size: int,
66 |     kernel: Optional[np.ndarray] = None,
67 |     bias: Optional[np.ndarray] = None
68 | ) -> trt.ITensor:
69 | 
70 |     if kernel is None:
71 |         kernel = np.empty(
72 |             (out_channels, in_channels, kernel_size, kernel_size),
73 |             dtype=np.float32)
74 | 
75 |     if bias is None:
76 |         bias = np.zeros(out_channels, dtype=np.float32)
77 | 
78 |     output = network.add_convolution_nd(
79 |         input=input, num_output_maps=out_channels,
80 |         kernel_shape=(kernel_size, kernel_size),
81 |         kernel=kernel, bias=bias)
82 | 
83 |     output.padding_nd = (kernel_size // 2, kernel_size // 2)
84 |     output.stride_nd = (1, 1)
85 | 
86 |     return output.get_output(0)
87 | 
88 | 
89 | def pixel_unshuffle(
90 |     network: trt.INetworkDefinition,
91 |     input: trt.ITensor,
92 |     downscale_factor: int
93 | ) -> trt.ITensor:
94 | 
95 |     n, ic, ih, iw = input.shape
96 |     assert ih % downscale_factor == 0 and iw % downscale_factor == 0
97 |     oc = ic * (downscale_factor ** 2)
98 |     oh = ih // downscale_factor
99 |     ow = iw // downscale_factor
100 | 
101 |     reshape = network.add_shuffle(input)
102 |     reshape.reshape_dims = trt.Dims([n, ic, oh, downscale_factor, ow, downscale_factor])
103 |     reshape.second_transpose = trt.Permutation([0, 1, 3, 5, 2, 4])
104 | 
105 |     reshape = network.add_shuffle(reshape.get_output(0))
106 |     reshape.reshape_dims = trt.Dims([n, oc, oh, ow])
107 | 
108 |     return reshape.get_output(0)
109 | 
110 | 
111 | def pixel_shuffle(
112 |     network: trt.INetworkDefinition,
113 |     input: trt.ITensor,
114 |     upscale_factor: int
115 | ) -> trt.ITensor:
116 | 
117 |     n, ic, ih, iw = input.shape
118 |     assert ic % (upscale_factor ** 2) == 0
119 |     oc = ic // (upscale_factor ** 2)
120 |     oh = ih * upscale_factor
121 |     ow = iw * upscale_factor
122 | 
123 |     reshape = network.add_shuffle(input)
124 |     reshape.reshape_dims = trt.Dims([n, oc, upscale_factor, upscale_factor, ih, iw])
125 |     reshape.second_transpose = trt.Permutation([0, 1, 4, 2, 5, 3])
126 | 
127 |     reshape = network.add_shuffle(reshape.get_output(0))
128 |     reshape.reshape_dims = trt.Dims([n, oc, oh, ow])
129 | 
130 |     return reshape.get_output(0)
131 | 
132 | 
--------------------------------------------------------------------------------
/Collections/examples/FFDNet_TensorRT/vs_ffdnet.py:
--------------------------------------------------------------------------------
1 | import ctypes
2 | 
3 | import vapoursynth as vs
4 | from vapoursynth import core
5 | 
6 | import numpy as np
7 | import tensorrt as trt
8 | 
9 | from utils import *
10 | 
11 | 
12 | _is_api4: bool = hasattr(vs, 
"__api_version__") and vs.__api_version__.api_major == 4 13 | 14 | 15 | def _get_array(frame, plane, read=True): 16 | if not read and frame.readonly: 17 | raise ValueError("Frame is readonly") 18 | 19 | if _is_api4: 20 | return frame[plane] 21 | else: 22 | if read: 23 | return frame.get_read_array(plane) 24 | else: 25 | return frame.get_write_array(plane) 26 | 27 | 28 | _cuda_context = init_cuda() 29 | 30 | 31 | def FFDNet( 32 | clip: vs.VideoNode, 33 | sigma: float = 5.0, 34 | use_cuda_graph: bool = False, 35 | logger: trt.Logger = trt.Logger(trt.Logger.WARNING) 36 | ) -> vs.VideoNode: 37 | 38 | assert clip.format.id == vs.RGBS 39 | width, height = clip.width, clip.height 40 | 41 | sigma /= 255 42 | 43 | runtime = trt.Runtime(logger) 44 | 45 | with open(f"ffdnet_{width}_{height}.engine", "rb") as f: 46 | engine = runtime.deserialize_cuda_engine(f.read()) 47 | 48 | execution_context = engine.create_execution_context() 49 | input_size = execution_context.get_strides(0)[0] * 4 50 | input_shape = execution_context.get_binding_shape(0) 51 | sigma_size = execution_context.get_strides(1)[0] * 4 52 | sigma_shape = execution_context.get_binding_shape(1) 53 | output_size = execution_context.get_strides(2)[0] * 4 54 | output_shape = execution_context.get_binding_shape(2) 55 | 56 | h_sigma = checkError(cuda.cuMemHostAlloc( 57 | sigma_size, cuda.CU_MEMHOSTALLOC_WRITECOMBINED)) 58 | h_sigma = UniqueResource(h_sigma, cuda.cuMemFreeHost, h_sigma) 59 | h_sigma_pointer = ctypes.cast( 60 | ctypes.c_void_p(h_sigma.obj), ctypes.POINTER(ctypes.c_float)) 61 | h_sigma_array = np.ctypeslib.as_array( 62 | h_sigma_pointer, shape=(sigma_size // 4,)).reshape(sigma_shape) 63 | 64 | d_sigma = checkError(cuda.cuMemAlloc(sigma_size)) 65 | d_sigma = UniqueResource(d_sigma, cuda.cuMemFree, d_sigma) 66 | 67 | h_input = checkError(cuda.cuMemHostAlloc( 68 | input_size, cuda.CU_MEMHOSTALLOC_WRITECOMBINED)) 69 | h_input = UniqueResource(h_input, cuda.cuMemFreeHost, h_input) 70 | h_input_pointer = ctypes.cast( 71 | ctypes.c_void_p(h_input.obj), ctypes.POINTER(ctypes.c_float)) 72 | h_input_array = np.ctypeslib.as_array( 73 | h_input_pointer, shape=(input_size // 4,)).reshape(input_shape) 74 | 75 | d_input = checkError(cuda.cuMemAlloc(input_size)) 76 | d_input = UniqueResource(d_input, cuda.cuMemFree, d_input) 77 | 78 | d_output = checkError(cuda.cuMemAlloc(output_size)) 79 | d_output = UniqueResource(d_output, cuda.cuMemFree, d_output) 80 | 81 | h_output = checkError(cuda.cuMemAllocHost(output_size)) 82 | h_output = UniqueResource(h_output, cuda.cuMemFreeHost, h_output) 83 | h_output_pointer = ctypes.cast( 84 | ctypes.c_void_p(h_output.obj), ctypes.POINTER(ctypes.c_float)) 85 | h_output_array = np.ctypeslib.as_array( 86 | h_output_pointer, shape=(output_size // 4,)).reshape(output_shape) 87 | 88 | stream = checkError(cuda.cuStreamCreate( 89 | cuda.CUstream_flags.CU_STREAM_NON_BLOCKING.value)) 90 | stream = UniqueResource(stream, cuda.cuStreamDestroy, stream) 91 | 92 | h_sigma_array[...] 
= sigma
93 |     checkError(cuda.cuMemcpyHtoDAsync(
94 |         d_sigma.obj, h_sigma.obj, sigma_size, stream.obj))
95 | 
96 |     def execute():
97 |         checkError(cuda.cuMemcpyHtoDAsync(
98 |             d_input.obj, h_input.obj, input_size, stream.obj))
99 | 
100 |         execution_context.execute_async_v2(
101 |             [d_input.obj, d_sigma.obj, d_output.obj],
102 |             stream_handle=stream.obj)
103 | 
104 |         checkError(cuda.cuMemcpyDtoHAsync(
105 |             h_output.obj, d_output.obj, output_size, stream.obj))
106 | 
107 |     if use_cuda_graph:
108 |         checkError(cuda.cuStreamBeginCapture(
109 |             stream.obj, cuda.CUstreamCaptureMode.CU_STREAM_CAPTURE_MODE_RELAXED))
110 | 
111 |         execute()
112 | 
113 |         graph = checkError(cuda.cuStreamEndCapture(stream.obj))
114 |         graphexec, error_node = checkError(cuda.cuGraphInstantiate(
115 |             graph, logBuffer=b"", bufferSize=0))
116 |         graphexec = UniqueResource(graphexec, cuda.cuGraphExecDestroy, graphexec)
117 |         checkError(cuda.cuGraphDestroy(graph))
118 | 
119 |     def inference_core(n, f):
120 |         for i in range(3):
121 |             h_input_array[0, i, :, :] = np.asarray(_get_array(f, plane=i, read=True))
122 | 
123 |         if use_cuda_graph:
124 |             checkError(cuda.cuGraphLaunch(graphexec.obj, stream.obj))
125 |         else:
126 |             execute()
127 | 
128 |         fout = f.copy()
129 |         _get_array(fout, plane=0, read=False)  # triggers COW
130 |         checkError(cuda.cuStreamSynchronize(stream.obj))
131 | 
132 |         for i in range(3):
133 |             np.asarray(_get_array(fout, plane=i, read=False))[...] = h_output_array[0, i, :, :]
134 | 
135 |         return fout
136 | 
137 |     return core.std.ModifyFrame(clip, clips=[clip], selector=inference_core)
--------------------------------------------------------------------------------
/Collections/examples/KNLMeasCL_cupy/knlm.cu:
--------------------------------------------------------------------------------
1 | // original OpenCL implementation: https://github.com/Khanattila/KNLMeansCL/blob/27f95992e2344586b745d013eafa010764c78979/KNLMeansCL/NLMKernel.cpp#L67-L406
2 | 
3 | #define VI_DIM_X ${width}
4 | #define VI_DIM_Y ${height}
5 | 
6 | #define NLM_S ${s}
7 | #define NLM_H ((float) ${h})
8 | #define NLM_WMODE ${wmode}
9 | #define NLM_WREF ((float) ${wref})
10 | 
11 | #define NLM_NORM (255.0f * 255.0f)
12 | #define NLM_LEGACY 3.0f
13 | #define NLM_S_SIZE ((2 * NLM_S + 1) * (2 * NLM_S + 1))
14 | #define NLM_H2_INV_NORM (NLM_NORM / (NLM_LEGACY * NLM_H * NLM_H * NLM_S_SIZE))
15 | 
16 | #define HRZ_BLOCK_X ${hrz_block_x}
17 | #define HRZ_BLOCK_Y ${hrz_block_y}
18 | #define HRZ_RESULT ${hrz_result}
19 | #define VRT_BLOCK_X ${vrt_block_x}
20 | #define VRT_BLOCK_Y ${vrt_block_y}
21 | #define VRT_RESULT ${vrt_result}
22 | 
23 | #ifndef MIN
24 | #define MIN(a,b) (((a)<(b))?(a):(b))
25 | #endif
26 | 
27 | #ifndef MAX
28 | #define MAX(a,b) (((a)>(b))?(a):(b))
29 | #endif
30 | 
31 | #define CLAMPX(x) (MIN(MAX(x, 0), VI_DIM_X - 1))
32 | #define CLAMPY(y) (MIN(MAX(y, 0), VI_DIM_Y - 1))
33 | 
34 | #if __CUDACC_VER_MAJOR__ >= 9 // CUDA 9.0 or later
35 | #include <cooperative_groups.h>
36 | namespace cg = cooperative_groups;
37 | #endif
38 | 
39 | extern "C" __global__
40 | void nlmDistance(const float * __restrict__ U1, float * __restrict__ U4a,
41 |     const int qx, const int qy) {
42 | 
43 |     const int x = blockIdx.x * blockDim.x + threadIdx.x;
44 |     const int y = blockIdx.y * blockDim.y + threadIdx.y;
45 | 
46 |     if (x >= VI_DIM_X || y >= VI_DIM_Y)
47 |         return;
48 | 
49 |     const int gidx = y * VI_DIM_X + x;
50 | 
51 |     // #if defined(NLM_CLIP_REF_LUMA)
52 |     const float u1 = U1[gidx];
53 |     const float u1_pq = U1[CLAMPY(y + qy) * VI_DIM_X + CLAMPX(x + qx)];
54 | 
55 |     const float val = 3.0f * ((u1 - u1_pq) * (u1 - u1_pq));
56 |     // #endif
57 | 
58 |     U4a[gidx] = val;
59 | }
60 | 
61 | extern "C" __global__
62 | void nlmHorizontal(const float * __restrict__ U4a, float * __restrict__ U4b) {
63 | 
64 |     __shared__ float buffer[HRZ_BLOCK_Y][(HRZ_RESULT + 2) * HRZ_BLOCK_X];
65 | 
66 |     const int x = (blockIdx.x * HRZ_RESULT - 1) * HRZ_BLOCK_X + threadIdx.x;
67 |     const int y = blockIdx.y * blockDim.y + threadIdx.y;
68 | 
69 | #if __CUDACC_VER_MAJOR__ >= 9 // CUDA 9.0 or later
70 |     // Handle to thread block group
71 |     cg::thread_block cta = cg::this_thread_block();
72 | #endif
73 | 
74 |     for (int i = 0; i <= 1 + HRZ_RESULT; i++)
75 |         buffer[threadIdx.y][threadIdx.x + i * HRZ_BLOCK_X] =
76 |             U4a[y * VI_DIM_X + CLAMPX(x + i * HRZ_BLOCK_X)];
77 | 
78 | #if __CUDACC_VER_MAJOR__ >= 9 // CUDA 9.0 or later
79 |     cta.sync();
80 | #else
81 |     __syncthreads();
82 | #endif
83 | 
84 |     for (int i = 1; i <= HRZ_RESULT; i++) {
85 |         if ((x + i * HRZ_BLOCK_X >= VI_DIM_X) || y >= VI_DIM_Y)
86 |             return;
87 | 
88 |         float sum = 0.0f;
89 | 
90 |         for (int j = -NLM_S; j <= NLM_S; j++)
91 |             sum += buffer[threadIdx.y][threadIdx.x + i * HRZ_BLOCK_X + j];
92 | 
93 |         U4b[y * VI_DIM_X + (x + i * HRZ_BLOCK_X)] = sum; // (x + i * HRZ_BLOCK_X) >= 0
94 |     }
95 | }
96 | 
97 | extern "C" __global__
98 | void nlmVertical(const float * __restrict__ U4b, float * __restrict__ U4a) {
99 | 
100 |     __shared__ float buffer[VRT_BLOCK_X][(VRT_RESULT + 2) * VRT_BLOCK_Y + 1];
101 | 
102 |     const int x = blockIdx.x * blockDim.x + threadIdx.x;
103 |     const int y = (blockIdx.y * VRT_RESULT - 1) * VRT_BLOCK_Y + threadIdx.y;
104 | 
105 | #if __CUDACC_VER_MAJOR__ >= 9 // CUDA 9.0 or later
106 |     // Handle to thread block group
107 |     cg::thread_block cta = cg::this_thread_block();
108 | #endif
109 | 
110 |     for (int i = 0; i <= 1 + VRT_RESULT; i++)
111 |         buffer[threadIdx.x][threadIdx.y + i * VRT_BLOCK_Y] =
112 |             U4b[CLAMPY(y + i * VRT_BLOCK_Y) * VI_DIM_X + x];
113 | 
114 | #if __CUDACC_VER_MAJOR__ >= 9 // CUDA 9.0 or later
115 |     cta.sync();
116 | #else
117 |     __syncthreads();
118 | #endif
119 | 
120 |     for (int i = 1; i <= VRT_RESULT; i++) {
121 |         if (x >= VI_DIM_X || (y + i * VRT_BLOCK_Y) >= VI_DIM_Y)
122 |             return;
123 | 
124 |         float sum = 0.0f;
125 | 
126 |         for (int j = -NLM_S; j <= NLM_S; j++)
127 |             sum += buffer[threadIdx.x][threadIdx.y + i * VRT_BLOCK_Y + j];
128 | 
129 | #if NLM_WMODE == 0
130 |         // #if defined(NLM_WMODE_WELSCH)
131 |         const float val = expf(-sum * NLM_H2_INV_NORM);
132 | #elif NLM_WMODE == 1
133 |         // #if defined(NLM_WMODE_BISQUARE_A)
134 |         const float val = fdimf(1.0f, sum * NLM_H2_INV_NORM);
135 | #elif NLM_WMODE == 2
136 |         // #if defined(NLM_WMODE_BISQUARE_B)
137 |         const float val = powf(fdimf(1.0f, sum * NLM_H2_INV_NORM), 2.0f);
138 | #elif NLM_WMODE == 3
139 |         // #if defined(NLM_WMODE_BISQUARE_C)
140 |         const float val = powf(fdimf(1.0f, sum * NLM_H2_INV_NORM), 8.0f);
141 | #endif
142 | 
143 |         U4a[(y + i * VRT_BLOCK_Y) * VI_DIM_X + x] = val; // (y + i * VRT_BLOCK_Y) >= 0
144 |     }
145 | }
146 | 
147 | extern "C" __global__
148 | void nlmAccumulation(const float * __restrict__ U1a, float * __restrict__ U2a,
149 |     float * __restrict__ U2b, const float * __restrict__ U4a, float * __restrict__ U5,
150 |     const int qx, const int qy) {
151 | 
152 |     const int x = blockIdx.x * blockDim.x + threadIdx.x;
153 |     const int y = blockIdx.y * blockDim.y + threadIdx.y;
154 | 
155 |     if (x >= VI_DIM_X || y >= VI_DIM_Y)
156 |         return;
157 | 
158 |     const int gidx = y * VI_DIM_X + x;
159 | 
160 |     const float u4 = U4a[gidx];
161 |     const float u4_mq = U4a[CLAMPY(y - qy) * VI_DIM_X + CLAMPX(x - qx)];
162 
| U5[gidx] = fmaxf(u4, fmaxf(u4_mq, U5[gidx])); 163 | 164 | // #if (NLM_CHANNELS == 1) 165 | const float u1_pq = U1a[CLAMPY(y + qy) * VI_DIM_X + CLAMPX(x + qx)]; 166 | const float u1_mq = U1a[CLAMPY(y - qy) * VI_DIM_X + CLAMPX(x - qx)]; 167 | 168 | U2a[gidx] += (u4 * u1_pq) + (u4_mq * u1_mq); 169 | U2b[gidx] += (u4 + u4_mq); 170 | // #endif 171 | } 172 | 173 | extern "C" __global__ 174 | void nlmFinish(const float * __restrict__ U1a, float * __restrict__ U1z, 175 | const float * __restrict__ U2a, const float * __restrict__ U2b, 176 | const float * __restrict__ U5) { 177 | 178 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 179 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 180 | 181 | if (x >= VI_DIM_X || y >= VI_DIM_Y) 182 | return; 183 | 184 | const int gidx = y * VI_DIM_X + x; 185 | const float m = NLM_WREF * U5[gidx]; 186 | 187 | // #if (NLM_CHANNELS == 1) 188 | const float u1 = U1a[gidx]; 189 | const float u2a = U2a[gidx]; 190 | const float u2b = U2b[gidx]; 191 | 192 | const float den = m + u2b; 193 | const float val = (u1 * m + u2a) / den; 194 | 195 | U1z[gidx] = val; 196 | // #endif 197 | } 198 | -------------------------------------------------------------------------------- /Collections/examples/KNLMeasCL_cupy/knlm_cupy.vpy: -------------------------------------------------------------------------------- 1 | """Reimplementation of KNLMeansCL(Non-local Means) in CuPy""" 2 | 3 | from string import Template 4 | 5 | import cupy as cp 6 | import vapoursynth as vs 7 | from vapoursynth import core 8 | import muvsfunc_numpy as mufnp 9 | 10 | 11 | # Load source clip. Only GRAYS is supported 12 | src = core.std.BlankClip(format=vs.GRAYS) 13 | 14 | # params of KNLMeansCL. Documentation: https://github.com/Khanattila/KNLMeansCL/wiki/Filter-description 15 | # d = 0 # only spatial processing is implemented. 
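# For reference, knlm.cu stores 3 * (u1[p] - u1[q])^2 per pixel in nlmDistance,
# box-sums it over the (2*s+1)^2 patch window with the separable
# horizontal/vertical passes, and converts the sum into a Welsch weight (wmode=0):
#     w(p, q) = exp(-sum * 255^2 / (3 * h^2 * (2*s+1)^2))
# where the 255^2 and 3 factors mirror NLM_NORM and NLM_LEGACY in the kernel;
# wmode=1/2/3 substitute bisquare-style falloffs for the exponential.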
16 | a = 2 17 | s = 4 18 | h = 1.2 19 | channels = 'Y' # only grayscale filtering is implemented 20 | wmode = 0 21 | wref = 1.0 22 | rclip = None # not implemented 23 | ocl_x = 16 # local work group width of the separable convolution kernel 24 | ocl_y = 8 # local work group height of the separable convolution kernel 25 | ocl_r = 3 # number of processed pixel for work-item 26 | 27 | # whether to enable '--use_fast_math' in NVRTC runtime compilation 28 | # to make use of fast math operations 29 | fast = False 30 | 31 | 32 | # pre-processing 33 | if src.format.id != vs.GRAYS: 34 | raise TypeError('Only 32-bit float grayscale input is supported!') 35 | 36 | # CUDA kernel execution configuration 37 | dst_block = (32, 32, 1) # used by 'nlmDistance' 38 | hrz_block = (ocl_x, ocl_y, 1) # used by 'nlmHorizontal' 39 | vrt_block = (ocl_x, ocl_y, 1) # used by 'nlmVertical' 40 | work_block = (32, 32, 1) # used by 'nlmAccumulation' and 'nlmFinish' 41 | 42 | # load CUDA kernel 43 | with open('knlm.cu', 'r') as f: 44 | kernel_source_code = f.read() 45 | 46 | kernel_source_code = Template(kernel_source_code) 47 | kernel_source_code = kernel_source_code.substitute( 48 | width=src.width, height=src.height, s=s, h=h, wmode=wmode, wref=wref, 49 | hrz_block_x=ocl_x, hrz_block_y=ocl_y, hrz_result=ocl_r, 50 | vrt_block_x=ocl_x, vrt_block_y=ocl_y, vrt_result=ocl_r) 51 | 52 | if fast: 53 | nlmDistance = cp.RawKernel(kernel_source_code, 'nlmDistance', 54 | options=('--use_fast_math', )) 55 | nlmHorizontal = cp.RawKernel(kernel_source_code, 'nlmHorizontal', 56 | options=('--use_fast_math', )) 57 | nlmVertical = cp.RawKernel(kernel_source_code, 'nlmVertical', 58 | options=('--use_fast_math', )) 59 | nlmAccumulation = cp.RawKernel(kernel_source_code, 'nlmAccumulation', 60 | options=('--use_fast_math', )) 61 | nlmFinish = cp.RawKernel(kernel_source_code, 'nlmFinish', 62 | options=('--use_fast_math', )) 63 | else: 64 | nlmDistance = cp.RawKernel(kernel_source_code, 'nlmDistance') 65 | nlmHorizontal = cp.RawKernel(kernel_source_code, 'nlmHorizontal') 66 | nlmVertical = cp.RawKernel(kernel_source_code, 'nlmVertical') 67 | nlmAccumulation = cp.RawKernel(kernel_source_code, 'nlmAccumulation') 68 | nlmFinish = cp.RawKernel(kernel_source_code, 'nlmFinish') 69 | 70 | 71 | # create NumPy function 72 | def nlm_core(h_img, a, nlmDistance, nlmHorizontal, nlmVertical, nlmAccumulation, nlmFinish): 73 | U1a = cp.asarray(h_img) 74 | h, w = U1a.shape 75 | 76 | U2a = cp.zeros_like(U1a) 77 | U2b = cp.zeros_like(U1a) 78 | U1z = cp.empty_like(U1a) 79 | U4a = cp.empty_like(U1a) 80 | U4b = cp.empty_like(U1a) 81 | U5 = cp.full_like(U1a, fill_value=1.1920928955078125e-7) # CL_FLT_EPSILON 82 | 83 | # Spatial processing 84 | for j in range(-a, a+1): 85 | for i in range(-a, a+1): 86 | if (j * (2 * a + 1) + i < 0): 87 | nlmDistance(((w + dst_block[0] - 1) // dst_block[0], (h + dst_block[1] - 1) // dst_block[1], 1), dst_block, (U1a, U4a, i, j)) 88 | nlmHorizontal(((w + hrz_block[0] - 1) // hrz_block[0], (h + hrz_block[1] - 1) // hrz_block[1], 1), hrz_block, (U4a, U4b)) 89 | nlmVertical(((w + vrt_block[0] - 1) // vrt_block[0], (h + vrt_block[1] - 1) // vrt_block[1]), vrt_block, (U4b, U4a)) 90 | nlmAccumulation(((w + work_block[0] - 1) // work_block[0], (h + work_block[1] - 1) // work_block[1]), work_block, (U1a, U2a, U2b, U4a, U5, i, j)) 91 | 92 | nlmFinish(((w + work_block[0] - 1) // work_block[0], (h + work_block[1] - 1) // work_block[1]), work_block, (U1a, U1z, U2a, U2b, U5)) 93 | 94 | h_out = cp.asnumpy(U1z) 95 | 96 | return h_out 97 | 98 | 99 | # 
100 | res = mufnp.numpy_process(
101 | src, nlm_core, a=a,
102 | nlmDistance=nlmDistance, nlmHorizontal=nlmHorizontal,
103 | nlmVertical=nlmVertical, nlmAccumulation=nlmAccumulation,
104 | nlmFinish=nlmFinish)
105 |
106 | """
107 | res = core.knlm.KNLMeansCL(
108 | src, d=0, a=a, s=s, h=h, channels='Y', wmode=wmode, rclip=None,
109 | device_type='GPU', ocl_x=ocl_x, ocl_y=ocl_y, ocl_r=ocl_r, info=False)
110 | """
111 |
112 | res.set_output()
113 |
--------------------------------------------------------------------------------
/Collections/examples/KNLMeasCL_cupy/knlm_mem_inefficient.cu:
--------------------------------------------------------------------------------
1 | // original OpenCL implementation: https://github.com/Khanattila/KNLMeansCL/blob/27f95992e2344586b745d013eafa010764c78979/KNLMeansCL/NLMKernel.cpp#L67-L406
2 |
3 | #define WIDTH ${width}
4 | #define HEIGHT ${height}
5 |
6 | #define NLM_A ${a}
7 | #define NLM_S ${s}
8 | #define NLM_H ((float) ${h})
9 | #define NLM_WMODE ${wmode}
10 | #define NLM_WREF ((float) ${wref})
11 |
12 | #define NLM_NORM (255.0f * 255.0f)
13 | #define NLM_LEGACY 3.0f
14 | #define NLM_S_SIZE ((2 * NLM_S + 1) * (2 * NLM_S + 1))
15 | #define NLM_H2_INV_NORM (NLM_NORM / (NLM_LEGACY * NLM_H * NLM_H * NLM_S_SIZE))
16 |
17 | #define HRZ_BLOCK_X ${hrz_block_x}
18 | #define HRZ_BLOCK_Y ${hrz_block_y}
19 | #define HRZ_RESULT ${hrz_result}
20 | #define VRT_BLOCK_X ${vrt_block_x}
21 | #define VRT_BLOCK_Y ${vrt_block_y}
22 | #define VRT_RESULT ${vrt_result}
23 |
24 | #ifndef MIN
25 | #define MIN(a,b) (((a)<(b))?(a):(b))
26 | #endif
27 |
28 | #ifndef MAX
29 | #define MAX(a,b) (((a)>(b))?(a):(b))
30 | #endif
31 |
32 | #define CLAMPX(x) (MIN(MAX(x, 0), WIDTH - 1))
33 | #define CLAMPY(y) (MIN(MAX(y, 0), HEIGHT - 1))
34 |
35 | #if __CUDACC_VER_MAJOR__ >= 9 // CUDA 9.0 or later
36 | #include <cooperative_groups.h>
37 | namespace cg = cooperative_groups;
38 | #endif
39 |
40 | extern "C" __global__
41 | void nlmDistance(const float U1[HEIGHT][WIDTH], float U4a[NLM_A*2+1][NLM_A*2+1][HEIGHT][WIDTH]) {
42 |
43 | int x = blockIdx.x * blockDim.x + threadIdx.x;
44 | int y = blockIdx.y * blockDim.y + threadIdx.y;
45 |
46 | if (x >= WIDTH || y >= HEIGHT)
47 | return;
48 |
49 | for (int qy = -NLM_A; qy <= NLM_A; qy++)
50 | for (int qx = -NLM_A; qx <= NLM_A; qx++)
51 | if (qy * (2 * NLM_A + 1) + qx < 0) {
52 | // #if defined(NLM_CLIP_REF_LUMA)
53 | float diff = U1[y][x] - U1[CLAMPY(y + qy)][CLAMPX(x + qx)];
54 |
55 | float val = 3.0f * diff * diff;
56 | // #endif
57 |
58 | U4a[qy+NLM_A][qx+NLM_A][y][x] = val;
59 | }
60 | }
61 |
62 | extern "C" __global__
63 | void nlmHorizontal(const float U4a[NLM_A*2+1][NLM_A*2+1][HEIGHT][WIDTH], float U4b[NLM_A*2+1][NLM_A*2+1][HEIGHT][WIDTH]) {
64 |
65 | __shared__ float buffer[HRZ_BLOCK_Y][(HRZ_RESULT + 2) * HRZ_BLOCK_X];
66 |
67 | const int x = (blockIdx.x * HRZ_RESULT - 1) * HRZ_BLOCK_X + threadIdx.x;
68 | const int y = blockIdx.y * blockDim.y + threadIdx.y;
69 |
70 | cg::thread_block cta = cg::this_thread_block();
71 |
72 | for (int qy = -NLM_A; qy <= NLM_A; qy++)
73 | for (int qx = -NLM_A; qx <= NLM_A; qx++) {
74 | if (qy * (2 * NLM_A + 1) + qx < 0) {
75 | for (int i = 0; i <= 1 + HRZ_RESULT; i++)
76 | buffer[threadIdx.y][threadIdx.x + i * HRZ_BLOCK_X] =
77 | U4a[qy+NLM_A][qx+NLM_A][y][CLAMPX(x + i * HRZ_BLOCK_X)];
78 | }
79 |
80 | cta.sync();
81 |
82 | if (qy * (2 * NLM_A + 1) + qx < 0) {
83 | for (int i = 1; i <= HRZ_RESULT; i++) {
84 | if ((x + i * HRZ_BLOCK_X < WIDTH) && y < HEIGHT) {
85 | float sum = 0.0f;
86 |
87 | for (int j = -NLM_S; j <= NLM_S; j++)
88 | sum += buffer[threadIdx.y][threadIdx.x + i * HRZ_BLOCK_X + j];
89 |
90 | U4b[qy+NLM_A][qx+NLM_A][y][x + i * HRZ_BLOCK_X] = sum; // (x + i * HRZ_BLOCK_X) >= 0
91 | }
92 | }
93 | }
94 | }
95 | }
96 |
97 | extern "C" __global__
98 | void nlmVertical(const float U4b[NLM_A*2+1][NLM_A*2+1][HEIGHT][WIDTH], float U4a[NLM_A*2+1][NLM_A*2+1][HEIGHT][WIDTH]) {
99 |
100 | __shared__ float buffer[VRT_BLOCK_X][(VRT_RESULT + 2) * VRT_BLOCK_Y + 1];
101 |
102 | const int x = blockIdx.x * blockDim.x + threadIdx.x;
103 | const int y = (blockIdx.y * VRT_RESULT - 1) * VRT_BLOCK_Y + threadIdx.y;
104 |
105 |
106 | cg::thread_block cta = cg::this_thread_block();
107 |
108 | for (int qy = -NLM_A; qy <= NLM_A; qy++)
109 | for (int qx = -NLM_A; qx <= NLM_A; qx++) {
110 | if (qy * (2 * NLM_A + 1) + qx < 0) {
111 | for (int i = 0; i <= 1 + VRT_RESULT; i++)
112 | buffer[threadIdx.x][threadIdx.y + i * VRT_BLOCK_Y] =
113 | U4b[qy+NLM_A][qx+NLM_A][CLAMPY(y + i * VRT_BLOCK_Y)][x];
114 | }
115 |
116 | cta.sync();
117 |
118 | if (qy * (2 * NLM_A + 1) + qx < 0) {
119 | for (int i = 1; i <= VRT_RESULT; i++) {
120 | if (x < WIDTH && (y + i * VRT_BLOCK_Y) < HEIGHT) {
121 | float sum = 0.0f;
122 |
123 | for (int j = -NLM_S; j <= NLM_S; j++)
124 | sum += buffer[threadIdx.x][threadIdx.y + i * VRT_BLOCK_Y + j];
125 |
126 | #if NLM_WMODE == 0
127 | // #if defined(NLM_WMODE_WELSCH)
128 | const float val = expf(-sum * NLM_H2_INV_NORM);
129 | #elif NLM_WMODE == 1
130 | // #if defined(NLM_WMODE_BISQUARE_A)
131 | const float val = fdimf(1.0f, sum * NLM_H2_INV_NORM);
132 | #elif NLM_WMODE == 2
133 | // #if defined(NLM_WMODE_BISQUARE_B)
134 | const float val = powf(fdimf(1.0f, sum * NLM_H2_INV_NORM), 2.0f);
135 | #elif NLM_WMODE == 3
136 | // #if defined(NLM_WMODE_BISQUARE_C)
137 | const float val = powf(fdimf(1.0f, sum * NLM_H2_INV_NORM), 8.0f);
138 | #endif
139 |
140 | U4a[qy+NLM_A][qx+NLM_A][y + i * VRT_BLOCK_Y][x] = val; // (y + i * VRT_BLOCK_Y) >= 0
141 | }
142 | }
143 | }
144 | }
145 | }
146 |
147 | extern "C" __global__
148 | void nlmAccumulation_Finish(const float U1a[HEIGHT][WIDTH], float U1z[HEIGHT][WIDTH],
149 | const float U4a[NLM_A*2+1][NLM_A*2+1][HEIGHT][WIDTH]) {
150 |
151 | const int x = blockIdx.x * blockDim.x + threadIdx.x;
152 | const int y = blockIdx.y * blockDim.y + threadIdx.y;
153 |
154 | if (x >= WIDTH || y >= HEIGHT)
155 | return;
156 |
157 | float u5 = 1.1920928955078125e-7f; // CL_FLT_EPSILON
158 | float u2a = 0.f;
159 | float u2b = 0.f;
160 |
161 | for (int qy = -NLM_A; qy <= NLM_A; qy++)
162 | for (int qx = -NLM_A; qx <= NLM_A; qx++)
163 | if (qy * (2 * NLM_A + 1) + qx < 0) {
164 | float u4 = U4a[qy+NLM_A][qx+NLM_A][y][x];
165 | float u4_mq = U4a[qy+NLM_A][qx+NLM_A][CLAMPY(y - qy)][CLAMPX(x - qx)];
166 | u5 = fmaxf(u4, fmaxf(u4_mq, u5));
167 |
168 | // #if (NLM_CHANNELS == 1)
169 | float u1_pq = U1a[CLAMPY(y + qy)][CLAMPX(x + qx)];
170 | float u1_mq = U1a[CLAMPY(y - qy)][CLAMPX(x - qx)];
171 |
172 | u2a += (u4 * u1_pq) + (u4_mq * u1_mq);
173 | u2b += (u4 + u4_mq);
174 | // #endif
175 | }
176 |
177 | float m = NLM_WREF * u5;
178 | float den = m + u2b;
179 |
180 | U1z[y][x] = (U1a[y][x] * m + u2a) / den;
181 | }
182 |
--------------------------------------------------------------------------------
/Collections/examples/KNLMeasCL_cupy/knlm_mem_inefficient_cupy.vpy:
--------------------------------------------------------------------------------
1 | """Reimplementation of KNLMeansCL (Non-local Means) in CuPy"""
2 |
3 | from string import Template
4 |
5 | import cupy as cp
6 | import vapoursynth as vs
7 | from vapoursynth import core
8 | import muvsfunc_numpy as mufnp
9 |
10 |
11 | # Load source clip. Only GRAYS is supported
12 | src = core.std.BlankClip(format=vs.GRAYS)
13 | src = core.imwri.Read(r"C:\Users\KurtWu\Desktop\1\300_gray.png").fmtc.bitdepth(bits=32)
14 |
15 | # params of KNLMeansCL. Documentation: https://github.com/Khanattila/KNLMeansCL/wiki/Filter-description
16 | # d = 0 # only spatial processing is implemented.
17 | a = 2
18 | s = 4
19 | h = 1.2
20 | channels = 'Y' # only grayscale filtering is implemented
21 | wmode = 0
22 | wref = 1.0
23 | rclip = None # not implemented
24 | ocl_x = 16 # local work group width of the separable convolution kernel
25 | ocl_y = 8 # local work group height of the separable convolution kernel
26 | ocl_r = 3 # number of processed pixels per work-item
27 |
28 | # whether to enable '--use_fast_math' in NVRTC runtime compilation
29 | # to make use of fast math operations
30 | fast = False
31 |
32 |
33 | # pre-processing
34 | if src.format.id != vs.GRAYS:
35 | raise TypeError('Only 32-bit float grayscale input is supported!')
36 |
37 | # CUDA kernel execution configuration
38 | dst_block = (32, 32, 1) # used by 'nlmDistance'
39 | hrz_block = (ocl_x, ocl_y, 1) # used by 'nlmHorizontal'
40 | vrt_block = (ocl_x, ocl_y, 1) # used by 'nlmVertical'
41 | work_block = (32, 32, 1) # used by 'nlmAccumulation_Finish'
42 |
43 | # load CUDA kernel
44 | with open('knlm_mem_inefficient.cu', 'r') as f:
45 | kernel_source_code = f.read()
46 |
47 | kernel_source_code = Template(kernel_source_code)
48 | kernel_source_code = kernel_source_code.substitute(
49 | width=src.width, height=src.height, a=a, s=s, h=h, wmode=wmode, wref=wref,
50 | hrz_block_x=ocl_x, hrz_block_y=ocl_y, hrz_result=ocl_r,
51 | vrt_block_x=ocl_x, vrt_block_y=ocl_y, vrt_result=ocl_r)
52 |
53 | if fast:
54 | nlmDistance = cp.RawKernel(kernel_source_code, 'nlmDistance',
55 | options=('--use_fast_math', ))
56 | nlmHorizontal = cp.RawKernel(kernel_source_code, 'nlmHorizontal',
57 | options=('--use_fast_math', ))
58 | nlmVertical = cp.RawKernel(kernel_source_code, 'nlmVertical',
59 | options=('--use_fast_math', ))
60 | nlmAccumulation_Finish = cp.RawKernel(kernel_source_code, 'nlmAccumulation_Finish',
61 | options=('--use_fast_math', ))
62 | else:
63 | nlmDistance = cp.RawKernel(kernel_source_code, 'nlmDistance')
64 | nlmHorizontal = cp.RawKernel(kernel_source_code, 'nlmHorizontal')
65 | nlmVertical = cp.RawKernel(kernel_source_code, 'nlmVertical')
66 | nlmAccumulation_Finish = cp.RawKernel(kernel_source_code, 'nlmAccumulation_Finish')
67 |
68 |
69 | # create NumPy function
70 | def nlm_core(h_img, a, nlmDistance, nlmHorizontal, nlmVertical, nlmAccumulation_Finish):
71 | U1a = cp.asarray(h_img)
72 | h, w = U1a.shape
73 |
74 | U4a = cp.empty((2*a+1, 2*a+1, h, w), dtype=U1a.dtype) # dtype must match the 'float' buffers declared in the kernel
75 | U4b = cp.empty((2*a+1, 2*a+1, h, w), dtype=U1a.dtype)
76 | U1z = cp.empty_like(U1a)
77 |
78 | # Spatial processing
79 | nlmDistance(((w + dst_block[0] - 1) // dst_block[0], (h + dst_block[1] - 1) // dst_block[1], 1), dst_block, (U1a, U4a))
80 | nlmHorizontal(((w + hrz_block[0] - 1) // hrz_block[0], (h + hrz_block[1] - 1) // hrz_block[1], 1), hrz_block, (U4a, U4b))
81 | nlmVertical(((w + vrt_block[0] - 1) // vrt_block[0], (h + vrt_block[1] - 1) // vrt_block[1]), vrt_block, (U4b, U4a))
82 | nlmAccumulation_Finish(((w + work_block[0] - 1) // work_block[0], (h + work_block[1] - 1) // work_block[1]), work_block, (U1a, U1z, U4a))
83 |
84 | h_out = cp.asnumpy(U1z)
85 |
86 | return h_out
87 |
88 |
89 | # process
90 | res = mufnp.numpy_process(
91 | src, 
nlm_core, a=a, 92 | nlmDistance=nlmDistance, nlmHorizontal=nlmHorizontal, 93 | nlmVertical=nlmVertical, 94 | nlmAccumulation_Finish=nlmAccumulation_Finish) 95 | 96 | """ 97 | res = core.knlm.KNLMeansCL( 98 | src, d=0, a=a, s=s, h=h, channels='Y', wmode=wmode, rclip=None, 99 | device_type='GPU', ocl_x=ocl_x, ocl_y=ocl_y, ocl_r=ocl_r, info=False) 100 | """ 101 | 102 | res.set_output() 103 | -------------------------------------------------------------------------------- /Collections/examples/NLH_cupy/NLH_cupy.vpy: -------------------------------------------------------------------------------- 1 | """ 2 | Implementation of NLH (NLH: A Blind Pixel-level Non-local Method for Real-world Image Denoising) in CuPy 3 | 4 | Ref: 5 | [1] Hou, Y., Xu, J., Liu, M., Liu, G., Liu, L., Zhu, F., & Shao, L. (2019). 6 | NLH: A Blind Pixel-level Non-local Method for Real-world Image Denoising. 7 | arXiv preprint arXiv:1906.06834. 8 | 9 | """ 10 | 11 | from string import Template 12 | 13 | import cupy as cp 14 | import vapoursynth as vs 15 | from vapoursynth import core 16 | 17 | import muvsfunc_numpy as mufnp 18 | 19 | 20 | # Load source clip. Only GRAYS is supported 21 | src = core.std.BlankClip(format=vs.GRAYS) 22 | 23 | # params of NLH 24 | # d = 0 # only spatial processing is implemented. 25 | a = 2 26 | s = 4 27 | h = 1.6 28 | h2 = 1.6 29 | 30 | # whether to enable '--use_fast_math' in NVRTC runtime compilation 31 | # to make use of fast math operations 32 | fast = False 33 | 34 | # CUDA kernel execution configuration 35 | work_block = (16, 16, 1) 36 | 37 | 38 | # pre-processing 39 | if src.format.id != vs.GRAYS: 40 | raise TypeError('Only 32-bit float grayscale input is supported!') 41 | 42 | 43 | # load CUDA kernel 44 | with open('kernel.cu', 'r') as f: 45 | kernel_source_code = f.read() 46 | 47 | kernel_source_code = Template(kernel_source_code) 48 | kernel_source_code = kernel_source_code.substitute( 49 | width=src.width, height=src.height, a=a, s=s, h=h, h2=h2) 50 | 51 | if fast: 52 | compute = cp.RawKernel(kernel_source_code, 'compute', options=('--use_fast_math', )) 53 | else: 54 | compute = cp.RawKernel(kernel_source_code, 'compute') 55 | 56 | 57 | # create NumPy function 58 | def nlm_core(h_src, compute): 59 | d_src = cp.asarray(h_src) 60 | h, w = h_src.shape 61 | 62 | d_dst = cp.empty_like(d_src) 63 | 64 | compute(((w + work_block[0] - 1) // work_block[0], (h + work_block[1] - 1) // work_block[1]), work_block, (d_src, d_dst)) 65 | 66 | h_out = cp.asnumpy(d_dst) 67 | 68 | return h_out 69 | 70 | # process 71 | res = mufnp.numpy_process(src, nlm_core, compute=compute) 72 | 73 | # feisty2's CPU implementation (https://github.com/IFeelBloated/NLMeans-PM/tree/f2539968e3ded41588cc18b3c2a984f42b79e4a9) 74 | # "ref" is not currently implemented on the CUDA version 75 | # res = core.test.Test(src, a=a, s=s, h=h, h2=h2) 76 | 77 | 78 | res.set_output() 79 | -------------------------------------------------------------------------------- /Collections/examples/NLH_cupy/kernel.cu: -------------------------------------------------------------------------------- 1 | #define WIDTH ${width} 2 | #define HEIGHT ${height} 3 | #define NLM_A ${a} 4 | #define NLM_S ${s} 5 | #define NLM_H ((float) (${h} / 79.636080791869483631941455867052)) 6 | #define NLM_H2 ((float) (${h2} / 79.636080791869483631941455867052)) 7 | 8 | 9 | #define GET(pointer, y0, x0) pointer[max(min((y0), HEIGHT-1), 0) * WIDTH + max(min((x0), WIDTH-1), 0)] 10 | #define PatchMatrix(y0, x0) GET(srcp, y-NLM_A-NLM_S + (y0) / (2*NLM_A+1) + (x0) / 
(2*NLM_S+1), x-NLM_A-NLM_S + (y0) % (2*NLM_A+1) + (x0) % (2*NLM_S+1)) 11 | #define Square(x) ((x) * (x)) 12 | 13 | #define PatchSize Square(2 * NLM_S + 1) 14 | #define SearchSize Square(2 * NLM_A + 1) 15 | 16 | extern "C" __global__ 17 | void compute(const float * __restrict__ srcp, float * __restrict__ dstp) { 18 | int x = blockDim.x * blockIdx.x + threadIdx.x; 19 | int y = blockDim.y * blockIdx.y + threadIdx.y; 20 | 21 | if (x >= WIDTH || y >= HEIGHT) 22 | return; 23 | 24 | float PatchWeights[SearchSize]; 25 | 26 | // CalculatePatchWeights 27 | float NormalizingConstant = 0.f; 28 | for (int i = 0; i < SearchSize; i++) { 29 | float SSE = 0.f; 30 | for (int j = 0; j < PatchSize; j++) 31 | SSE += Square(PatchMatrix(i, j) - PatchMatrix(SearchSize / 2, j)); 32 | float Weight = expf(-SSE / Square(NLM_H)); 33 | PatchWeights[i] = Weight; 34 | NormalizingConstant += Weight; 35 | } 36 | 37 | for (int i = 0; i < SearchSize; i++) { 38 | PatchWeights[i] /= NormalizingConstant; 39 | } 40 | 41 | // CalculatePositionWeights & Aggregate 42 | float Result = 0.f; 43 | NormalizingConstant = 0.f; 44 | for (int j = 0; j < PatchSize; j++) { 45 | float SSE = 0.f; 46 | for (int i = 0; i < SearchSize; i++) 47 | SSE += PatchWeights[i] * Square(PatchMatrix(i, j) - PatchMatrix(i, PatchSize / 2)); 48 | float Weight = expf(-SSE / Square(NLM_H2)); 49 | Result += Weight * PatchMatrix(SearchSize / 2, j); 50 | NormalizingConstant += Weight; 51 | } 52 | 53 | GET(dstp, y, x) = Result / NormalizingConstant; 54 | } 55 | -------------------------------------------------------------------------------- /Collections/examples/SigmaFilter_cupy/sigma_filter.cu: -------------------------------------------------------------------------------- 1 | #define WIDTH ${width} 2 | #define HEIGHT ${height} 3 | 4 | #define RADIUS ${radius} 5 | #define THRESHOLD ((float) ${threshold}) 6 | 7 | #ifndef MIN 8 | #define MIN(a,b) (((a)<(b))?(a):(b)) 9 | #endif 10 | 11 | #ifndef MAX 12 | #define MAX(a,b) (((a)>(b))?(a):(b)) 13 | #endif 14 | 15 | #define CLAMPX(x) (MIN(MAX(x, 0), WIDTH - 1)) 16 | #define CLAMPY(y) (MIN(MAX(y, 0), HEIGHT - 1)) 17 | 18 | extern "C" __global__ 19 | void sigmaFilter(const float * __restrict__ src, float * __restrict__ dst) { 20 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 21 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 22 | 23 | if (x >= WIDTH || y >= HEIGHT) 24 | return; 25 | 26 | const float center = src[y * WIDTH + x]; 27 | 28 | float sum = 0.0f; 29 | int count = 0; 30 | 31 | for (int j = -RADIUS; j <= RADIUS; j++) 32 | for (int i = -RADIUS; i <= RADIUS; i++) { 33 | const float val = src[CLAMPY(y + j) * WIDTH + CLAMPX(x + i)]; 34 | 35 | if (fabsf(val - center) < THRESHOLD) { 36 | sum += val; 37 | count += 1; 38 | } 39 | } 40 | 41 | dst[y * WIDTH + x] = sum / count; 42 | } 43 | -------------------------------------------------------------------------------- /Collections/examples/SigmaFilter_cupy/sigma_filter_cupy.vpy: -------------------------------------------------------------------------------- 1 | """Sigma Filter in CuPy""" 2 | 3 | from string import Template 4 | 5 | import cupy as cp 6 | import vapoursynth as vs 7 | from vapoursynth import core 8 | import muvsfunc_numpy as mufnp 9 | 10 | 11 | # Load source clip. 
Only GRAYS is supported 12 | src = core.std.BlankClip(format=vs.GRAYS) 13 | 14 | # params of mufnp.SigmaFilter() 15 | radius = 3 16 | thr = 0.01 17 | 18 | # whether to enable '--use_fast_math' in NVRTC runtime compilation 19 | # to make use of fast math operations 20 | fast = False 21 | 22 | 23 | # pre-processing 24 | if src.format.id != vs.GRAYS: 25 | raise TypeError('Only 32-bit float grayscale input is supported!') 26 | 27 | # CUDA kernel execution configuration 28 | blksize = (16, 8, 1) 29 | 30 | # load CUDA kernel 31 | with open('sigma_filter.cu', 'r') as f: 32 | kernel_source_code = f.read() 33 | 34 | kernel_source_code = Template(kernel_source_code) 35 | kernel_source_code = kernel_source_code.substitute( 36 | width=src.width, height=src.height, radius=radius, threshold=thr) 37 | 38 | if fast: 39 | kernel = cp.RawKernel(kernel_source_code, 'sigmaFilter', 40 | options=('--use_fast_math', )) 41 | else: 42 | kernel = cp.RawKernel(kernel_source_code, 'sigmaFilter') 43 | 44 | # create NumPy function 45 | def sigma_filter_core(h_img): 46 | d_img = cp.asarray(h_img) 47 | h, w = d_img.shape 48 | 49 | d_out = cp.empty_like(d_img) 50 | 51 | kernel(((w + blksize[0] - 1)//blksize[0], (h + blksize[1] - 1)//blksize[1]), blksize, (d_img, d_out)) 52 | 53 | h_out = cp.asnumpy(d_out) 54 | 55 | return h_out 56 | 57 | 58 | # process 59 | res = mufnp.numpy_process(src, sigma_filter_core) 60 | 61 | # res = mufnp.SigmaFilter(src, radius=radius, thr=thr) 62 | 63 | res.set_output() -------------------------------------------------------------------------------- /Collections/examples/SigmaFilter_cython/setup.py: -------------------------------------------------------------------------------- 1 | # To build, run `python setup.py build_ext --inplace` 2 | 3 | from distutils.core import setup 4 | from distutils.extension import Extension 5 | from Cython.Build import cythonize 6 | 7 | 8 | ext_modules = [Extension(name="sigma_filter", sources=["sigma_filter.pyx"])] 9 | 10 | setup(name='sigma_filter', ext_modules=cythonize(module_list=ext_modules, language_level=3)) 11 | -------------------------------------------------------------------------------- /Collections/examples/SigmaFilter_cython/sigma_filter.pyx: -------------------------------------------------------------------------------- 1 | # cython: boundscheck=False, initializedcheck=False, language_level=3, nonecheck=False, overflowcheck=False, wraparound=False 2 | 3 | cimport cython 4 | from cython cimport view 5 | 6 | 7 | cdef Py_ssize_t clamp(const Py_ssize_t val, const Py_ssize_t low, const Py_ssize_t high) nogil: 8 | return min(max(val, low), high) 9 | 10 | 11 | cpdef void sigma_filter( 12 | const float [:, ::view.contiguous] src, float [:, ::view.contiguous] dst, 13 | const int radius, const float threshold): 14 | """Sigma filter""" 15 | 16 | cdef Py_ssize_t height = src.shape[0] 17 | cdef Py_ssize_t width = src.shape[1] 18 | 19 | cdef float center, val, acc 20 | cdef int count, x, y, i, j 21 | 22 | with nogil: 23 | for y in range(height): 24 | for x in range(width): 25 | center = src[y, x] 26 | 27 | acc = 0. 
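# sigma filter: average all neighbors whose difference from the center is below `threshold`;
# assuming threshold > 0, `count` always includes the center pixel itself, so the division
# at the end of the loop cannot be by zero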
28 | count = 0 29 | 30 | for j in range(-radius, radius + 1): 31 | for i in range(-radius, radius + 1): 32 | val = src[clamp(y + j, 0, height - 1), clamp(x + i, 0, width - 1)] 33 | 34 | if abs(center - val) < threshold: 35 | acc += val 36 | count += 1 37 | 38 | dst[y, x] = acc / count 39 | -------------------------------------------------------------------------------- /Collections/examples/SigmaFilter_cython/sigma_filter_cython.vpy: -------------------------------------------------------------------------------- 1 | """Sigma Filter in Cython""" 2 | 3 | import vapoursynth as vs 4 | from vapoursynth import core 5 | from functools import partial 6 | 7 | # To build, run `python setup.py build_ext --inplace` 8 | from sigma_filter import sigma_filter 9 | 10 | is_api4: bool = hasattr(vs, "__api_version__") and vs.__api_version__.api_major == 4 11 | 12 | def get_array(frame, plane, read=True): 13 | if not read and frame.readonly: 14 | raise ValueError("Frame is readonly") 15 | 16 | if is_api4: 17 | return frame[plane] 18 | else: 19 | if read: 20 | return frame.get_read_array(plane) 21 | else: 22 | return frame.get_write_array(plane) 23 | 24 | 25 | # Load source clip. Only GRAYS is supported 26 | src = core.std.BlankClip(format=vs.GRAYS) 27 | 28 | # params of mufnp.SigmaFilter() 29 | radius = 3 30 | thr = 0.01 31 | 32 | parallel = True 33 | 34 | 35 | # pre-processing 36 | if src.format.id != vs.GRAYS: 37 | raise TypeError('Only 32-bit float grayscale input is supported!') 38 | 39 | 40 | def executor(n, f, radius, thr): 41 | fout = f.copy() 42 | 43 | src = get_array(f, 0) 44 | dst = get_array(fout, 0, read=False) 45 | 46 | sigma_filter(src, dst, radius, thr) 47 | 48 | return fout 49 | 50 | selector = partial(executor, radius=radius, thr=thr) 51 | # process 52 | if parallel: 53 | res = core.std.FrameEval(src, lambda n: core.std.ModifyFrame(src, src, selector)) 54 | else: 55 | res = core.std.ModifyFrame(src, src, selector) 56 | 57 | # res = mufnp.SigmaFilter(src, radius=radius, thr=thr) 58 | 59 | res.set_output() 60 | -------------------------------------------------------------------------------- /Collections/examples/Super-xBR_cupy/super-xbr.cu: -------------------------------------------------------------------------------- 1 | /* 2 | CUDA port of Super-xBR image upscaling algorithm by WolframRhodium 3 | 4 | The algorithm is modified for data parallelism 5 | 6 | Source: https://pastebin.com/cbH8ZQQT 7 | 8 | ******* Super XBR Scaler ******* 9 | 10 | Copyright (c) 2016 Hyllian - sergiogdb@gmail.com 11 | 12 | Permission is hereby granted, free of charge, to any person obtaining a copy 13 | of this software and associated documentation files (the "Software"), to deal 14 | in the Software without restriction, including without limitation the rights 15 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 16 | copies of the Software, and to permit persons to whom the Software is 17 | furnished to do so, subject to the following conditions: 18 | 19 | The above copyright notice and this permission notice shall be included in 20 | all copies or substantial portions of the Software. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 25 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 27 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 28 | THE SOFTWARE. 29 | */ 30 | 31 | #define IWIDTH (${width}) 32 | #define IHEIGHT (${height}) 33 | #define OWIDTH (IWIDTH * 2) 34 | #define OHEIGHT (IHEIGHT * 2) 35 | 36 | #define WGT1 ((float) ${wgt1}) // 0.129633f 37 | #define WGT2 ((float) ${wgt2}) // 0.175068f 38 | #define W1 (-WGT1) 39 | #define W2 (WGT1 + 0.5f) 40 | #define W3 (-WGT2) 41 | #define W4 (WGT2 + 0.5f) 42 | 43 | __device__ __forceinline__ int clamp(int x, int floor, int ceil) { 44 | return max(floor, min(x, ceil)); 45 | } 46 | 47 | __device__ __forceinline__ float3 min4(float3 a, float3 b, float3 c, float3 d) { 48 | return make_float3( 49 | fminf(fminf(a.x, b.x), fminf(c.x, d.x)), 50 | fminf(fminf(a.y, b.y), fminf(c.y, d.y)), 51 | fminf(fminf(a.z, b.z), fminf(c.z, d.z))); 52 | } 53 | 54 | __device__ __forceinline__ float3 max4(float3 a, float3 b, float3 c, float3 d) { 55 | return make_float3( 56 | fmaxf(fmaxf(a.x, b.x), fmaxf(c.x, d.x)), 57 | fmaxf(fmaxf(a.y, b.y), fmaxf(c.y, d.y)), 58 | fmaxf(fmaxf(a.z, b.z), fmaxf(c.z, d.z))); 59 | } 60 | 61 | __device__ __forceinline__ float3 clamp(float3 x, float3 floor, float3 ceil) { 62 | return make_float3( 63 | fmaxf(floor.x, fminf(x.x, ceil.x)), 64 | fmaxf(floor.y, fminf(x.y, ceil.y)), 65 | fmaxf(floor.z, fminf(x.z, ceil.z))); 66 | } 67 | 68 | __device__ __forceinline__ float df(float a, float b) { 69 | return fabsf(a - b); 70 | } 71 | 72 | __device__ __forceinline__ float3 operator+(float3 a, float3 b) 73 | { 74 | return make_float3( 75 | a.x + b.x, 76 | a.y + b.y, 77 | a.z + b.z); 78 | } 79 | 80 | __device__ __forceinline__ float3 operator*(float a, float3 b) 81 | { 82 | return make_float3( 83 | a * b.x, 84 | a * b.y, 85 | a * b.z); 86 | } 87 | 88 | __device__ __forceinline__ float diagonal_edge(const float mat[][4], const float *wp) { 89 | float dw1 = wp[0]*(df(mat[0][2], mat[1][1]) + df(mat[1][1], mat[2][0]) + df(mat[1][3], mat[2][2]) + df(mat[2][2], mat[3][1])) + \ 90 | wp[1]*(df(mat[0][3], mat[1][2]) + df(mat[2][1], mat[3][0])) + \ 91 | wp[2]*(df(mat[0][3], mat[2][1]) + df(mat[1][2], mat[3][0])) + \ 92 | wp[3]*(df(mat[1][2], mat[2][1])) + \ 93 | wp[4]*(df(mat[0][2], mat[2][0]) + df(mat[1][3], mat[3][1])) + \ 94 | wp[5]*(df(mat[0][1], mat[1][0]) + df(mat[2][3], mat[3][2])); 95 | 96 | float dw2 = wp[0]*(df(mat[0][1], mat[1][2]) + df(mat[1][2], mat[2][3]) + df(mat[1][0], mat[2][1]) + df(mat[2][1], mat[3][2])) + \ 97 | wp[1]*(df(mat[0][0], mat[1][1]) + df(mat[2][2], mat[3][3])) + \ 98 | wp[2]*(df(mat[0][0], mat[2][2]) + df(mat[1][1], mat[3][3])) + \ 99 | wp[3]*df(mat[1][1], mat[2][2]) + \ 100 | wp[4]*(df(mat[1][0], mat[3][2]) + df(mat[0][1], mat[2][3])) + \ 101 | wp[5]*(df(mat[0][2], mat[1][3]) + df(mat[2][0], mat[3][1])); 102 | 103 | return (dw1 - dw2); 104 | } 105 | 106 | extern "C" 107 | __global__ void super_xbr_pass1(const float3 * __restrict__ src, float3 * __restrict__ dst) { 108 | // src: W x H, dst: 2W x 2H 109 | // x: 0:W:1, y: 0:H:1 110 | 111 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 112 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 113 | 114 | if (x >= IWIDTH || y >= IHEIGHT) 115 | return; 116 | 117 | // copy pixels to output image 118 | dst[((y * 2) * OWIDTH) + (x * 2)] = src[y * IWIDTH + x]; 119 | dst[((y * 2) * OWIDTH) + (x * 2 + 1)] = src[y * IWIDTH + x]; 120 | dst[((y * 2 + 1) * 
OWIDTH) + (x * 2)] = src[y * IWIDTH + x]; 121 | 122 | // init 123 | constexpr float wp[6] = { 2.0f, 1.0f, -1.0f, 4.0f, -1.0f, 1.0f }; 124 | 125 | float3 rgb_data[4][4]; 126 | float y_data[4][4]; 127 | 128 | // sample supporting pixels in original image 129 | for (int sy = -1; sy <= 2; ++sy) { 130 | const int csy = clamp(y + sy, 0, IHEIGHT - 1); 131 | 132 | for (int sx = -1; sx <= 2; ++sx) { 133 | // clamp pixel locations 134 | const int csx = clamp(x + sx, 0, IWIDTH - 1); 135 | 136 | // sample & add weighted components 137 | rgb_data[sy + 1][sx + 1] = src[csy * IWIDTH + csx]; 138 | 139 | y_data[sy + 1][sx + 1] = 0.2126f * rgb_data[sy + 1][sx + 1].x + 0.7152f * rgb_data[sy + 1][sx + 1].y + \ 140 | 0.0722f * rgb_data[sy + 1][sx + 1].z; 141 | } 142 | } 143 | 144 | const float3 min_sample = min4(rgb_data[1][1], rgb_data[2][1], rgb_data[1][2], rgb_data[2][2]); 145 | const float3 max_sample = max4(rgb_data[1][1], rgb_data[2][1], rgb_data[1][2], rgb_data[2][2]); 146 | 147 | const float d_edge = diagonal_edge(y_data, wp); 148 | 149 | const float3 rgb1 = W1 * (rgb_data[0][3] + rgb_data[3][0]) + W2 * (rgb_data[1][2] + rgb_data[2][1]); 150 | const float3 rgb2 = W1 * (rgb_data[0][0] + rgb_data[3][3]) + W2 * (rgb_data[1][1] + rgb_data[2][2]); 151 | 152 | // generate and write result 153 | float3 rgbf = (d_edge <= 0.0f) ? rgb1 : rgb2; 154 | 155 | // anti-ringing, clamp 156 | rgbf = clamp(rgbf, min_sample, max_sample); 157 | 158 | // output 159 | dst[((y * 2 + 1) * OWIDTH) + (x * 2 + 1)] = rgbf; 160 | } 161 | 162 | extern "C" 163 | __global__ void super_xbr_pass2(const float3 * __restrict__ src, float3 * __restrict__ dst) { 164 | // src: 2W x 2H, dst: 2W x 2H 165 | // x: 0:W:1, y: 0:H:1 166 | 167 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 168 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 169 | 170 | if (x >= IWIDTH || y >= IHEIGHT) 171 | return; 172 | 173 | // copy pixels to output image 174 | dst[((y * 2) * OWIDTH) + (x * 2)] = src[((y * 2) * OWIDTH) + (x * 2)]; 175 | dst[((y * 2 + 1) * OWIDTH) + (x * 2 + 1)] = src[((y * 2 + 1) * OWIDTH) + (x * 2 + 1)]; 176 | 177 | // init 178 | constexpr float wp[6] = { 2.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; 179 | 180 | float3 rgb_data[4][4]; 181 | float y_data[4][4]; 182 | 183 | // output: dst[((y * 2) * OWIDTH) + (x * 2 + 1)] 184 | { 185 | // sample supporting pixels in original image 186 | for (int sy = -1; sy <= 2; ++sy) { 187 | for (int sx = -1; sx <= 2; ++sx) { 188 | // clamp pixel locations 189 | const int csy = clamp((y * 2) + sx - sy, 0, OHEIGHT - 1); 190 | const int csx = clamp((x * 2) + sx + sy, 0, OWIDTH - 1); 191 | 192 | // sample & add weighted components 193 | rgb_data[sy + 1][sx + 1] = src[(csy * OWIDTH + csx)]; 194 | 195 | y_data[sy + 1][sx + 1] = 0.2126f * rgb_data[sy + 1][sx + 1].x + 0.7152f * rgb_data[sy + 1][sx + 1].y + \ 196 | 0.0722f * rgb_data[sy + 1][sx + 1].z; 197 | } 198 | } 199 | 200 | const float3 min_sample = min4(rgb_data[1][1], rgb_data[2][1], rgb_data[1][2], rgb_data[2][2]); 201 | const float3 max_sample = max4(rgb_data[1][1], rgb_data[2][1], rgb_data[1][2], rgb_data[2][2]); 202 | 203 | const float d_edge = diagonal_edge(y_data, wp); 204 | 205 | const float3 rgb1 = W3 * (rgb_data[0][3] + rgb_data[3][0]) + W4 * (rgb_data[1][2] + rgb_data[2][1]); 206 | const float3 rgb2 = W3 * (rgb_data[0][0] + rgb_data[3][3]) + W4 * (rgb_data[1][1] + rgb_data[2][2]); 207 | 208 | // generate and write result 209 | float3 rgbf = (d_edge <= 0.0f) ? 
rgb1 : rgb2; 210 | 211 | // anti-ringing, clamp 212 | rgbf = clamp(rgbf, min_sample, max_sample); 213 | 214 | // output 215 | dst[((y * 2) * OWIDTH) + (x * 2 + 1)] = rgbf; 216 | } 217 | 218 | // output: dst[((y * 2 + 1) * OWIDTH) + (x * 2)] 219 | { 220 | // sample supporting pixels in original image 221 | for (int sy = -1; sy <= 2; ++sy) { 222 | for (int sx = -1; sx <= 2; ++sx) { 223 | // clamp pixel locations 224 | const int csy = clamp((y * 2) + sx - sy + 1, 0, OHEIGHT - 1); 225 | const int csx = clamp((x * 2) + sx + sy - 1, 0, OWIDTH - 1); 226 | 227 | // sample & add weighted components 228 | rgb_data[sy + 1][sx + 1] = src[csy * OWIDTH + csx]; 229 | 230 | y_data[sy + 1][sx + 1] = 0.2126f * rgb_data[sy + 1][sx + 1].x + 0.7152f * rgb_data[sy + 1][sx + 1].y + \ 231 | 0.0722f * rgb_data[sy + 1][sx + 1].z; 232 | } 233 | } 234 | 235 | const float3 min_sample = min4(rgb_data[1][1], rgb_data[2][1], rgb_data[1][2], rgb_data[2][2]); 236 | const float3 max_sample = max4(rgb_data[1][1], rgb_data[2][1], rgb_data[1][2], rgb_data[2][2]); 237 | 238 | const float d_edge = diagonal_edge(y_data, wp); 239 | 240 | const float3 rgb1 = W3 * (rgb_data[0][3] + rgb_data[3][0]) + W4 * (rgb_data[1][2] + rgb_data[2][1]); 241 | const float3 rgb2 = W3 * (rgb_data[0][0] + rgb_data[3][3]) + W4 * (rgb_data[1][1] + rgb_data[2][2]); 242 | 243 | // generate and write result 244 | float3 rgbf = (d_edge <= 0.0f) ? rgb1 : rgb2; 245 | 246 | // anti-ringing, clamp 247 | rgbf = clamp(rgbf, min_sample, max_sample); 248 | 249 | // output 250 | dst[((y * 2 + 1) * OWIDTH) + (x * 2)] = rgbf; 251 | } 252 | } 253 | 254 | extern "C" 255 | __global__ void super_xbr_pass3(const float3 * __restrict__ src, float3 * __restrict__ dst) { 256 | // src: 2W x 2H, dst: 2W x 2H 257 | // x: 0:2W:1, y: 0:2H:1 258 | 259 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 260 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 261 | 262 | if (x >= OWIDTH || y >= OHEIGHT) 263 | return; 264 | 265 | // init 266 | constexpr float wp[6] = { 2.0f, 1.0f, -1.0f, 4.0f, -1.0f, 1.0f }; 267 | 268 | float3 rgb_data[4][4]; 269 | float y_data[4][4]; 270 | 271 | // sample supporting pixels in original image 272 | for (int sy = -2; sy <= 1; ++sy) { 273 | const int csy = clamp(y + sy, 0, OHEIGHT - 1); 274 | 275 | for (int sx = -2; sx <= 1; ++sx) { 276 | // clamp pixel locations 277 | const int csx = clamp(x + sx, 0, OWIDTH - 1); 278 | 279 | // sample & add weighted components 280 | rgb_data[sy + 2][sx + 2] = src[csy * OWIDTH + csx]; 281 | 282 | y_data[sy + 2][sx + 2] = 0.2126f * rgb_data[sy + 2][sx + 2].x + 0.7152f * rgb_data[sy + 2][sx + 2].y + \ 283 | 0.0722f * rgb_data[sy + 2][sx + 2].z; 284 | } 285 | } 286 | 287 | const float3 min_sample = min4(rgb_data[1][1], rgb_data[2][1], rgb_data[1][2], rgb_data[2][2]); 288 | const float3 max_sample = max4(rgb_data[1][1], rgb_data[2][1], rgb_data[1][2], rgb_data[2][2]); 289 | 290 | const float d_edge = diagonal_edge(y_data, wp); 291 | 292 | const float3 rgb1 = W1 * (rgb_data[0][3] + rgb_data[3][0]) + W2 * (rgb_data[1][2] + rgb_data[2][1]); 293 | const float3 rgb2 = W1 * (rgb_data[0][0] + rgb_data[3][3]) + W2 * (rgb_data[1][1] + rgb_data[2][2]); 294 | 295 | // generate and write result 296 | float3 rgbf = (d_edge <= 0.0f) ? 
rgb1 : rgb2;
297 |
298 | // anti-ringing, clamp
299 | rgbf = clamp(rgbf, min_sample, max_sample);
300 |
301 | // output
302 | dst[y * OWIDTH + x] = rgbf;
303 | }
--------------------------------------------------------------------------------
/Collections/examples/Super-xBR_cupy/super-xbr_cupy.vpy:
--------------------------------------------------------------------------------
1 | """Super-xBR in CuPy"""
2 |
3 | from string import Template
4 |
5 | import cupy as cp
6 | import vapoursynth as vs
7 | from vapoursynth import core
8 | import muvsfunc_numpy as mufnp
9 |
10 |
11 | # Load source clip. Only RGBS is supported
12 | src = core.std.BlankClip(format=vs.RGBS)
13 |
14 |
15 | # params of Super-xBR
16 | # Super-xBR upscales an image by a factor of 2
17 | wgt1 = 0.129633
18 | wgt2 = 0.175068
19 |
20 | # whether to enable '--use_fast_math' in NVRTC runtime compilation
21 | # to make use of fast math operations
22 | fast = False
23 |
24 | # CUDA kernel execution configuration
25 | blk_size = (8, 8)
26 |
27 | # pre-processing
28 | if src.format.id != vs.RGBS:
29 | raise vs.Error("Super-xBR: Only 32-bit float RGB is supported!")
30 |
31 |
32 | # load CUDA kernel
33 | with open('super-xbr.cu', 'r') as f:
34 | kernel_source_code = f.read()
35 |
36 | kernel_source_code = Template(kernel_source_code)
37 | kernel_source_code = kernel_source_code.substitute(
38 | width=src.width, height=src.height,
39 | wgt1=wgt1, wgt2=wgt2)
40 |
41 |
42 | if fast:
43 | pass1 = cp.RawKernel(code=kernel_source_code, name='super_xbr_pass1',
44 | options=('--use_fast_math', '--std=c++11'))
45 | pass2 = cp.RawKernel(code=kernel_source_code, name='super_xbr_pass2',
46 | options=('--use_fast_math', '--std=c++11'))
47 | pass3 = cp.RawKernel(code=kernel_source_code, name='super_xbr_pass3',
48 | options=('--use_fast_math', '--std=c++11'))
49 | else:
50 | pass1 = cp.RawKernel(code=kernel_source_code, name='super_xbr_pass1',
51 | options=('--std=c++11', ))
52 | pass2 = cp.RawKernel(code=kernel_source_code, name='super_xbr_pass2',
53 | options=('--std=c++11', ))
54 | pass3 = cp.RawKernel(code=kernel_source_code, name='super_xbr_pass3',
55 | options=('--std=c++11', ))
56 |
57 |
58 | # create NumPy function
59 | def superxbr_core(h_input, pass1, pass2, pass3):
60 | h, w, _ = h_input.shape
61 | d_input = cp.asarray(h_input)
62 |
63 | d_output = cp.zeros((h * 2, w * 2, 3), dtype=h_input.dtype)
64 | d_tmp = cp.zeros((h * 2, w * 2, 3), dtype=h_input.dtype)
65 |
66 | pass1(((w + blk_size[0] - 1) // blk_size[0], (h + blk_size[1] - 1) // blk_size[1]), blk_size, (d_input, d_output))
67 | pass2(((w + blk_size[0] - 1) // blk_size[0], (h + blk_size[1] - 1) // blk_size[1]), blk_size, (d_output, d_tmp))
68 | pass3(((w * 2 + blk_size[0] - 1) // blk_size[0], (h * 2 + blk_size[1] - 1) // blk_size[1]), blk_size, (d_tmp, d_output))
69 |
70 | h_out = cp.asnumpy(d_output)
71 |
72 | return h_out
73 |
74 |
75 | # process
76 | res = mufnp.numpy_process(
77 | [core.std.BlankClip(src, width=src.width*2, height=src.height*2), src],
78 | superxbr_core, pass1=pass1, pass2=pass2, pass3=pass3,
79 | input_per_plane=False, output_per_plane=False, channels_last=True,
80 | omit_first_clip=True)
81 |
82 |
83 | res.set_output()
--------------------------------------------------------------------------------
/Collections/examples/sigma_filter_numba.vpy:
--------------------------------------------------------------------------------
1 | """Sigma Filter in Numba"""
2 |
3 | from numba import jit, prange
4 |
5 | import vapoursynth as vs
6 | from vapoursynth import core
7 | import numpy as np
8 | from functools import partial
9 |
10 |
11 | # Load source clip. Only GRAYS is supported
12 | src = core.std.BlankClip(format=vs.GRAYS)
13 |
14 | # params of mufnp.SigmaFilter()
15 | radius = 3
16 | thr = 0.01
17 |
18 |
19 | # pre-processing
20 | if src.format.id != vs.GRAYS:
21 | raise TypeError('Only 32-bit float grayscale input is supported!')
22 |
23 |
24 | _is_api4: bool = hasattr(vs, "__api_version__") and vs.__api_version__.api_major == 4
25 |
26 |
27 | def _get_array(frame, plane, read=True):
28 | if not read and frame.readonly:
29 | raise ValueError("Frame is readonly")
30 |
31 | if _is_api4:
32 | return frame[plane]
33 | else:
34 | if read:
35 | return frame.get_read_array(plane)
36 | else:
37 | return frame.get_write_array(plane)
38 |
39 |
40 | def erase_module(func):
41 | """Erase the '__module__' attribute of a user-defined function, which otherwise breaks numba"""
42 |
43 | if hasattr(func, '__module__') and func.__module__ == '__vapoursynth__':
44 | func.__module__ = None
45 |
46 | return func
47 |
48 |
49 | @jit(nopython=True, nogil=True)
50 | @erase_module
51 | def clamp(val, low, high):
52 | return min(max(val, low), high)
53 |
54 |
55 | @jit(nopython=True, nogil=True, fastmath=True, parallel=False)
56 | @erase_module
57 | def sigma_filter(src, dst, radius, threshold):
58 | height = src.shape[0]
59 | width = src.shape[1]
60 |
61 | for y in prange(height):
62 | for x in range(width):
63 | center = src[y, x]
64 | acc = 0.
65 | count = 0
66 |
67 | for j in range(-radius, radius + 1):
68 | for i in range(-radius, radius + 1):
69 | val = src[clamp(y + j, 0, height - 1), clamp(x + i, 0, width - 1)]
70 |
71 | if abs(center - val) < threshold:
72 | acc += val
73 | count += 1
74 |
75 | dst[y, x] = acc / count
76 |
77 | return
78 |
79 |
80 | def executor(n, f, radius, thr):
81 | fout = f.copy()
82 |
83 | src = np.asarray(_get_array(f, 0, read=True))
84 | dst = np.asarray(_get_array(fout, 0, read=False))
85 |
86 | sigma_filter(src, dst, radius, thr)
87 |
88 | return fout
89 |
90 |
91 | # process
92 | res = core.std.ModifyFrame(src, src, partial(executor, radius=radius, thr=thr))
93 |
94 | # res = mufnp.SigmaFilter(src, radius=radius, thr=thr)
95 |
96 | res.set_output()
97 |
--------------------------------------------------------------------------------
/Collections/examples/super_resolution_mxnet.vpy:
--------------------------------------------------------------------------------
1 | import os
2 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' # 0: False 1: Default 2: Full
3 |
4 | import mxnet as mx
5 |
6 | import vapoursynth as vs
7 | from vapoursynth import core
8 | import muvsfunc as muf
9 | import muvsfunc_numpy as mufnp
10 |
11 | # super resolution using MXNet
12 |
13 |
14 | # global params
15 | src = core.std.BlankClip(width=720, height=480, length=1000, format=vs.RGBS) # can be RGB/YUV/GRAY
16 | sr_algorithm = 0 # 0: waifu2x, 1: VDSR (faster to slower)
17 | device_id = 0 # -1: CPU, 0, 1, ...: GPU
18 |
19 |
20 | # params of the algos
21 | # (download link for models: https://github.com/WolframRhodium/Super-Resolution-Zoo )
22 | # use the information provided in "info.md" in the model's folder to set the parameter "sr_args"
23 | if sr_algorithm == 0:
24 | sr_args = dict(model_filename=r'waifu2x\upconv_7_anime_style_art_rgb\scale2.0x_model',
25 | device_id=device_id, block_w=128, block_h=128,
26 | up_scale=2)
27 |
28 |
29 | # advanced I: padded upsampling to reduce blocking artifacts when small patch size is used
30 |
31 | r"""
32 | # both NumPy and C++ versions are available
33 | sr_args = dict(model_filename=r'waifu2x\upconv_7_anime_style_art_rgb\scale2.0x_model',
34 | device_id=device_id, block_w=128, block_h=128,
35 | up_scale=2, pre_upscale=False, pad=(0,5,0,5), crop=(0,10,0,10))
36 | """
37 |
38 | r"""
39 | # only NumPy version is available
40 | sr_args = dict(model_filename=r'waifu2x\upconv_7_anime_style_art_rgb\scale2.0x_model',
41 | device_id=device_id, block_w=128, block_h=128,
42 | up_scale=2, pre_upscale=False, pad=(5,5,5,5), crop=(10,10,10,10))
43 | """
44 |
45 | """ explanation:
46 | Suppose we want to upsample a 48x48 patch in an image. Let's denote such a patch as X.
47 | Patch X is so small that blocking artifacts are very likely to appear in the upsampled output.
48 | Thus we may want to feed the network a 58x58 patch Y, with X located at the center of Y.
49 | After processing by the network, we can then crop the output to obtain an upsampled version of X with fewer blocking artifacts.
50 |
51 | Such a procedure can be denoted as "pad=(5, 5, 5, 5), crop=(10, 10, 10, 10)".
52 | The value 5 is obtained by (58-48)/2=5, and the value 10 is obtained by 5*2=10, where 2 is the upsampling factor.
53 | Note that if pre_upscale is "True", the upsampling factor is always 1, regardless of the value of "up_scale".
54 | """
55 |
56 |
57 | # advanced II: multi-GPU data parallelism
58 |
59 | r"""
60 | # only C++ version is available
61 |
62 | # 2 GPUs
63 | sr_args = dict(model_filename=r'waifu2x\upconv_7_anime_style_art_rgb\scale2.0x_model',
64 | device_id=[0, 1], block_w=128, block_h=128,
65 | up_scale=2)
66 |
67 | # 2 Queues
68 | sr_args = dict(model_filename=r'waifu2x\upconv_7_anime_style_art_rgb\scale2.0x_model',
69 | device_id=[0, 0], block_w=128, block_h=128,
70 | up_scale=2)
71 |
72 | # 4 Queues 2 GPUs
73 | sr_args = dict(model_filename=r'waifu2x\upconv_7_anime_style_art_rgb\scale2.0x_model',
74 | device_id=[0, 1, 0, 1], block_w=128, block_h=128,
75 | up_scale=2)
76 | """
77 |
78 | # VDSR
79 | elif sr_algorithm == 1:
80 | sr_args = dict(model_filename=r'VDSR\pytorch-vdsr@twtygqyy\VDSR',
81 | device_id=device_id, block_w=128, block_h=128,
82 | up_scale=2, is_rgb_model=False, pre_upscale=True)
83 |
84 |
85 | # sr = mufnp.super_resolution(src, **sr_args) # using NumPy
86 | sr = muf.super_resolution(src, **sr_args) # using C++ plugin
87 |
88 | sr.set_output()
--------------------------------------------------------------------------------
/Collections/examples/super_resolution_opencv.vpy:
--------------------------------------------------------------------------------
1 | # super resolution using OpenCV
2 | # note: the input image to the network is not cropped, which might trigger an out-of-memory error.
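# (a possible workaround, not part of the original script: run the network on tiles cropped
# with core.std.CropAbs and stitch the results back with core.std.StackHorizontal /
# core.std.StackVertical, similar to the block_w/block_h tiling in the MXNet example above)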
3 |
4 | import os
5 |
6 | # Set OpenCL device in format `<Platform>:<CPU|GPU>:<DeviceID>`
7 | # examples: 'AMD:GPU:', ':GPU:1', 'Intel:CPU:',
8 | # https://github.com/opencv/opencv/wiki/OpenCL-optimizations#opencv-opencl-configuration-options
9 | os.environ['OPENCV_OPENCL_DEVICE'] = 'NVIDIA:GPU:' # use GPU to accelerate processing
10 |
11 |
12 | import vapoursynth as vs
13 | from vapoursynth import core
14 | import cv2
15 | import mvsfunc as mvf
16 | import muvsfunc_numpy as mufnp
17 |
18 |
19 | # global params
20 | src = core.std.BlankClip(width=640, height=360, length=1000, format=vs.RGBS) # can be RGB/YUV/GRAY
21 | sr_algorithm = 0 # 0: waifu2x, 1: IDN, 2: TSCN, 3: VDSR, 4: DBPN (faster to slower)
22 |
23 | if 'GPU' in os.environ['OPENCV_OPENCL_DEVICE']:
24 | if cv2.ocl.haveOpenCL() and cv2.ocl.useOpenCL():
25 | backend = cv2.dnn.DNN_BACKEND_OPENCV
26 | target = cv2.dnn.DNN_TARGET_OPENCL # available on NVIDIA GPU since OpenCV 4.0.1, but only works on Intel GPU before OpenCV 3.4.2
27 | else:
28 | backend = cv2.dnn.DNN_BACKEND_DEFAULT
29 | target = cv2.dnn.DNN_TARGET_CPU
30 |
31 |
32 | # params of the algos
33 | if sr_algorithm == 0:
34 | # https://github.com/php-opencv/php-opencv-examples/tree/master/models/waifu2x
35 | # other models can be found at
36 | # https://github.com/HomeOfVapourSynthEvolution/VapourSynth-Waifu2x-caffe/tree/master/Waifu2x-caffe/models
37 | sr_args = dict(prototxt=r'scale2.0x_model.prototxt',
38 | caffe_model=r'scale2.0x_model.caffemodel', up_scale=2, is_rgb_model=True, pad=(7,7,7,7))
39 |
40 | elif sr_algorithm == 1:
41 | # https://github.com/Zheng222/IDN-Caffe/tree/master/test/caffemodel
42 | sr_args = dict(prototxt=r'IDN_x2_deploy.prototxt',
43 | caffe_model=r'IDN_x2.caffemodel', up_scale=2, is_rgb_model=False, pad=(1,1,1,1), crop=(1,2,1,2),
44 | upscale_uv=False, merge_residual=True)
45 |
46 | elif sr_algorithm == 2:
47 | # https://github.com/Zheng222/TSCN/tree/master/test
48 | sr_args = dict(prototxt=r'TSCN_x2_deploy.prototxt',
49 | caffe_model=r'TSCN_x2.caffemodel', up_scale=2, is_rgb_model=False)
50 |
51 | elif sr_algorithm == 3:
52 | # https://github.com/huangzehao/caffe-vdsr/tree/master/Train
53 | sr_args = dict(prototxt=r'VDSR_net_deploy.prototxt',
54 | caffe_model=r'VDSR_Adam.caffemodel', up_scale=2, is_rgb_model=False, pre_upscale=True, upscale_uv=False)
55 |
56 | elif sr_algorithm == 4:
57 | # https://github.com/alterzero/DBPN-caffe
58 | # https://drive.google.com/drive/folders/1ahbeoEHkjxoo4NV1wReOmpoRWbl448z-?usp=sharing
59 | sr_args = dict(prototxt=r'DBPN_mat_2x.prototxt',
60 | caffe_model=r'DBPN_2x.caffemodel', up_scale=2, is_rgb_model=True)
61 |
62 |
63 | # internal functions
64 | def channel_last(arr):
65 | """Convert a CHW array to HWC."""
66 | ndim = arr.ndim
67 | return arr.swapaxes(ndim - 3, ndim - 2).swapaxes(ndim - 2, ndim - 1)
68 |
69 |
70 | def super_resolution_core(img, net, pad=None, crop=None):
71 | if pad is not None:
72 | img = cv2.copyMakeBorder(img, *pad, cv2.BORDER_REPLICATE)
73 |
74 | blob = cv2.dnn.blobFromImage(img)
75 |
76 | net.setInput(blob, '')
77 |
78 | super_res = net.forward()
79 |
80 | if img.ndim == 2:
81 | if crop is not None:
82 | return super_res[0, 0, crop[0]:-crop[1], crop[2]:-crop[3]]
83 | else:
84 | return super_res[0, 0, :, :]
85 | else:
86 | # the output is BGR rather than RGB so channel reversal is needed
87 | if crop is not None:
88 | return channel_last(super_res[0, ::-1, crop[0]:-crop[1], crop[2]:-crop[3]])
89 | else:
90 | return channel_last(super_res[0, ::-1, :, :])
91 |
92 |
93 | def run_super_resolution(clip, prototxt, caffe_model, up_scale=2, is_rgb_model=True, pad=None, crop=None, backend=None, target=None):
94 | """ Super-Resolution without color family handling
95 | """
96 |
97 | net = cv2.dnn.readNetFromCaffe(prototxt, caffe_model)
98 |
99 | if backend is not None:
100 | net.setPreferableBackend(backend)
101 |
102 | if target is not None:
103 | net.setPreferableTarget(target)
104 |
105 | if up_scale != 1:
106 | blank = core.std.BlankClip(clip, width=clip.width*up_scale, height=clip.height*up_scale)
107 | super_res = mufnp.numpy_process([blank, clip], super_resolution_core, net=net,
108 | input_per_plane=(not is_rgb_model), output_per_plane=(not is_rgb_model), pad=pad, crop=crop,
109 | omit_first_clip=True)
110 | else:
111 | super_res = mufnp.numpy_process(clip, super_resolution_core, net=net,
112 | input_per_plane=(not is_rgb_model), output_per_plane=(not is_rgb_model), pad=pad, crop=crop)
113 |
114 | return super_res
115 |
116 |
117 | def super_resolution(clip, prototxt, caffe_model, up_scale=2, is_rgb_model=True, pad=None, crop=None, backend=None, target=None, pre_upscale=False, upscale_uv=False, merge_residual=False):
118 | """ Super-Resolution with color family handling
119 |
120 | The color space of the output depends on the algorithm.
121 | """
122 |
123 | isGray = clip.format.color_family == vs.GRAY
124 | isRGB = clip.format.color_family == vs.RGB
125 |
126 | if is_rgb_model and not isRGB:
127 | clip = mvf.ToRGB(clip, depth=32)
128 |
129 | elif not is_rgb_model:
130 | if isRGB:
131 | clip = mvf.ToYUV(clip, depth=32)
132 |
133 | if not isGray and not upscale_uv: # isYUV/RGB and only upscale Y
134 | clip = mvf.GetPlane(clip)
135 |
136 | clip = mvf.Depth(clip, depth=32)
137 |
138 | if pre_upscale:
139 | clip = core.resize.Bicubic(clip, clip.width*up_scale, clip.height*up_scale, filter_param_a=0, filter_param_b=0.5)
140 | up_scale = 1
141 |
142 | super_res = run_super_resolution(clip, prototxt=prototxt, caffe_model=caffe_model,
143 | up_scale=up_scale, is_rgb_model=is_rgb_model, pad=pad, crop=crop, backend=backend, target=target)
144 |
145 | if merge_residual:
146 | low_res = core.resize.Bicubic(clip, super_res.width, super_res.height, filter_param_a=0, filter_param_b=0.5)
147 | super_res = core.std.Expr([super_res, low_res], ['x y +'])
148 |
149 | return super_res
150 |
151 | sr = super_resolution(src, **sr_args, backend=backend, target=target)
152 |
153 | # sr = core.caffe.Waifu2x(src, noise=-1, scale=2, cudnn=True, model=3)
154 |
155 | sr.set_output()
--------------------------------------------------------------------------------
/Collections/muvsfunc_misc.py:
--------------------------------------------------------------------------------
1 | """
2 | Miscellaneous functions:
3 | GPS
4 | gauss
5 | freq_merge
6 | band_merge
7 | detail_enhancement
8 | SSR
9 | Wiener2
10 | tv
11 | BernsteinFilter
12 | GPA
13 | XDoG
14 | sbr_detail
15 | fade
16 | fast_mandelbrot
17 | """
18 |
19 | import functools
20 | import math
21 | import vapoursynth as vs
22 | from vapoursynth import core
23 | import muvsfunc as muf
24 | import mvsfunc as mvf
25 | import typing
26 |
27 | _is_api4: bool = hasattr(vs, "__api_version__") and vs.__api_version__.api_major == 4
28 |
29 | def _get_array(frame, plane, read=True):
30 | if not read and frame.readonly:
31 | raise ValueError("Frame is readonly")
32 |
33 | if _is_api4:
34 | return frame[plane]
35 | else:
36 | if read:
37 | return frame.get_read_array(plane)
38 | else:
39 | return frame.get_write_array(plane)
40 |
41 | def GPS(clip, gamma=None):
42 | """Get Power Spectrum
43 |
44 | Args:
45 | gamma: It enables viewing small-valued responses in the spectral display.
46 |
47 | """
48 |
49 | w = clip.width
50 | h = clip.height
51 | max_w_h = max(w, h)
52 |
53 | clip = core.std.AddBorders(clip, right=max_w_h - w, bottom=max_w_h - h)
54 | clip = core.vcfreq.F2Quiver(clip, test=1, frad=16, fspec=[1,2,0,1,7], gamma=gamma)
55 | clip = core.std.CropRel(clip, 0, max_w_h // 2).resize.Bicubic(w, h)
56 | return clip
57 |
58 |
59 | def gauss(clip, sigma=None, algo=0):
60 | """Gaussian filter using tcanny
61 | Borrowed from https://github.com/IFeelBloated/Oyster
62 |
63 | Args:
64 | sigma: Standard deviation of gaussian.
65 |
66 | algo: (int) Algorithm. 0:auto, 1:tcanny.TCanny(mode=-1), 2:bilateral.Gaussian()
67 |
68 | """
69 |
70 | if (algo == 0 and sigma is not None and sigma >= 10) or algo == 2:
71 | return core.bilateral.Gaussian(clip, sigma=sigma)
72 | else: # algo == 1 or (algo == 0 and (sigma is None or sigma < 10))
73 | return core.tcanny.TCanny(clip, sigma=sigma, mode=-1)
74 |
75 |
76 | def freq_merge(src, flt, fun=None, **fun_args):
77 | """Replace high freq component in "src" with high freq component in "flt"
78 | Borrowed from https://github.com/IFeelBloated/Oyster
79 |
80 | Args:
81 | src, flt: Input.
82 |
83 | fun: (function) A low-pass filter. Default is gaussian.
84 | """
85 |
86 | if fun is None or not callable(fun):
87 | fun = gauss
88 |
89 | low_src = fun(src, **fun_args)
90 | low_flt = fun(flt, **fun_args)
91 | return core.std.Expr([low_src, flt, low_flt], ['y z - x +'])
92 |
93 |
94 | def band_merge(src, flt, fun=None, fun_args1=None, fun_args2=None, cascade=True):
95 | """Replace frequencies within a certain range in "src" with frequencies within a certain range in "flt"
96 |
97 | Args:
98 | src, flt: Input.
99 |
100 | fun: (function) A low-pass filter. Default is gaussian.
101 |
102 | cascade: (bool) Whether to cascade functions. Default is True.
103 |
104 | """
105 |
106 | if fun is None or not callable(fun):
107 | fun = gauss
108 |
109 | if fun_args1 is None:
110 | fun_args1 = {}
111 |
112 | if fun_args2 is None:
113 | fun_args2 = {}
114 |
115 | low_src1 = fun(src, **fun_args1)
116 | low_src2 = fun(low_src1 if cascade else src, **fun_args2)
117 | low_flt1 = fun(flt, **fun_args1)
118 | low_flt2 = fun(low_flt1 if cascade else flt, **fun_args2)
119 | return core.std.Expr([low_flt1, low_flt2, src, low_src1, low_src2], ['x y - b + a - z +'])
120 |
121 |
122 | def detail_enhancement(clip, guidance=None, iter=3, radius=4, regulation=0.0005, fast=False, **args):
123 | """Novel detail enhancement filter using guided filter and defilter
124 |
125 | Args:
126 | clip: Grayscale clip.
127 | guidance: Guidance clip.
128 |
129 | """
130 |
131 | return muf.DeFilter(clip, muf.GuidedFilter, guidance=guidance, radius=radius, regulation=regulation, fast=fast, iteration=iter, **args)
132 |
133 |
134 | def SSR(clip, sigma=50, full=None, **args):
135 | """Single-scale Retinex
136 |
137 | Args:
138 | clip: Input. Only the first plane will be processed.
139 |
140 | sigma: (int) Standard deviation of gaussian blur. Default is 50.
141 |
142 | full: (bool) Whether input clip is of full range. Default is None.
143 |
144 | Ref:
145 | [1] Jobson, D. J., Rahman, Z. U., & Woodell, G. A. (1997). Properties and performance of a center/surround retinex. IEEE transactions on image processing, 6(3), 451-462.
146 |
147 | """
148 |
149 | bits = clip.format.bits_per_sample
150 | sampleType = clip.format.sample_type
151 | isGray = clip.format.color_family == vs.GRAY
152 |
153 | if not isGray:
154 | clip_src = clip
155 | clip = mvf.GetPlane(clip)
156 |
157 | lowFre = gauss(clip, sigma=sigma, **args)
158 |
159 | clip = mvf.Depth(clip, 32, fulls=full)
160 | lowFre = mvf.Depth(lowFre, 32, fulls=full) # core.bilateral.Gaussian() doesn't support float input.
161 |
162 | expr = 'x 1 + log y 1 + log -'
163 | clip = core.std.Expr([clip, lowFre], [expr])
164 |
165 | stats = core.std.PlaneStats(clip, plane=[0])
166 |
167 | # Dynamic range stretching
168 | def Stretch(n, f, clip, core):
169 | alpha = f.props['PlaneStatsMax'] - f.props['PlaneStatsMin']
170 | beta = f.props['PlaneStatsMin']
171 |
172 | expr = 'x {beta} - {alpha} /'.format(beta=beta, alpha=alpha)
173 | return core.std.Expr([clip], [expr])
174 |
175 | clip = core.std.FrameEval(clip, functools.partial(Stretch, clip=clip, core=core), prop_src=stats)
176 |
177 | clip = mvf.Depth(clip, depth=bits, sample=sampleType, fulld=full)
178 |
179 | if not isGray:
180 | clip = core.std.ShufflePlanes([clip, clip_src], list(range(clip_src.format.num_planes)), clip_src.format.color_family)
181 |
182 | return clip
183 |
184 |
185 | def Wiener2(input, radius_v=3, radius_h=None, noise=None, **depth_args):
186 | """2-D adaptive noise-removal filtering. (wiener2 from MATLAB)
187 |
188 | Wiener2 lowpass filters an intensity image that has been degraded by constant power additive noise.
189 | Wiener2 uses a pixel-wise adaptive Wiener method based on statistics estimated from a local neighborhood of each pixel: output = localMean + max(localVar - noise, 0) / max(localVar, noise) * (input - localMean).
190 |
191 | The estimate of the additive noise power is not returned.
192 |
193 | Args:
194 | input: Input clip. Only the first plane will be processed.
195 |
196 | radius_v, radius_h: (int) Size of neighborhoods to estimate the local image mean and standard deviation. The size is (radius_v*2-1) * (radius_h*2-1).
197 | If "radius_h" is None, it will be set to "radius_v".
198 | Default is 3.
199 |
200 | noise: (float) Variance of additive noise. If it is not given, the average of all locally estimated variances will be used.
201 | Default is None.
202 |
203 | depth_args: (dict) Additional arguments passed to mvf.Depth() in the form of keyword arguments.
204 | Default is {}.
205 |
206 | Ref:
207 | [1] Lim, J. S. (1990). Two-dimensional signal and image processing. Englewood Cliffs, NJ, Prentice Hall, 1990, 710 p, p. 538, equations 9.26, 9.27, and 9.29.
208 | [2] 2-D adaptive noise-removal filtering - MATLAB wiener2 - MathWorks (https://www.mathworks.com/help/images/ref/wiener2.html) 209 | 210 | """ 211 | 212 | funcName = 'Wiener2' 213 | 214 | if not isinstance(input, vs.VideoNode) or input.format.num_planes > 1: 215 | raise TypeError(funcName + ': \"input\" must be a gray-scale/single channel clip!') 216 | 217 | bits = input.format.bits_per_sample 218 | sampleType = input.format.sample_type 219 | 220 | if radius_h is None: 221 | radius_h = radius_v 222 | 223 | input32 = mvf.Depth(input, depth=32, sample=vs.FLOAT, **depth_args) 224 | 225 | localMean = muf.BoxFilter(input32, radius_h+1, radius_v+1) 226 | localVar = muf.BoxFilter(core.std.Expr([input32], ['x dup *']), radius_h+1, radius_v+1) 227 | localVar = core.std.Expr([localVar, localMean], ['x y dup * -']) 228 | 229 | if noise is None: 230 | localVarStats = core.std.PlaneStats(localVar, plane=[0]) 231 | 232 | def FLT(n, f, clip, core, localMean, localVar): 233 | noise = f.props['PlaneStatsAverage'] 234 | 235 | return core.std.Expr([clip, localMean, localVar], ['y z {noise} - 0 max z {noise} max / x y - * +'.format(noise=noise)]) 236 | 237 | flt = core.std.FrameEval(input32, functools.partial(FLT, clip=input32, core=core, localMean=localMean, localVar=localVar), prop_src=[localVarStats]) 238 | else: 239 | flt = core.std.Expr([input32, localMean, localVar], ['y z {noise} - 0 max z {noise} max / x y - * +'.format(noise=noise)]) 240 | 241 | return mvf.Depth(flt, depth=bits, sample=sampleType, **depth_args) 242 | 243 | 244 | def tv(I, iter=5, dt=None, ep=1, lam=0, I0=None, C=0): 245 | """Total Variation Denoising 246 | 247 | Args: 248 | I: Input. A floating point clip is recommended. 249 | 250 | iter: (int) Num of iterations. Default is 5. 251 | 252 | dt: (float) Time step. Default is ep/5. 253 | 254 | ep: (float) Epsilon (of gradient regularization). Default is 1. 255 | 256 | lam: (float) Fidelity term lambda. Default is 0. 257 | 258 | I0: (clip) Input (noisy) image. Default is "I". 259 | C: (float) Constant added in the fidelity term "lam * (I0 - I + C)". Default is 0. 260 | 261 | Ref: 262 | [1] Rudin, L. I., Osher, S., & Fatemi, E. (1992). Nonlinear total variation based noise removal algorithms. Physica D: Nonlinear Phenomena, 60(1-4), 259-268.
263 | [2] Total Variation Denoising : http://visl.technion.ac.il/~gilboa/PDE-filt/tv_denoising.html 264 | 265 | """ 266 | 267 | if dt is None: 268 | dt = ep / 5 269 | 270 | if I0 is None: 271 | I0 = I 272 | 273 | ep2 = ep * ep 274 | 275 | isFloat = I.format.sample_type == vs.FLOAT 276 | neutral = 0 if isFloat else muf.scale(128, I.format.bits_per_sample) 277 | 278 | for i in range(iter): 279 | I_x = core.std.Convolution(I, [-1, 0, 1], divisor=2, bias=neutral, mode='h') # correct 280 | I_y = core.std.Convolution(I, [-1, 0, 1], divisor=2, bias=neutral, mode='v') # correct 281 | I_xx = core.std.Convolution(I, [1, -2, 1], divisor=1 if isFloat else 4, bias=neutral, mode='h') # x4 282 | I_yy = core.std.Convolution(I, [1, -2, 1], divisor=1 if isFloat else 4, bias=neutral, mode='v') # x4 283 | Dp = core.std.Convolution(I, [1, 0, 0, 0, 0, 0, 0, 0, 1], divisor=2) 284 | Dm = core.std.Convolution(I, [0, 0, 1, 0, 0, 0, 1, 0, 0], divisor=2) 285 | I_xy = core.std.Expr([Dp, Dm], ['x y - 2 / {} +'.format(neutral)]) # correct 286 | 287 | if isFloat: 288 | expr = 'x {dt} a {ep2} z dup * + * 2 y * z * b * - c {ep2} y dup * + * + {ep2} y dup * + z dup * + 1.5 pow / {lam} d x - {C} + * + * +'.format(dt=dt, ep2=ep2, lam=lam, C=C) 289 | else: # isInteger 290 | expr = 'x {dt} a {neutral} - 4 * {ep2} z {neutral} - dup * + * 2 y {neutral} - * z {neutral} - * b {neutral} - * - c {neutral} - 4 * {ep2} y {neutral} - dup * + * + {ep2} y {neutral} - dup * + z {neutral} - dup * + 1.5 pow / {lam} d x - {C} + * + * +'.format(dt=dt, neutral=neutral, ep2=ep2, lam=lam, C=C) 291 | 292 | I = core.std.Expr([I, I_x, I_y, I_xx, I_xy, I_yy, I0], [expr]) 293 | 294 | return I 295 | 296 | 297 | def BernsteinFilter(clip, iter=30, **depth_args): 298 | """Bernstein Filter 299 | 300 | Bernstein filter is an efficient filter solver, which can implicitly minimize the mean curvature. 301 | 302 | Internal precision is always float. 303 | 304 | Args: 305 | clip: Input. 306 | 307 | iter: (int) Num of iterations. Default is 30. 308 | 309 | depth_args: (dict) Additional arguments passed to mvf.Depth() in the form of keyword arguments. 310 | Default is {}. 311 | 312 | Ref: 313 | [1] Gong, Y. (2016, March). Bernstein filter: A new solver for mean curvature regularized models. In Acoustics, Speech and Signal Processing (ICASSP), 2016 IEEE International Conference on (pp. 1701-1705). IEEE. 314 | 315 | """ 316 | 317 | bits = clip.format.bits_per_sample 318 | sample = clip.format.sample_type 319 | 320 | clip = mvf.Depth(clip, depth=32, sample=vs.FLOAT, **depth_args) 321 | 322 | for i in range(iter): 323 | d1 = core.std.Convolution(clip, [1, -2, 1], divisor=2, mode='h') 324 | d2 = core.std.Convolution(clip, [1, -2, 1], divisor=2, mode='v') 325 | clip = core.std.Expr([clip, d1, d2], ['y abs z abs < x y + x z + ?']) 326 | 327 | return mvf.Depth(clip, depth=bits, sample=sample, **depth_args) 328 | 329 | 330 | def GPA(clip, sigmaS=3, sigmaR=0.15, mode=0, iteration=0, eps=1e-3, **depth_args): 331 | """Fast and Accurate Bilateral Filtering using Gaussian-Polynomial Approximation 332 | 333 | This filter approximates the bilateral filter when the range kernel is Gaussian. 334 | The exponential function in the weight of the bilateral filter is approximated, 335 | and the bilateral filter is therefore decomposed into a series of spatial convolutions. 336 | 337 | The number of iterations depends on the value of "sigmaR", and increases as "sigmaR" decreases. 338 | A small value of "sigmaR" may lead to precision problems.
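        A rough sketch of the idea from Ref. [1]: with s and t denoting two pixel values shifted by T and scaled by sigmaR, the Gaussian range kernel factors as

            exp(-(s - t)^2 / 2) = exp(-s^2 / 2) * exp(-t^2 / 2) * sum_{n>=0} (s * t)^n / n!

        so truncating the series at order "iteration" reduces the bilateral filter to that many spatial (Gaussian or box) convolutions.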
339 | 340 | All the internal calculations are done at 32-bit float. 341 | 342 | Part of the description of the bilateral filter is copied from 343 | https://github.com/HomeOfVapourSynthEvolution/VapourSynth-Bilateral 344 | 345 | Args: 346 | clip: Input clip. 347 | 348 | sigmaS: (float) Sigma of Gaussian function to calculate spatial weight. 349 | The scale of this parameter is equivalent to pixel distance. 350 | Larger sigmaS results in larger filtering radius as well as stronger smoothing. 351 | Default is 3. 352 | 353 | sigmaR: (float) Sigma of Gaussian function to calculate range weight. 354 | The scale of this parameter is the same as pixel value ranging in [0,1]. 355 | Smaller sigmaR preserves edges better, but may also lead to weaker smoothing. 356 | It should be pointed out that a small "sigmaR" results in more iterations and higher error. 357 | Default is 0.15. 358 | 359 | mode: (0 or 1) 0: Gaussian bilateral filter, 1: Box bilateral filter 360 | Default is 0. 361 | 362 | iteration: (int) Number of iterations, i.e. the order of the approximation. 363 | If it is 0, it is calculated automatically according to "sigmaR" and "eps". 364 | Default is 0. 365 | 366 | eps: (float) Filtering Accuracy. 367 | Default is 1e-3. 368 | 369 | depth_args: (dict) Additional arguments passed to mvf.Depth(). 370 | Default is {}. 371 | 372 | Ref: 373 | [1] Chaudhury, K. N., & Dabhade, S. D. (2016). Fast and provably accurate bilateral filtering. IEEE Transactions on Image Processing, 25(6), 2519-2528. 374 | [2] http://www.mathworks.com/matlabcentral/fileexchange/56158 375 | 376 | """ 377 | 378 | def estimate_iteration(sigmaR, T, eps): 379 | if sigmaR > 70: 380 | return 10 381 | elif sigmaR < 5: 382 | return 800 383 | else: 384 | lam = (T / sigmaR) ** 2 385 | p = 1 + math.log(lam) 386 | q = -lam - math.log(eps) 387 | t = q / math.e / lam 388 | W = t - t ** 2 + 1.5 * t ** 3 - (8 / 3) * t ** 4 389 | N = min(max(q / W, 10), 300) 390 | 391 | if sigmaR < 30: 392 | for i in range(5): 393 | N -= (N * math.log(N) - p * N - q) / (math.log(N) + 1 - p) 394 | 395 | return math.ceil(N) 396 | 397 | T = 0.5 398 | bits = clip.format.bits_per_sample 399 | sampleType = clip.format.sample_type 400 | 401 | if mode == 0: # Gaussian bilateral filter 402 | Filter = functools.partial(core.tcanny.TCanny, sigma=sigmaS, mode=-1) 403 | else: # Box bilateral filter 404 | Filter = functools.partial(muf.BoxFilter, radius=sigmaS + 1) 405 | 406 | if iteration == 0: 407 | iteration = estimate_iteration(sigmaR * 255, T, eps) 408 | 409 | clip = mvf.Depth(clip, depth=32, sample=vs.FLOAT, **depth_args) 410 | 411 | H = core.std.Expr(clip, f'x {T} - {sigmaR} /') 412 | F = core.std.Expr(H, '-0.5 x dup * * exp') 413 | G = core.std.BlankClip(clip, color=[1] * clip.format.num_planes) 414 | P = core.std.BlankClip(clip, color=[0] * clip.format.num_planes) 415 | Q = core.std.BlankClip(clip, color=[0] * clip.format.num_planes) 416 | Fbar = Filter(F) 417 | 418 | for i in range(1, iteration+1): 419 | sqrt_i = math.sqrt(i) 420 | inv_sqrt_i = 1 / sqrt_i 421 | Q = core.std.Expr([Q, G, Fbar], 'x y z * +') 422 | F = core.std.Expr([H, F], f'x y * {inv_sqrt_i} *') 423 | Fbar = Filter(F) 424 | P = core.std.Expr([P, G, Fbar], f'x y z * {sqrt_i} * +') 425 | G = core.std.Expr([H, G], f'x y * {inv_sqrt_i} *') 426 | 427 | res = core.std.Expr([P, Q], f'x {sigmaR} * y 1e-5 + / {T} +') 428 | 429 | return mvf.Depth(res, depth=bits, sample=sampleType, **depth_args) 430 | 431 | 432 | def XDoG(clip, sigma=1.0, k=1.6, p=20, epsilon=0.7, lamda=0.01): 433 | """XDoG - An eXtended
difference-of-Gaussian filter 434 | 435 | Args: 436 | clip: Input clip. 437 | 438 | sigma: (float) Strength of gaussian filter. 439 | Default is 1. 440 | 441 | k: (float) Amplifier of "sigma" for second gaussian filtering. 442 | Default is 1.6. 443 | 444 | p: (float) Amplifier of difference of gaussian. 445 | Default is 20. 446 | 447 | epsilon: (float, 0~1) Threshold of DoG response. Scaled automatically. 448 | Default is 0.7. 449 | 450 | lamda: (float) Multiplier in the thresholding function. 451 | Default is 0.01. 452 | 453 | Ref: 454 | [1] Winnemöller, H., Kyprianidis, J. E., & Olsen, S. C. (2012). XDoG: an extended difference-of-Gaussians compendium including advanced image stylization. Computers & Graphics, 36(6), 740-753. 455 | 456 | """ 457 | 458 | bits = clip.format.bits_per_sample 459 | peak = (1 << bits) - 1 460 | epsilon = muf.scale(epsilon, bits) 461 | 462 | f1 = core.tcanny.TCanny(clip, sigma=sigma, mode=-1) 463 | f2 = core.tcanny.TCanny(clip, sigma=sigma * k, mode=-1) 464 | 465 | return core.std.Expr([f1, f2], f'x y - {p} * x + {epsilon} >= 1 2 2 2 x y - {p} * x + {epsilon} - {lamda} * * exp 1 + / - ? {peak} *') 466 | 467 | 468 | def sbr_detail(clip, r=1, planes=None, mode=1): 469 | """sbr() inspired detail detection algorithm 470 | 471 | Code is modified from sbr() in https://github.com/HomeOfVapourSynthEvolution/havsfunc/blob/master/havsfunc.py. 472 | 473 | Args: 474 | clip: RGB/YUV/Gray, 8..16 bit integer, 16..32 bit float. 475 | 476 | r: (int) Radius in pixels of the smoothing filter. 477 | Default is 1. 478 | 479 | planes: (int []) Whether to process the corresponding plane. 480 | By default, every plane will be processed. 481 | The unprocessed planes will be copied from "clip". 482 | 483 | mode: (int, 0~2) Detail detection method, ranging from insensitive to sensitive. 484 | The result of mode 2 is a combination of mode 0 and mode 1. 485 | Default is 1. 486 | """ 487 | 488 | funcName = 'sbr_detail' 489 | 490 | if not isinstance(clip, vs.VideoNode): 491 | raise TypeError(funcName + ': This is not a clip') 492 | 493 | if planes is None: 494 | planes = list(range(clip.format.num_planes)) 495 | elif isinstance(planes, int): 496 | planes = [planes] 497 | 498 | if clip.format.sample_type == vs.INTEGER: 499 | neutral = 1 << (clip.format.bits_per_sample - 1) 500 | peak = (1 << clip.format.bits_per_sample) - 1 501 | else: # clip.format.sample_type == vs.FLOAT 502 | neutral = 0.5 503 | peak = 1.0 504 | 505 | matrix1 = [1, 2, 1, 2, 4, 2, 1, 2, 1] # RemoveGrain(11) 506 | matrix2 = [1, 1, 1, 1, 1, 1, 1, 1, 1] # RemoveGrain(20) 507 | 508 | RG11 = core.std.Convolution(clip, matrix=matrix1, planes=planes) 509 | for i in range(r - 1): 510 | RG11 = core.std.Convolution(RG11, matrix=matrix2, planes=planes) 511 | 512 | RG11D = core.std.MakeDiff(clip, RG11, planes=planes) 513 | 514 | RG11DS = core.std.Convolution(RG11D, matrix=matrix1, planes=planes) 515 | for i in range(r - 1): 516 | RG11DS = core.std.Convolution(RG11DS, matrix=matrix2, planes=planes) 517 | 518 | if mode == 0: 519 | expr = f'x y - x {neutral} - * 0 < {peak} 0 ?' 520 | elif mode == 1: 521 | expr = f'x y - abs x {neutral} - abs < {peak} 0 ?' 522 | elif mode == 2: 523 | expr = f'x y - x {neutral} - * 0 < x y - abs x {neutral} - abs < or {peak} 0 ?'
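    # Note on the exprs above: with x = RG11D (clip minus its blur, stored around
    # "neutral") and y = RG11DS (a blurred copy of that difference), mode 0 marks
    # pixels where (x - y) and (x - neutral) have opposite signs, mode 1 marks
    # pixels where abs(x - y) < abs(x - neutral), and mode 2 is the "or" of both
    # tests; marked pixels are set to "peak", all others to 0.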
524 | 525 | detail_mask = core.std.Expr([RG11D, RG11DS], [expr if i in planes else '' for i in range(clip.format.num_planes)]) 526 | 527 | return detail_mask 528 | 529 | 530 | def fade(clip, start=0, end=None, mode='in', base=None): 531 | """Fade-in/out effect implementation 532 | 533 | Args: 534 | clip: RGB/YUV/Gray, 8..16 bit integer, 16..32 bit float. 535 | 536 | start: (int) Frame number of the first frame. 537 | Default is 0. 538 | 539 | end: (int) Frame number of the last frame. 540 | Default is the last frame of the clip. 541 | 542 | mode: ("in" or "out") Fade mode. 543 | Default is "in". 544 | 545 | base: (clip) Base clip of the fade effect. 546 | Default is a black picture. 547 | """ 548 | 549 | funcName = 'fade' 550 | 551 | if not isinstance(clip, vs.VideoNode): 552 | raise TypeError(funcName + ': This is not a clip') 553 | 554 | if end is None: 555 | end = clip.num_frames - 1 556 | 557 | def fade_core(n, clip, start=None, end=None, mode=None, base=None): 558 | if n < start or n > end or end - start <= 0: 559 | return clip 560 | else: 561 | length = end - start 562 | 563 | if mode == 'in': 564 | i = (n - start) / length 565 | elif mode == 'out': 566 | i = (end - n) / length 567 | else: 568 | raise ValueError('Unknown fading mode.') 569 | 570 | if base is None: 571 | y_expr = 'x {} *'.format(i) 572 | 573 | if clip.format.color_family != vs.YUV or clip.format.sample_type == vs.FLOAT: 574 | return core.std.Expr([clip], [y_expr]) 575 | else: 576 | neutral = 1 << (clip.format.bits_per_sample - 1) 577 | uv_expr = 'x {} * {} +'.format(i, (1 - i) * neutral) 578 | return core.std.Expr([clip], [y_expr, uv_expr]) 579 | else: 580 | return core.std.Expr([clip, base], ['x {} * y {} * +'.format(i, 1 - i)]) 581 | 582 | return core.std.FrameEval(clip, functools.partial(fade_core, clip=clip, start=start, end=end, mode=mode, base=base)) 583 | 584 | 585 | def fast_mandelbrot(width=1920, height=1280, iterations=50, 586 | real_range=(-2, 1), imag_range=(-1, 1), c=0+0j, julia_set=False, backend=None): 587 | 588 | import array 589 | 590 | def meshgrid_core(n, f, low, high, horizontal): 591 | assert low < high, f"{low} < {high}" 592 | 593 | f = f.copy() 594 | mem_view = _get_array(f, plane=0, read=False) 595 | height, width = mem_view.shape 596 | 597 | if horizontal: 598 | data = array.array('f', (((high - low) * j / (width - 1) + low) for j in range(width))) 599 | 600 | for i in range(height): 601 | if _is_api4: 602 | for j in range(width): 603 | mem_view[i, j] = data[j] 604 | else: 605 | mem_view[i, :] = data 606 | else: 607 | for i in range(height): 608 | if _is_api4: 609 | for j in range(width): 610 | mem_view[i, j] = (low - high) * i / (height - 1) + high 611 | else: 612 | mem_view[i, :] = array.array('f', [(low - high) * i / (height - 1) + high]) * width 613 | 614 | return f 615 | 616 | c = complex(c) 617 | 618 | ones = core.std.BlankClip(format=vs.GRAYS, width=width, height=height, length=1, color=1) 619 | 620 | if hasattr(core, "akarin"): 621 | features = core.akarin.Version()["expr_features"] 622 | 623 | if b"X" in features and b"width" in features: 624 | z_real = core.akarin.Expr([ones], f"{real_range[1] - real_range[0]} X * width 1 - / {real_range[0]} +") 625 | else: 626 | z_real = core.std.ModifyFrame( 627 | ones, ones, 628 | functools.partial(meshgrid_core, horizontal=True, low=real_range[0], high=real_range[1])) 629 | 630 | if b"Y" in features and b"height" in features: 631 | z_imag = core.akarin.Expr([ones], f"{imag_range[0] - imag_range[1]} Y * height 1 - / {imag_range[1]} +") 632 | else: 633 | z_imag =
core.std.ModifyFrame( 634 | ones, ones, 635 | functools.partial(meshgrid_core, horizontal=False, low=imag_range[0], high=imag_range[1])) 636 | 637 | if julia_set: 638 | inner = ( 639 | f"dup2 dup2 * dup0 + {c.imag} + " # new z_imag 640 | f"dup3 dup0 * dup3 dup0 * - {c.real} + " # new z_real 641 | "dup1 dup0 * dup1 dup0 * + 4 < " # mask 642 | "swap1 dup1 swap6 ? " # update z_real 643 | "swap4 swap1 dup1 swap4 ? " # update z_imag 644 | f"swap2 swap1 dup0 {1/iterations} - swap1 ? " # update counter 645 | ) 646 | 647 | expr = f"x y z {inner * iterations} 1 swap2 ? 1 swap2 ?" 648 | 649 | else: 650 | inner = ( 651 | "dup2 dup2 * dup0 + y + " # new z_imag 652 | "dup3 dup0 * dup3 dup0 * - x + " # new z_real 653 | "dup1 dup0 * dup1 dup0 * + 4 < " # mask 654 | "swap1 dup1 swap6 ? " # update z_real 655 | "swap4 swap1 dup1 swap4 ? " # update z_imag 656 | f"swap2 swap1 dup0 {1/iterations} - swap1 ? " # update counter 657 | ) 658 | 659 | expr = f"{c.real} {c.imag} z {inner * iterations} 1 swap2 ? 1 swap2 ?" 660 | 661 | if backend is None: 662 | if hasattr(core, "akarin"): 663 | return core.akarin.Expr([z_real, z_imag, ones], expr) 664 | else: 665 | return core.std.Expr([z_real, z_imag, ones], expr) 666 | else: 667 | return backend([z_real, z_imag, ones], expr) 668 | 669 | -------------------------------------------------------------------------------- /Collections/net_interp.py: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/xinntao/ESRGAN/blob/50fbd2de1d80a014e1c0e1c165913a128f5a8384/net_interp.py 2 | 3 | import argparse 4 | import caffe_pb2 5 | import numpy as np 6 | 7 | 8 | r""" 9 | Network interpolator for waifu2x-caffe 10 | 11 | It applies linear interpolation in the parameter space of two waifu2x-caffe models of the same architecture, 12 | which allows continuous imagery effect transition, e.g. adjusting the denoising strength. 13 | 14 | caffe_pb2 is required, see the "protobuf" part of 15 | https://mxnet.incubator.apache.org/versions/master/faq/caffe.html#how-to-build 16 | 17 | usage: 18 | net_interp.py -m1 ".\upconv_7_anime_style_art_rgb\scale2.0x_model.json.caffemodel" -m2 ".\upconv_7_anime_style_art_rgb\noise0_scale2.0x_model.json.caffemodel" --weight 0.5 19 | 20 | ref: 21 | [1] X. Wang, K. Yu, C. Dong, et al. Deep Network Interpolation for Continuous Imagery Effect Transition. CVPR 2019.
22 | [2] https://github.com/xinntao/DNI 23 | """ 24 | 25 | # parsing parameters 26 | parser = argparse.ArgumentParser(description="Network interpolator for waifu2x-caffe") 27 | parser.add_argument("-m1", "--model_1", type=str, required=True, help="the first model to interpolate (*.caffemodel)") 28 | parser.add_argument("-m2", "--model_2", type=str, required=True, help="the second model to interpolate (*.caffemodel)") 29 | parser.add_argument("-w", "--weight", type=float, required=True, help="weight used for interpolation [0-1]") 30 | parser.add_argument("-o", "--output", type=str, default="interpolated.caffemodel", help="model output file name") 31 | parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity") # type=bool would treat any non-empty string as True 32 | 33 | args = parser.parse_args() 34 | 35 | model_1_filename = args.model_1 36 | model_2_filename = args.model_2 37 | weight = args.weight 38 | output_filename = args.output 39 | verbose = args.verbose 40 | 41 | # process 42 | print(f"Loading {model_1_filename}\n") 43 | proto_1 = caffe_pb2.NetParameter() 44 | with open(model_1_filename, "rb") as f: 45 | proto_1.ParseFromString(f.read()) 46 | 47 | print(f"Loading {model_2_filename}\n") 48 | proto_2 = caffe_pb2.NetParameter() 49 | with open(model_2_filename, "rb") as f: 50 | proto_2.ParseFromString(f.read()) 51 | 52 | 53 | print(f"Start interpolation with weight={weight}:") 54 | for idx, layer in enumerate(proto_2.layer): 55 | if len(layer.blobs) > 0: 56 | for i in range(len(layer.blobs)): 57 | tmp_1_data = np.asarray(proto_1.layer[idx].blobs[i].data) 58 | tmp_2_data = np.asarray(proto_2.layer[idx].blobs[i].data) 59 | 60 | assert tmp_1_data.shape == tmp_2_data.shape 61 | 62 | if verbose: 63 | print(f'Interpolating layer "{layer.name}": {layer.type}, size={tmp_1_data.shape}') 64 | 65 | proto_1.layer[idx].blobs[i].data[:] = (1.0 - weight) * tmp_1_data + weight * tmp_2_data 66 | else: 67 | if verbose: 68 | print(f'Skipping layer "{layer.name}": {layer.type}') 69 | 70 | print(f"\nSaving interpolated model to {output_filename}") 71 | 72 | with open(output_filename, "wb") as f: 73 | f.write(proto_1.SerializeToString()) 74 | -------------------------------------------------------------------------------- /Collections/resize.py: -------------------------------------------------------------------------------- 1 | def resize(clip, w=None, h=None, sx=0, sy=0, sw=None, sh=None, kernel="spline36", a1=None, a2=None, mpeg2_cplace=True): 2 | """Experimental wrapper function for vszimg resizer in a fmtconv-like API""" 3 | 4 | assert core.version_number() >= 44 5 | 6 | def _expand(shift, num_planes): 7 | if isinstance(shift, (int, float)): 8 | return [shift for _ in range(num_planes)] 9 | elif len(shift) > 0: 10 | _shift = list(shift) 11 | while len(_shift) < num_planes: 12 | _shift.append(_shift[-1]) 13 | return _shift 14 | 15 | if w is None: 16 | w = clip.width 17 | if h is None: 18 | h = clip.height 19 | if sw is None: 20 | sw = clip.width 21 | if sh is None: 22 | sh = clip.height 23 | 24 | kernel = kernel.capitalize() 25 | if kernel == "Bicubic": 26 | a1, a2 = (a1 if a1 is not None else 0), (a2 if a2 is not None else 0.5) # keep user-supplied kernel params 27 | elif kernel == "Lanczos": 28 | a1 = a1 if a1 is not None else 3 29 | 30 | num_planes = clip.format.num_planes 31 | sx = _expand(sx, num_planes) 32 | sy = _expand(sy, num_planes) 33 | sw = _expand(sw, num_planes) 34 | sh = _expand(sh, num_planes) 35 | 36 | if num_planes == 1: 37 | res = getattr(core.resize, kernel)(clip, w, h, src_left=sx[0], src_top=sy[0], src_width=sw[0], src_height=sh[0], filter_param_a=a1, filter_param_b=a2) 38 | else: 39 | # copied from
nnedi3_resample.py 40 | hSubS = 1 << clip.format.subsampling_w 41 | hCPlace = 0.5 - hSubS / 2 if mpeg2_cplace else 0 42 | hScale = w / clip.width 43 | 44 | vSubS = 1 << clip.format.subsampling_h 45 | vCPlace = 0 46 | vScale = h / clip.height 47 | 48 | planes = [core.std.ShufflePlanes(clip, i, vs.GRAY) for i in range(num_planes)] 49 | for i in range(num_planes): 50 | if i == 0: 51 | planes[i] = getattr(core.resize, kernel)(planes[0], w, h, src_left=sx[0], src_top=sy[0], src_width=sw[0], src_height=sh[0], filter_param_a=a1, filter_param_b=a2) 52 | else: 53 | planes[i] = getattr(core.resize, kernel)(planes[i], w // (1 << clip.format.subsampling_w), h // (1 << clip.format.subsampling_h), src_left=((sx[i]-hCPlace) * hScale + hCPlace) / hScale / hSubS, src_top=((sy[i]-vCPlace) * vScale + vCPlace) / vScale / vSubS, src_width=sw[i] // (1 << clip.format.subsampling_w), src_height=sh[i] // (1 << clip.format.subsampling_h), filter_param_a=a1, filter_param_b=a2) 54 | res = core.std.ShufflePlanes(planes, [0] * num_planes, clip.format.color_family) 55 | 56 | return res 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # muvsfunc 2 | Muonium's VapourSynth functions 3 | 4 | ## Dependencies 5 | [VapourSynth](https://github.com/vapoursynth/vapoursynth) R39-R57 6 | 7 | ### Scripts 8 | - [mvsfunc](https://github.com/HomeOfVapourSynthEvolution/mvsfunc) 9 | 10 | - [nnedi3_resample](https://github.com/HomeOfVapourSynthEvolution/nnedi3_resample) 11 | 12 | and their dependencies. 13 | 14 | ### Plugins 15 | - [AWarpSharp2](https://github.com/dubhater/vapoursynth-awarpsharp2) 16 | 17 | - [Bilateral](https://github.com/HomeOfVapourSynthEvolution/VapourSynth-Bilateral) 18 | 19 | - [CAS](https://github.com/HomeOfVapourSynthEvolution/VapourSynth-CAS) 20 | 21 | - [CTMF](https://github.com/HomeOfVapourSynthEvolution/VapourSynth-CTMF) 22 | 23 | - [descale](https://github.com/Irrational-Encoding-Wizardry/descale) 24 | 25 | - [DFTTest](https://github.com/HomeOfVapourSynthEvolution/VapourSynth-DFTTest) 26 | 27 | - [EEDI2](https://github.com/HomeOfVapourSynthEvolution/VapourSynth-EEDI2) 28 | 29 | - [fmtconv](https://github.com/EleonoreMizo/fmtconv) 30 | 31 | - [misc](https://github.com/vapoursynth/vs-miscfilters-obsolete) (required by VS R55 and later) 32 | 33 | - [MVTools](https://github.com/dubhater/vapoursynth-mvtools) 34 | 35 | - [nnedi3](https://github.com/dubhater/vapoursynth-nnedi3) 36 | 37 | - [RemoveGrain](https://github.com/vapoursynth/vs-removegrain) (required by VS R55 and later) 38 | 39 | - [SangNom](https://bitbucket.org/James1201/vapoursynth-sangnom) 40 | 41 | - [TCanny](https://github.com/HomeOfVapourSynthEvolution/VapourSynth-TCanny) 42 | 43 | - [TemporalMedian](https://github.com/dubhater/vapoursynth-temporalmedian) 44 | 45 | - [VSFilter](https://github.com/HomeOfVapourSynthEvolution/VSFilter) (only required by `TextSub16()`) 46 | 47 | - [VSFilterMod](https://github.com/sorayuki/VSFilterMod) (only required by `TextSub16()`) 48 | 49 | - [vs_mxnet](https://github.com/kice/vs_mxnet) (only required by `super_resolution()`) 50 | 51 | ### Python Packages 52 | - [matplotlib](https://github.com/matplotlib/matplotlib) (only required by `getnative()`) 53 | 54 | - [MXNet](https://github.com/apache/incubator-mxnet) (only required by `super_resolution()`) 55 | 56 | ### Optional dependencies 57 | - [Akarin's Expr](https://github.com/AkarinVS/vapoursynth-plugin) (performance
optimizations) 58 | 59 | 60 | ## Files 61 | `muvsfunc.py` is the main script. It contains some algorithms like `GradFun3`, `GuidedFilter`, `TextSub16`, some helper functions like `MergeChroma`, and some ideas that I develop like `LDMerge`, `AnimeMask`. 62 | 63 | `muvsfunc_misc.py` is a complement of the previous script, containing some outdated algorithms like `SSR` (Single-scale Retinex), some helper functions like `gauss`, `band_merge`, and also one of my ideas, named `detail_enhancement`. It may or may not be merged into the main script someday. 64 | 65 | `muvsfunc_numpy.py` contains algorithms that are processed in `numpy.ndarray` rather than C/C++. Due to the low performance, they are mainly for research. This is where my current interest lies. 66 | 67 | `LUM.py` and `SuperRes.py` (it's not the SuperRes in madVR or MPDN) are the dross of history. You won't need to use them. 68 | 69 | ## Resources 70 | 71 | #### **_[OpenCV for VapourSynth](https://github.com/WolframRhodium/muvsfunc/wiki/OpenCV-Python-for-VapourSynth)_** 72 | 73 | #### [muvs tutorial](https://github.com/WolframRhodium/muvsfunc/wiki/muvs-tutorial) 74 | -------------------------------------------------------------------------------- /muvs.py: -------------------------------------------------------------------------------- 1 | """ 2 | An interface to VapourSynth 3 | 4 | *** DO NOT PUBLISH MODULES THAT DEPEND ON THIS *** 5 | 6 | objects: 7 | core (resembles vapoursynth.core) 8 | 9 | functions: 10 | pollute (poisons foreign modules) 11 | expr (switch for arithmetic expression) 12 | Expr (resembles core.std.Expr(), but with infix expression) 13 | record (computational graph recorder, resembles open()) 14 | Recorder (base class for recorder) 15 | 16 | functions for arithmetic expression: 17 | Abs, Exp, Not, And, Or, Xor, Log, 18 | Sqrt, Min, Max, Conditional 19 | 20 | """ 21 | 22 | from abc import ABC, abstractmethod, abstractstaticmethod 23 | from collections import OrderedDict 24 | import collections.abc 25 | from contextlib import contextmanager 26 | import functools 27 | import inspect 28 | import itertools 29 | import math 30 | import numbers 31 | import operator as op 32 | from typing import Callable, Dict, List, MutableMapping, MutableSet 33 | from typing import Optional, Sequence, Union 34 | import weakref 35 | 36 | import vapoursynth as vs 37 | from vapoursynth import core as _vscore 38 | 39 | 40 | __all__ = [ 41 | "core", "expr", "pollute", "Expr", "record", "Recorder", 42 | "Abs", "Exp", "Not", "And", "Or", "Xor", "Log", "Sqrt", 43 | "Min", "Max", "Conditional"] 44 | 45 | 46 | _is_api4: bool = hasattr(vs, "__api_version__") and vs.__api_version__.api_major == 4 47 | 48 | class _Core: 49 | def __init__(self): 50 | self._registered_funcs = {} # type: Dict[str, Callable[..., '_VideoNode']] 51 | 52 | def __setattr__(self, name, value): 53 | if name in ["num_threads", "max_cache_size"]: 54 | setattr(_vscore, name, value) 55 | else: 56 | if callable(value): 57 | if name[0].isupper() and not hasattr(_vscore, name): 58 | self._registered_funcs[name] = value 59 | else: 60 | raise AttributeError("Attribute name should be capitalized") 61 | else: 62 | vars(self)[name] = value 63 | 64 | def __getattr__(self, name): 65 | try: 66 | attr = getattr(_vscore, name) 67 | except AttributeError as e: 68 | if name in self._registered_funcs: 69 | return self._registered_funcs[name] 70 | else: 71 | raise e 72 | else: 73 | if isinstance(attr, vs.Plugin): 74 | return _Plugin(attr) 75 | else: 76 | return attr 77 | 78 | def __dir__(self) -> List[str]: 79
| return dir(_vscore) + sorted(list(self._registered_funcs.keys())) 80 | 81 | def register_functions(self, **kwargs: Dict[str, Callable[..., '_VideoNode']]): 82 | if all((name[0].isupper() and not hasattr(_vscore, name)) 83 | for name in kwargs.keys()): 84 | 85 | self._registered_funcs.update(kwargs) 86 | else: 87 | raise ValueError("Registration error.") 88 | 89 | core = _Core() 90 | 91 | 92 | arithmetic_expr : bool = False 93 | 94 | @contextmanager 95 | def expr(): 96 | global arithmetic_expr 97 | prev_expr = arithmetic_expr 98 | 99 | arithmetic_expr = True 100 | 101 | try: 102 | yield None 103 | finally: 104 | arithmetic_expr = prev_expr 105 | 106 | 107 | class Recorder: 108 | _live_recorders : MutableSet["Recorder"] = weakref.WeakSet() 109 | 110 | def __init__(self): 111 | self.buffer : List[str] = [] 112 | self.is_recording : bool = False 113 | Recorder._live_recorders.add(self) 114 | 115 | def start_recording(self, include_header=False): 116 | self.is_recording = True 117 | 118 | if include_header: 119 | self.buffer.append( 120 | "import vapoursynth as vs\n" 121 | "from vapoursynth import core\n" 122 | "\n" 123 | f"core.num_threads = {core.num_threads}\n" 124 | f"core.max_cache_size = {core.max_cache_size}\n" 125 | "\n") 126 | 127 | def end_recording(self, filename_or_stream, mode='a', **open_kwargs): 128 | self.is_recording = False 129 | 130 | if self.buffer: 131 | if isinstance(filename_or_stream, str): 132 | with open(filename_or_stream, mode=mode, **open_kwargs) as f: 133 | f.writelines(self.buffer) 134 | else: 135 | stream = filename_or_stream 136 | stream.writelines(self.buffer) 137 | 138 | self.buffer.clear() 139 | 140 | def write(self, text): 141 | assert isinstance(text, str) 142 | self.buffer.append(text) 143 | 144 | 145 | @contextmanager 146 | def record(filename_or_stream, mode='a', include_header=False, **open_kwargs): 147 | recorder = Recorder() 148 | 149 | recorder.start_recording(include_header) 150 | 151 | try: 152 | yield recorder 153 | finally: 154 | recorder.end_recording(filename_or_stream=filename_or_stream, mode=mode, **open_kwargs) 155 | 156 | 157 | def _build_repr() -> Callable[..., str]: 158 | _clip_name_mapping = weakref.WeakKeyDictionary() # type: MutableMapping[vs.VideoNode, str] 159 | counter = 0 160 | 161 | def closure(obj, default_prefix="unknown") -> str: 162 | if isinstance(obj, vs.VideoNode): 163 | if obj in _clip_name_mapping: 164 | return _clip_name_mapping[obj] 165 | 166 | else: 167 | nonlocal counter 168 | name = f"{default_prefix}{counter}" 169 | _clip_name_mapping[obj] = name 170 | counter += 1 171 | return name 172 | 173 | elif isinstance(obj, _VideoNode): 174 | return closure(obj._node, default_prefix) 175 | 176 | elif isinstance(obj, collections.abc.Sequence) and not isinstance(obj, (str, bytes, bytearray)): 177 | return f"[{', '.join(closure(elem, default_prefix) for elem in obj)}]" 178 | 179 | elif isinstance(obj, ( 180 | vs.ColorFamily, vs.SampleType, 181 | getattr(vs, "PresetFormat", getattr(vs, "PresetVideoFormat", None)) 182 | )): 183 | return f"vs.{obj.name}" 184 | 185 | elif isinstance(obj, (vs.VideoFormat if _is_api4 else vs.Format)): 186 | arg_str = ', '.join(f"{k}={closure(v)}" for k, v in obj._as_dict().items()) 187 | return f"core.query_video_format({arg_str})" if _is_api4 else f"core.register_format({arg_str})" 188 | 189 | else: 190 | return repr(obj) 191 | 192 | return closure 193 | 194 | _repr = _build_repr() 195 | 196 | 197 | class _Plugin: 198 | def __init__(self, plugin: vs.Plugin, injected_clip: Optional[vs.VideoNode] = 
None): 199 | if isinstance(plugin, vs.Plugin): 200 | self._plugin = plugin 201 | else: 202 | raise TypeError(f"{type(self).__name__!r}: Unknown plugin ({type(plugin)})") 203 | 204 | if injected_clip is None or isinstance(injected_clip, vs.VideoNode): 205 | self._injected_clip = injected_clip 206 | else: 207 | raise TypeError(f"{type(self).__name__!r}: Unknown injected clip ({type(injected_clip)})") 208 | 209 | def __getattr__(self, function_name): 210 | attr = getattr(self._plugin, function_name) 211 | 212 | if isinstance(attr, vs.Function): 213 | func = attr 214 | 215 | @functools.wraps(func) 216 | def closure(*args, **kwargs): 217 | if self._injected_clip is not None: 218 | args = (self._injected_clip, ) + args 219 | 220 | def get_node(obj): 221 | if isinstance(obj, vs.VideoNode): 222 | return obj 223 | elif isinstance(obj, _VideoNode): 224 | return obj._node 225 | elif isinstance(obj, _ArithmeticExpr): 226 | return obj.compute()._node 227 | elif ( 228 | isinstance(obj, collections.abc.Sequence) and 229 | not isinstance(obj, (str, bytes, bytearray)) 230 | ): 231 | return type(obj)(get_node(item) for item in obj) 232 | elif callable(obj): 233 | class _remove_wrap: 234 | """Fixes callables that return VideoNode""" 235 | def __init__(self, func): 236 | self.func = func 237 | 238 | def __call__(self, *args, **kwargs): 239 | output = self.func(*args, **kwargs) 240 | if isinstance(output, _VideoNode): 241 | output = output._node 242 | return output 243 | 244 | def __repr__(self): 245 | return repr(self.func) 246 | 247 | return _remove_wrap(obj) 248 | else: 249 | return obj 250 | 251 | def get_key(key): 252 | if key.startswith('_'): 253 | return key[1:] 254 | else: 255 | return key 256 | 257 | args = get_node(args) 258 | kwargs = dict((get_key(key), get_node(value)) for key, value in kwargs.items()) 259 | 260 | func_arg_names = ( 261 | key[:key.find(':')] 262 | for key in func.signature.split(';') 263 | if key != '') 264 | 265 | for _, arg_name in zip(args, func_arg_names): 266 | if arg_name in kwargs: 267 | raise TypeError( 268 | f"{func.plugin.namespace}.{func.name}() " 269 | f"got multiple values for argument \'{arg_name}\'") 270 | 271 | # process 272 | output = func(*args, **kwargs) 273 | 274 | if isinstance(output, vs.VideoNode): 275 | _ = _repr(output, default_prefix="clip") # register output 276 | 277 | for recorder in Recorder._live_recorders: 278 | if recorder.is_recording: 279 | recorder.buffer.append(self._get_str(func, args, kwargs, output) + '\n') 280 | 281 | return _VideoNode(output) 282 | elif isinstance(output, list) and len(output) > 0 and isinstance(output[0], vs.VideoNode): 283 | for item in output: 284 | _ = _repr(item, default_prefix="clip") # register output 285 | 286 | for recorder in Recorder._live_recorders: 287 | if recorder.is_recording: 288 | recorder.buffer.append(self._get_str(func, args, kwargs, output, check_output=False) + '\n') 289 | 290 | return list(_VideoNode(item) for item in output) 291 | else: 292 | return output 293 | 294 | return closure 295 | 296 | else: 297 | return attr 298 | 299 | def __hash__(self): 300 | return hash(self._plugin) 301 | 302 | def __dir__(self): 303 | return dir(self._plugin) 304 | 305 | @staticmethod 306 | def _get_str(func: vs.Function, args, kwargs, output, check_output=True): 307 | output_str = "" 308 | 309 | if check_output: 310 | def diff_str(clip1: vs.VideoNode, clip2: vs.VideoNode): 311 | """Compare two clips and output a string of their difference""" 312 | res = [] 313 | for attr in ["width", "height", "num_frames"]: 314 |
if getattr(clip1, attr) != getattr(clip2, attr): 315 | res.append(f"{attr}: {getattr(clip1, attr)} -> {getattr(clip2, attr)}") 316 | if clip1.format.name != clip2.format.name: 317 | res.append(f"format: {clip1.format.name} -> {clip2.format.name}") 318 | if clip1.fps != clip2.fps: 319 | res.append(f"fps: {clip1.fps_num}/{clip1.fps_den} -> {clip2.fps_num}/{clip2.fps_den}") 320 | return ', '.join(res) 321 | 322 | if len(args) > 0 and isinstance(args[0], vs.VideoNode): 323 | if diff_str(args[0], output) != "": 324 | output_str += f"# {diff_str(args[0], output)}\n" 325 | elif kwargs.get("clip", None): 326 | if diff_str(kwargs["clip"], output) != "": 327 | output_str += f"# {diff_str(kwargs['clip'], output)}\n" 328 | else: 329 | output_str += (f"# output: {output.width} x {output.height}, {output.format.name}, " 330 | f"{output.num_frames} frames, {output.fps_num}/{output.fps_den} fps\n") 331 | 332 | args_dict = inspect.signature(func).bind(*args, **kwargs).arguments 333 | 334 | # replace clip in args_dict.values() with name of clip 335 | call_args = ', '.join(f"{k}={_repr(v)}" for k, v in args_dict.items() if v is not None) 336 | call_str = f"core.{func.plugin.namespace}.{func.name}({call_args})" 337 | 338 | output_str += f"{_repr(output, default_prefix='clip')} = {call_str}\n" 339 | 340 | return output_str 341 | 342 | 343 | ########################## Expr IR Start ########################## 344 | class ExprIR(ABC): 345 | """ AST-style expression """ 346 | 347 | @abstractmethod 348 | def __eq__(self, other): 349 | pass 350 | 351 | @abstractmethod 352 | def __repr__(self): 353 | """ Infix and function call style """ 354 | pass 355 | 356 | @abstractmethod 357 | def __str__(self): 358 | """ Postfix style """ 359 | pass 360 | 361 | class DupN(ExprIR): 362 | def __eq__(self, other): 363 | return isinstance(other, DupN) 364 | 365 | def __repr__(self): 366 | return "DupN()" 367 | 368 | def __str__(self): 369 | return "dup" 370 | dup = DupN() 371 | 372 | class UnaryBaseOp(ExprIR): 373 | @abstractstaticmethod 374 | def cast(x): 375 | pass 376 | 377 | def __init__(self, x): 378 | self.x = self.cast(x) 379 | 380 | def __eq__(self, other): 381 | return isinstance(other, type(self)) and self.x == other.x 382 | 383 | def __repr__(self): 384 | return f"{type(self).__name__}({self.x!r})" 385 | 386 | def __str__(self): 387 | return f"{self.x!s} {type(self).__name__.lower()}" 388 | 389 | class ConstantN(UnaryBaseOp): 390 | def __str__(self): 391 | return f"{self.x!s}" 392 | 393 | @staticmethod 394 | def cast(x): 395 | assert isinstance(x, numbers.Real) 396 | return x 397 | ConstantN_0 = ConstantN(0) 398 | ConstantN_1 = ConstantN(1) 399 | 400 | class VarN(UnaryBaseOp): 401 | def __eq__(self, other): 402 | return isinstance(other, VarN) and hash(self.x) == hash(other.x) 403 | 404 | def __str__(self): 405 | return f"{self.x!s}" 406 | 407 | @staticmethod 408 | def cast(x): 409 | assert isinstance(x, _VideoNode) 410 | return x 411 | 412 | def Cast(x): 413 | if isinstance(x, ExprIR): 414 | return x 415 | elif isinstance(x, numbers.Real): 416 | return ConstantN(x) 417 | elif isinstance(x, _VideoNode): 418 | return VarN(x) 419 | elif isinstance(x, vs.VideoNode): 420 | return VarN(_VideoNode(x)) 421 | else: 422 | raise TypeError(f"Unknown input ({type(x)})") 423 | 424 | class UnaryOp(UnaryBaseOp): 425 | @abstractstaticmethod 426 | def compute(x): 427 | pass 428 | 429 | def __str__(self): 430 | return f"{self.x!s} {self.op_name}" 431 | 432 | @staticmethod 433 | def cast(x): 434 | return Cast(x) 435 | 436 | class
NotN(UnaryOp): 437 | op_name = "not" 438 | compute = op.not_ 439 | 440 | class AbsN(UnaryOp): 441 | op_name = "abs" 442 | compute = abs 443 | 444 | class SqrtN(UnaryOp): 445 | op_name = "sqrt" 446 | compute = math.sqrt 447 | 448 | class LogN(UnaryOp): 449 | op_name = "log" 450 | compute = math.log 451 | 452 | class ExpN(UnaryOp): 453 | op_name = "exp" 454 | compute = math.exp 455 | 456 | class BinaryOp(ExprIR): 457 | @abstractstaticmethod 458 | def compute(x, y): 459 | pass 460 | 461 | def __init__(self, x, y): 462 | self.x, self.y = self.cast(x, y) 463 | 464 | def __eq__(self, other): 465 | return ( 466 | isinstance(other, type(self)) and 467 | self.x == other.x and 468 | self.y == other.y 469 | ) 470 | 471 | def __repr__(self): 472 | return f"{type(self).__name__}({self.x!r}, {self.y!r})" 473 | 474 | def __str__(self): 475 | return f"{self.x!s} {self.y!s} {self.op_name}" 476 | 477 | @staticmethod 478 | def cast(x, y): 479 | return Cast(x), Cast(y) 480 | 481 | class AddN(BinaryOp): 482 | op_name = "+" 483 | compute = op.add 484 | 485 | class SubN(BinaryOp): 486 | op_name = "-" 487 | compute = op.sub 488 | 489 | class MulN(BinaryOp): 490 | op_name = "*" 491 | compute = op.mul 492 | 493 | class DivN(BinaryOp): 494 | op_name = "/" 495 | compute = op.truediv 496 | 497 | class PowN(BinaryOp): 498 | op_name = "pow" 499 | compute = op.pow 500 | 501 | class AndN(BinaryOp): 502 | op_name = "and" 503 | compute = op.and_ 504 | 505 | class OrN(BinaryOp): 506 | op_name = "or" 507 | compute = op.or_ 508 | 509 | class XorN(BinaryOp): 510 | op_name = "xor" 511 | compute = op.xor 512 | 513 | class LtN(BinaryOp): 514 | op_name = "<" 515 | compute = op.lt 516 | 517 | class LeN(BinaryOp): 518 | op_name = "<=" 519 | compute = op.le 520 | 521 | class EqN(BinaryOp): 522 | op_name = "=" 523 | compute = op.eq 524 | 525 | class NeN(BinaryOp): 526 | op_name = "= not" 527 | compute = op.ne 528 | 529 | class GeN(BinaryOp): 530 | op_name = ">=" 531 | compute = op.ge 532 | 533 | class GtN(BinaryOp): 534 | op_name = ">" 535 | compute = op.gt 536 | 537 | class MaxN(BinaryOp): 538 | op_name = "max" 539 | compute = max 540 | 541 | class MinN(BinaryOp): 542 | op_name = "min" 543 | compute = min 544 | 545 | class ConditionalN(ExprIR): 546 | def __init__(self, x, y, z): 547 | self.x, self.y, self.z = self.cast(x, y, z) 548 | 549 | def __eq__(self, other): 550 | return ( 551 | isinstance(other, ConditionalN) and 552 | self.x == other.x and 553 | self.y == other.y and 554 | self.z == other.z 555 | ) 556 | 557 | def __repr__(self): 558 | return f"ConditionalN({self.x!r}, {self.y!r}, {self.z!r})" 559 | 560 | def __str__(self): 561 | return f"{self.x!s} {self.y!s} {self.z!s} ?" 
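    # For example: str(ConditionalN(1, 2, 3)) == "1 2 3 ?" (operands are Cast to
    # ConstantN on construction), while repr() gives the function-call form
    # "ConditionalN(ConstantN(1), ConstantN(2), ConstantN(3))".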
562 | 563 | @staticmethod 564 | def cast(x, y, z): 565 | return Cast(x), Cast(y), Cast(z) 566 | 567 | @staticmethod 568 | def compute(x, y, z): 569 | return y if x else z 570 | 571 | def _simplify(expr: ExprIR) -> ExprIR: 572 | assert isinstance(expr, ExprIR) 573 | 574 | while True: 575 | prev_expr = expr 576 | 577 | # early skipping 578 | if isinstance(expr, (DupN, ConstantN, VarN)): 579 | return expr 580 | # constant foldings and universal eliminations 581 | elif isinstance(expr, UnaryOp) and isinstance(expr.x, ConstantN): 582 | # num op -> op(num) 583 | return ConstantN(expr.compute(expr.x.x)) 584 | elif isinstance(expr, BinaryOp): 585 | if isinstance(expr.x, ConstantN): 586 | if isinstance(expr.y, ConstantN): 587 | # num1 num2 op -> op(num1, num2) 588 | return ConstantN(expr.compute(expr.x.x, expr.y.x)) 589 | elif isinstance(expr.y, DupN): 590 | # num dup op -> op(num, num) 591 | return ConstantN(expr.compute(expr.x.x, expr.x.x)) 592 | elif expr.x == expr.y: 593 | # x x op -> x dup op 594 | expr = type(expr)(expr.x, dup) 595 | 596 | # operator specific simplifications 597 | if isinstance(expr, SqrtN): 598 | x = _simplify(expr.x) 599 | if isinstance(x, MulN) and isinstance(x.y, DupN): 600 | # x dup * sqrt -> x abs 601 | expr = AbsN(x.x) 602 | else: 603 | expr = SqrtN(x) 604 | elif isinstance(expr, LogN): 605 | x = _simplify(expr.x) 606 | if isinstance(x, ExpN): 607 | # x exp log -> x 608 | expr = x.x 609 | else: 610 | expr = LogN(x) 611 | elif isinstance(expr, ExpN): 612 | x = _simplify(expr.x) 613 | if isinstance(x, LogN): 614 | # x log exp -> x 615 | expr = x.x 616 | else: 617 | expr = ExpN(x) 618 | elif isinstance(expr, AddN): 619 | if expr.x == ConstantN_0: 620 | # 0 x + -> x 621 | expr = expr.y 622 | elif expr.y == ConstantN_0: 623 | # x 0 + -> x 624 | expr = expr.x 625 | elif isinstance(expr, SubN): 626 | if isinstance(expr.y, DupN): 627 | # x dup - -> 0 628 | return ConstantN_0 629 | elif expr.y == ConstantN_0: 630 | # x 0 - -> x 631 | expr = expr.x 632 | elif isinstance(expr, MulN): 633 | if expr.x == ConstantN_1: 634 | # 1 x * -> x 635 | expr = expr.y 636 | elif expr.y == ConstantN_1: 637 | # x 1 * -> x 638 | expr = expr.x 639 | elif isinstance(expr, DivN): 640 | if isinstance(expr.y, DupN): 641 | # x dup / -> 1 642 | return ConstantN_1 643 | elif expr.y == ConstantN_1: 644 | # x 1 / -> x 645 | expr = expr.x 646 | elif isinstance(expr, PowN): 647 | if isinstance(expr.x, ConstantN): 648 | if expr.x == ConstantN_0: 649 | # 0 x pow -> 0 650 | expr = ConstantN_0 651 | elif expr.x == ConstantN_1: 652 | # 1 x pow -> 1 653 | expr = ConstantN_1 654 | elif expr.x == ConstantN(math.e): 655 | # math.e x pow -> x exp 656 | expr = ExpN(expr.y) 657 | elif isinstance(expr.y, ConstantN): 658 | if expr.y == ConstantN_0: 659 | # x 0 pow -> 1 660 | expr = ConstantN_1 661 | elif expr.y == ConstantN_1: 662 | # x 1 pow -> x 663 | expr = expr.x 664 | elif expr.y == ConstantN(2): 665 | # x 2 pow -> x dup * 666 | expr = MulN(expr.x, dup) 667 | elif expr.y == ConstantN(0.5): 668 | # x 0.5 pow -> x sqrt 669 | expr = SqrtN(expr.x) 670 | elif expr.y == ConstantN(-0.5): 671 | # x -0.5 pow -> x dup sqrt / 672 | expr = DivN(expr.x, SqrtN(dup)) 673 | elif isinstance(expr, (MaxN, MinN)) and isinstance(expr.y, DupN): 674 | # x dup {max/min} -> x 675 | expr = expr.x 676 | elif isinstance(expr, ConditionalN): 677 | if isinstance(expr.x, ConstantN): 678 | # num x y ? -> (num ? x : y) 679 | expr = expr.y if expr.x.x else expr.z 680 | elif expr.y == expr.z: 681 | # _ x x ?
-> x 682 | expr = expr.y 683 | 684 | # non-local simplification of binary operations 685 | if isinstance(expr, BinaryOp): 686 | expr = type(expr)(_simplify(expr.x), _simplify(expr.y)) 687 | 688 | if isinstance(expr.x, ConstantN): 689 | if isinstance(expr.y, UnaryOp): 690 | if isinstance(expr.y.x, DupN): 691 | # num dup op1 op2 -> num num op1 op2 692 | expr = type(expr)(expr.x, type(expr.y)(expr.x)) 693 | elif isinstance(expr.y, BinaryOp): 694 | if isinstance(expr.y.x, DupN): 695 | # num dup x op1 op2 -> num num x op1 op2 696 | expr = type(expr)(expr.x, type(expr.y)(expr.x, expr.y.y)) 697 | elif isinstance(expr.y, BinaryOp) and expr.x == expr.y.x: 698 | # x x y op1 op2 -> x dup y op1 op2 699 | expr = type(expr)(expr.x, type(expr.y)(dup, expr.y.y)) 700 | 701 | if expr == prev_expr: 702 | # no progress 703 | return expr 704 | else: 705 | prev_expr = expr 706 | # continue 707 | 708 | def postfix(expr: ExprIR, namer: Optional[Callable[[VarN], str]] = None) -> str: 709 | assert isinstance(expr, ExprIR) 710 | 711 | if isinstance(expr, ConstantN): 712 | return str(expr) 713 | elif isinstance(expr, VarN): 714 | if namer is None: 715 | return str(expr) 716 | else: 717 | return namer(expr) 718 | elif isinstance(expr, DupN): 719 | return "dup" 720 | elif isinstance(expr, UnaryOp): 721 | return f"{postfix(expr.x, namer)} {expr.op_name}" 722 | elif isinstance(expr, BinaryOp): 723 | first = postfix(expr.x, namer) 724 | return f"{first} {postfix(expr.y, namer)} {expr.op_name}" 725 | elif isinstance(expr, ConditionalN): 726 | first = postfix(expr.x, namer) 727 | second = postfix(expr.y, namer) 728 | return f"{first} {second} {postfix(expr.z, namer)} ?" 729 | else: 730 | raise TypeError(f"Unknown type {type(expr)}") 731 | 732 | 733 | def infix(expr: ExprIR, namer: Optional[Callable[[VarN], str]] = None, 734 | top: Optional[str] = None 735 | ) -> str: 736 | assert isinstance(expr, ExprIR) 737 | 738 | if isinstance(expr, ConstantN): 739 | return str(expr) 740 | elif isinstance(expr, VarN): 741 | if namer is None: 742 | return str(expr) 743 | else: 744 | return namer(expr) 745 | elif isinstance(expr, DupN): 746 | if top: 747 | return top 748 | else: 749 | raise ValueError("Empty dup node") 750 | elif isinstance(expr, UnaryOp): 751 | return f"{expr.op_name}({infix(expr.x, namer, top=top)})" 752 | elif isinstance(expr, BinaryOp): 753 | first = infix(expr.x, namer, top=top) 754 | return f"({first} {expr.op_name} {infix(expr.y, namer, top=first)})" 755 | elif isinstance(expr, ConditionalN): 756 | first = infix(expr.x, namer, top=top) 757 | second = infix(expr.y, namer, top=first) 758 | return f"({second} if {first} else {infix(expr.z, namer, top=second)})" 759 | else: 760 | raise TypeError(f"Unknown type {type(expr)}") 761 | 762 | ########################## Expr IR End ########################## 763 | 764 | def namer_factory(): 765 | alphabet = "xyzabcdefghijklmnopqrstuvw" 766 | mapping = OrderedDict() # type: MutableMapping[_VideoNode, str] 767 | 768 | def namer(obj: VarN) -> str: 769 | x = obj.x 770 | if x in mapping or len(mapping) < len(alphabet): 771 | return mapping.setdefault(x, f"{alphabet[len(mapping)]}") 772 | else: 773 | raise RuntimeError("namer: Too many nodes") 774 | 775 | return namer 776 | 777 | 778 | class _Fake_VideoNode: 779 | """ Fake VideoNode used to bypass instance check in other scripts """ 780 | pass 781 | 782 | 783 | class _ArithmeticExpr(_Fake_VideoNode): 784 | def __init__(self, obj): 785 | self._expr = Cast(obj) # type: ExprIR 786 | self._cached_clip = None # type:
Optional[_VideoNode] 787 | 788 | def __getattr__(self, name): 789 | if hasattr(_vscore, name) or hasattr(self.clips[0], name): 790 | return getattr(self.compute(), name) 791 | else: 792 | raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}") 793 | 794 | def __bool__(self): 795 | raise RuntimeError("Impossible") 796 | 797 | def __hash__(self): 798 | return hash(self.clips + (self.expr,)) 799 | 800 | def __str__(self): 801 | def namer(x: VarN): 802 | return _repr(x.x) 803 | return infix(self._expr, namer=namer).strip("()") 804 | 805 | @property 806 | def clips(self): 807 | from collections import OrderedDict 808 | 809 | clips_dict = OrderedDict() 810 | exprs = [self._expr] 811 | 812 | while exprs: 813 | expr = exprs.pop() 814 | if isinstance(expr, VarN): 815 | clips_dict.setdefault(expr.x, None) 816 | elif isinstance(expr, UnaryOp): 817 | exprs.append(expr.x) 818 | elif isinstance(expr, BinaryOp): 819 | exprs.extend([expr.y, expr.x]) 820 | elif isinstance(expr, ConditionalN): 821 | exprs.extend([expr.z, expr.y, expr.x]) 822 | 823 | return tuple(clips_dict.keys()) 824 | 825 | def get_expr(self, namer) -> str: 826 | return postfix(self._expr, namer=namer) 827 | 828 | @property 829 | def expr(self) -> str: 830 | return self.get_expr(namer=namer_factory()) 831 | 832 | @property 833 | def lut_func(self) -> Callable[..., numbers.Integral]: 834 | clips = self.clips 835 | 836 | assert len(clips) in [1, 2] 837 | 838 | func_impl = infix(self._expr, namer=namer_factory()) 839 | func_impl = f"min(max(int({func_impl} + 0.5), 0), {(2 ** clips[0].format.bits_per_sample) - 1})" # clamp 840 | 841 | if len(clips) == 1: 842 | lut_str = f"lambda x: {func_impl}" 843 | else: # len(clips) == 2 844 | lut_str = f"lambda x, y: {func_impl}" 845 | 846 | class _LambdaFunction: 847 | def __init__(self, func_str: str): 848 | self.func = eval(func_str, {"exp": math.exp, "log": math.log, "sqrt": math.sqrt}) 849 | self.func_str = func_str 850 | 851 | def __call__(self, *args, **kwargs): 852 | return self.func(*args, **kwargs) 853 | 854 | def __repr__(self): 855 | return self.func_str 856 | 857 | return _LambdaFunction(lut_str) 858 | 859 | def compute(self, planes=None, bits=None, use_lut=None, 860 | simplify: Union[bool, Callable[[ExprIR], ExprIR]] = True 861 | ) -> '_VideoNode': 862 | 863 | if arithmetic_expr: 864 | cacheable = planes is None and bits is None and use_lut is None 865 | 866 | if cacheable and self._cached_clip is not None: 867 | return self._cached_clip 868 | 869 | if simplify: 870 | if callable(simplify): 871 | self._expr = simplify(self._expr) 872 | else: 873 | self._expr = _simplify(self._expr) 874 | 875 | if len(self.clips) == 0: 876 | raise ValueError("ArithmeticExpr becomes empty") 877 | 878 | if self.expr in ['', 'x']: # empty expr 879 | return _VideoNode(self.clips[0]._node) 880 | else: 881 | clips = self.clips 882 | if len(clips) > 26: 883 | raise RuntimeError("Too many clips.") 884 | 885 | if bits is None: 886 | not_equal_bits = ( 887 | lambda clip1, clip2: 888 | clip1.format.bits_per_sample != clip2.format.bits_per_sample) 889 | 890 | if len(clips) >= 2 and any(not_equal_bits(clips[0], clip) for clip in clips[1:]): 891 | raise ValueError('"bits" must be specified.') 892 | else: 893 | bits = clips[0].format.bits_per_sample 894 | 895 | is_int = lambda clip: clip.format.sample_type == vs.INTEGER 896 | get_bits = lambda clip: clip.format.bits_per_sample 897 | lut_available = ( 898 | lambda clips: 899 | len(clips) <= 2 and all(map(is_int, clips)) and sum(map(get_bits, clips)) 
<= 20) 900 | 901 | if use_lut is None: 902 | use_lut = lut_available(clips) and len(self.expr.split()) >= 15 903 | elif use_lut and not lut_available(clips): 904 | raise ValueError("Lut computation is not available") 905 | 906 | # process 907 | if use_lut: # std.Lut() / std.Lut2() 908 | if len(clips) == 1: 909 | res = core.std.Lut(clips[0], planes=planes, bits=bits, function=self.lut_func) 910 | else: # len(clips) == 2 911 | res = core.std.Lut2( 912 | clips[0], clips[1], planes=planes, bits=bits, function=self.lut_func) 913 | 914 | else: # std.Expr() 915 | if planes is None: 916 | expr = self.expr 917 | else: 918 | if isinstance(planes, int): 919 | planes = [planes] 920 | 921 | expr = [ 922 | (self.expr if i in planes else "") 923 | for i in range(clips[0].format.num_planes)] 924 | 925 | in_format = clips[0].format 926 | 927 | if bits == in_format.bits_per_sample: 928 | out_format = None 929 | else: 930 | query_video_format = core.query_video_format if _is_api4 else core.register_format 931 | out_format = query_video_format( 932 | color_family=in_format.color_family, 933 | sample_type=vs.INTEGER if bits <= 16 else vs.FLOAT, 934 | bits_per_sample=bits, 935 | subsampling_w=in_format.subsampling_w, 936 | subsampling_h=in_format.subsampling_h 937 | ) 938 | 939 | res = core.std.Expr(clips=clips, expr=expr, format=out_format) 940 | 941 | if cacheable: 942 | self._cached_clip = res 943 | 944 | return res 945 | 946 | else: 947 | raise RuntimeError("Arithmetic expression is disabled.") 948 | 949 | # Arithmetic methods 950 | def _operate(self, 951 | op: Union[UnaryOp, BinaryOp, ConditionalN], 952 | *operands: Sequence[Union[numbers.Real, vs.VideoNode, "_VideoNode", ExprIR]] 953 | ) -> "_ArithmeticExpr": 954 | unwrap = lambda x: x._expr if isinstance(x, type(self)) else x 955 | result = op(*map(unwrap, operands)) 956 | return type(self)(result) 957 | 958 | # unary operations 959 | def __neg__(self): 960 | return self._operate(SubN, 0, self) 961 | 962 | def __pos__(self): 963 | return self 964 | 965 | def __abs__(self): 966 | return self._operate(AbsN, self) 967 | 968 | def __invert__(self): 969 | return self._operate(NotN, self) 970 | 971 | def __exp__(self): 972 | return self._operate(ExpN, self) 973 | 974 | def __log__(self): 975 | return self._operate(LogN, self) 976 | 977 | def __sqrt__(self): 978 | return self._operate(SqrtN, self) 979 | 980 | # binary operations 981 | def __lt__(self, other): 982 | return self._operate(LtN, self, other) 983 | 984 | def __le__(self, other): 985 | return self._operate(LeN, self, other) 986 | 987 | def __eq__(self, other): 988 | return self._operate(EqN, self, other) 989 | 990 | def __ne__(self, other): 991 | return self._operate(NeN, self, other) 992 | 993 | def __gt__(self, other): 994 | return self._operate(GtN, self, other) 995 | 996 | def __ge__(self, other): 997 | return self._operate(GeN, self, other) 998 | 999 | def __add__(self, other): 1000 | return self._operate(AddN, self, other) 1001 | 1002 | def __radd__(self, other): 1003 | return self._operate(AddN, other, self) 1004 | 1005 | def __sub__(self, other): 1006 | return self._operate(SubN, self, other) 1007 | 1008 | def __rsub__(self, other): 1009 | return self._operate(SubN, other, self) 1010 | 1011 | def __mul__(self, other): 1012 | return self._operate(MulN, self, other) 1013 | 1014 | def __rmul__(self, other): 1015 | return self._operate(MulN, other, self) 1016 | 1017 | def __truediv__(self, other): 1018 | return self._operate(DivN, self, other) 1019 | 1020 | def __rtruediv__(self, other): 1021 | 
return self._operate(DivN, other, self) 1022 | 1023 | def __pow__(self, other, module=None): 1024 | if module is None: 1025 | return self._operate(PowN, self, other) 1026 | else: 1027 | raise NotImplementedError 1028 | 1029 | def __rpow__(self, other): 1030 | return self._operate(PowN, other, self) 1031 | 1032 | def __and__(self, other): 1033 | return self._operate(AndN, self, other) 1034 | 1035 | def __rand__(self, other): 1036 | return self._operate(AndN, other, self) 1037 | 1038 | def __or__(self, other): 1039 | return self._operate(OrN, self, other) 1040 | 1041 | def __ror__(self, other): 1042 | return self._operate(OrN, other, self) 1043 | 1044 | def __xor__(self, other): 1045 | return self._operate(XorN, self, other) 1046 | 1047 | def __rxor__(self, other): 1048 | return self._operate(XorN, other, self) 1049 | 1050 | # custom binary operations 1051 | def __max__(self, other): 1052 | return self._operate(MaxN, self, other) 1053 | 1054 | def __rmax__(self, other): 1055 | return self._operate(MaxN, other, self) 1056 | 1057 | def __min__(self, other): 1058 | return self._operate(MinN, self, other) 1059 | 1060 | def __rmin__(self, other): 1061 | return self._operate(MinN, other, self) 1062 | 1063 | # custom ternary operation 1064 | def __conditional__(self, other_true, other_false): 1065 | return self._operate(ConditionalN, self, other_true, other_false) 1066 | 1067 | def __rconditional__(self, other_condition, other_false): 1068 | return self._operate(ConditionalN, other_condition, self, other_false) 1069 | 1070 | def __rrconditional__(self, other_condition, other_true): 1071 | return self._operate(ConditionalN, other_condition, other_true, self) 1072 | 1073 | 1074 | def _build_VideoNode(fake_vn=None): 1075 | _plane_idx_mapping = { 1076 | vs.YUV: {'Y': 0, 'U': 1, 'V': 2}, 1077 | vs.RGB: {'R': 0, 'G': 1, 'B': 2}, 1078 | vs.GRAY: {'GRAY': 0, 'Y': 0} 1079 | } 1080 | 1081 | def __init__(self, node: vs.VideoNode): 1082 | if not isinstance(node, vs.VideoNode): 1083 | raise TypeError(f"{type(self).__name__!r}: Unknown input ({type(node)})") 1084 | self._node = node 1085 | 1086 | def __getattr__(self, name): 1087 | if name[0].isupper(): # non-standard attributes 1088 | if (self.format.color_family in _plane_idx_mapping and 1089 | name in _plane_idx_mapping[self.format.color_family]): 1090 | 1091 | idx = _plane_idx_mapping[self.format.color_family][name] 1092 | return self.std.ShufflePlanes(planes=idx, colorfamily=vs.GRAY) 1093 | 1094 | elif hasattr(core, name): 1095 | func = getattr(core, name) 1096 | return functools.partial(func, self) 1097 | else: 1098 | raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}") 1099 | 1100 | elif hasattr(_vscore, name) and isinstance(getattr(_vscore, name), vs.Plugin): 1101 | plugin = getattr(_vscore, name) 1102 | return _Plugin(plugin, self._node) 1103 | else: 1104 | attr = getattr(self._node, name) 1105 | 1106 | if callable(attr): # set_output(), etc 1107 | @functools.wraps(attr) 1108 | def closure(*args, **kwargs): 1109 | for recorder in Recorder._live_recorders: 1110 | if recorder.is_recording: 1111 | args_str = ', '.join(map(_repr, args)) 1112 | kwargs_str = ', '.join(f"{k}={_repr(v)}" for k, v in kwargs.items()) 1113 | call_str = ', '.join(s for s in [args_str, kwargs_str] if s != '') 1114 | recorder.buffer.append(f"{_repr(self)}.{name}({call_str})\n") 1115 | 1116 | return attr(*args, **kwargs) 1117 | 1118 | return closure 1119 | 1120 | else: 1121 | return attr 1122 | 1123 | def __len__(self): 1124 | return self.num_frames 1125 | 1126 | def
__str__(self): 1127 | return f"muvs {self._node!s}" 1128 | 1129 | def __bool__(self): 1130 | raise RuntimeError("Impossible") 1131 | 1132 | def __dir__(self): 1133 | return dir(self._node) + list(_plane_idx_mapping[self.format.color_family].keys()) 1134 | 1135 | def __hash__(self): 1136 | return hash(self._node) 1137 | 1138 | def __iter__(self): 1139 | return (type(self)(clip) for clip in iter(self._node)) 1140 | 1141 | def __getitem__(self, val): 1142 | if isinstance(val, slice): 1143 | start, stop, step = val.indices(self.num_frames) 1144 | 1145 | if step > 0: 1146 | stop -= 1 1147 | else: # step < 0 1148 | start, stop = stop + 1, start 1149 | 1150 | ret = self 1151 | 1152 | if start != 0 or stop != ret.num_frames - 1: 1153 | ret = ret.std.Trim(first=start, last=stop) 1154 | 1155 | if step < 0: 1156 | ret = ret.std.Reverse() 1157 | 1158 | if abs(step) != 1: 1159 | ret = ret.std.SelectEvery(cycle=abs(step), offsets=[0]) 1160 | 1161 | if ret is self: # shallow copy 1162 | ret = type(self)(self._node) 1163 | 1164 | return ret 1165 | 1166 | elif isinstance(val, int): 1167 | if val < 0: 1168 | n = self.num_frames + val 1169 | else: 1170 | n = val 1171 | 1172 | if n < 0 or (self.num_frames > 0 and n >= self.num_frames): 1173 | raise IndexError("index out of range") 1174 | else: 1175 | return self.std.Trim(first=n, length=1) 1176 | else: 1177 | raise TypeError(f"indices must be integers or slices, not {type(val)}") 1178 | 1179 | methods = locals().copy() 1180 | 1181 | create_method = (lambda name: 1182 | lambda self, *args: 1183 | getattr(_ArithmeticExpr(self), name)(*args)) 1184 | 1185 | magic_methods = [ 1186 | "__neg__", "__pos__", "__abs__", "__exp__", "__log__", "__invert__", "__sqrt__", "__lt__", 1187 | "__le__", "__eq__", "__ne__", "__gt__", "__ge__", "__add__", "__radd__", "__sub__", 1188 | "__rsub__", "__mul__", "__rmul__", "__truediv__", "__rtruediv__", "__pow__", "__rpow__", 1189 | "__and__", "__rand__", "__xor__", "__rxor__", "__or__", "__ror__", "__min__", "__rmin__", 1190 | "__max__", "__rmax__", "__conditional__", "__rconditional__", "__rrconditional__" 1191 | ] 1192 | 1193 | methods.update((name, create_method(name)) for name in magic_methods) 1194 | 1195 | return type("_VideoNode", (fake_vn,) if fake_vn is not None else (), methods) 1196 | 1197 | _VideoNode = _build_VideoNode(_Fake_VideoNode) 1198 | 1199 | 1200 | def Expr(exprs, format=None, 1201 | simplify: Union[bool, Callable[[ExprIR], ExprIR]] = True 1202 | ) -> '_VideoNode': 1203 | if isinstance(exprs, _VideoNode): 1204 | exprs = [_ArithmeticExpr(exprs)] 1205 | elif isinstance(exprs, _ArithmeticExpr): 1206 | exprs = [exprs] 1207 | elif isinstance(exprs, collections.abc.Sequence): 1208 | if len(exprs) == 0: 1209 | raise ValueError("Empty expression") 1210 | 1211 | for i in range(len(exprs)): 1212 | if isinstance(exprs[i], _VideoNode): 1213 | exprs[i] = _ArithmeticExpr(exprs[i]) 1214 | elif exprs[i] is not None and not isinstance(exprs[i], (_ArithmeticExpr, numbers.Real)): 1215 | raise TypeError(f"Invalid type ({type(exprs[i])})") 1216 | 1217 | if simplify: 1218 | for i in range(len(exprs)): 1219 | if isinstance(exprs[i], _ArithmeticExpr): 1220 | if callable(simplify): 1221 | exprs[i] = _ArithmeticExpr(simplify(exprs[i]._expr)) 1222 | else: 1223 | exprs[i] = _ArithmeticExpr(_simplify(exprs[i]._expr)) 1224 | 1225 | for expr in exprs: 1226 | if isinstance(expr, _ArithmeticExpr): 1227 | num_planes = expr.clips[0].format.num_planes 1228 | 1229 | for i in range(len(exprs), num_planes): 1230 | exprs.append(exprs[-1]) 1231 | 1232 | 
break 1233 | else: 1234 | raise ValueError("No clip is given") 1235 | 1236 | namer = namer_factory() 1237 | 1238 | expr_strs = [] 1239 | for i in range(num_planes): 1240 | if exprs[i] is None: 1241 | expr_strs.append("") 1242 | elif isinstance(exprs[i], numbers.Real): 1243 | expr_strs.append(str(exprs[i])) 1244 | else: 1245 | expr_str = exprs[i].get_expr(namer=namer) 1246 | 1247 | if expr_str == 'x': 1248 | expr_strs.append('') 1249 | else: 1250 | expr_strs.append(expr_str) 1251 | 1252 | clips = ( 1253 | tuple(OrderedDict((obj, None) for obj in itertools.chain.from_iterable( 1254 | expr.clips for expr in exprs 1255 | if isinstance(expr, _ArithmeticExpr) 1256 | )).keys())) 1257 | 1258 | return core.std.Expr(clips, expr_strs, format) 1259 | 1260 | 1261 | # custom operations 1262 | Abs = abs 1263 | 1264 | def Exp(x): 1265 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1266 | return x.__exp__() 1267 | else: 1268 | return math.exp(x) 1269 | 1270 | 1271 | def Not(x): 1272 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1273 | return x.__invert__() 1274 | else: 1275 | return not x 1276 | 1277 | 1278 | def And(x, y): 1279 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1280 | return x.__and__(y) 1281 | elif isinstance(y, (_ArithmeticExpr, _VideoNode)): 1282 | return y.__rand__(x) 1283 | else: 1284 | return x and y 1285 | 1286 | 1287 | def Or(x, y): 1288 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1289 | return x.__or__(y) 1290 | elif isinstance(y, (_ArithmeticExpr, _VideoNode)): 1291 | return y.__ror__(x) 1292 | else: 1293 | return x or y 1294 | 1295 | 1296 | def Xor(x, y): 1297 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1298 | return x.__xor__(y) 1299 | elif isinstance(y, (_ArithmeticExpr, _VideoNode)): 1300 | return y.__rxor__(x) 1301 | else: 1302 | return (x and not y) or (not x and y) 1303 | 1304 | 1305 | def Log(x): 1306 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1307 | return x.__log__() 1308 | else: 1309 | return math.log(x) 1310 | 1311 | 1312 | def Sqrt(x): 1313 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1314 | return x.__sqrt__() 1315 | else: 1316 | return math.sqrt(x) 1317 | 1318 | 1319 | def Min(x, y): 1320 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1321 | return x.__min__(y) 1322 | elif isinstance(y, (_ArithmeticExpr, _VideoNode)): 1323 | return y.__rmin__(x) 1324 | else: 1325 | return min(x, y) 1326 | 1327 | 1328 | def Max(x, y): 1329 | if isinstance(x, (_ArithmeticExpr, _VideoNode)): 1330 | return x.__max__(y) 1331 | elif isinstance(y, (_ArithmeticExpr, _VideoNode)): 1332 | return y.__rmax__(x) 1333 | else: 1334 | return max(x, y) 1335 | 1336 | 1337 | def Conditional(condition, condition_if_true, condition_if_false): 1338 | try: 1339 | return condition_if_true if condition else condition_if_false 1340 | except RuntimeError: 1341 | if isinstance(condition, (_ArithmeticExpr, _VideoNode)): 1342 | return condition.__conditional__(condition_if_true, condition_if_false) 1343 | elif isinstance(condition_if_true, (_ArithmeticExpr, _VideoNode)): 1344 | return condition_if_true.__rconditional__(condition, condition_if_false) 1345 | elif isinstance(condition_if_false, (_ArithmeticExpr, _VideoNode)): 1346 | return condition_if_false.__rrconditional__(condition, condition_if_true) 1347 | else: 1348 | raise TypeError(f"'Conditional': Unknown input ({type(condition)}, " 1349 | f"{type(condition_if_true)}, {type(condition_if_false)})") 1350 | 1351 | 1352 | def pollute(*modules): 1353 | class _FakeVS: 1354 | def __init__(self): 1355 | self.VideoNode = 
_Fake_VideoNode 1356 | self.core = core 1357 | self.get_core = lambda : core 1358 | 1359 | def __getattr__(self, name): 1360 | return getattr(vs, name) 1361 | 1362 | _vs = _FakeVS() 1363 | 1364 | # modify symbol table of each module 1365 | if len(modules) == 0: 1366 | import sys 1367 | for name, module in sys.modules.items(): 1368 | if ( 1369 | name not in ("__vapoursynth__", "__main__") and 1370 | getattr(module, "core", None) is not core and 1371 | ((getattr(module, "vs", None) is vs) or 1372 | (getattr(module, "core", None) is _vscore)) 1373 | ): 1374 | module.core = core 1375 | module.vs = _vs 1376 | else: 1377 | for module in modules: 1378 | module.core = core 1379 | module.vs = _vs 1380 | --------------------------------------------------------------------------------