├── .gitignore
├── EEDI3 - Readme.txt
├── EEDI3
    ├── Eedi3Sse.cpp
    ├── Eedi3Sse.h
    ├── PlanarFrame.cpp
    ├── PlanarFrame.h
    ├── avisynth.h
    ├── avs
    │   ├── alignment.h
    │   ├── capi.h
    │   ├── config.h
    │   ├── cpuid.h
    │   ├── minmax.h
    │   ├── types.h
    │   └── win.h
    ├── eedi3.cpp
    ├── eedi3.h
    ├── eedi3.rc
    ├── eedi3.sln
    ├── eedi3.vcxproj
    ├── eedi3.vcxproj.filters
    └── resource.h
├── LICENSE
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
  1 | ## Ignore Visual Studio temporary files, build results, and
  2 | ## files generated by popular Visual Studio add-ons.
  3 | 
  4 | # User-specific files
  5 | *.suo
  6 | *.user
  7 | *.userosscache
  8 | *.sln.docstates
  9 | 
 10 | # User-specific files (MonoDevelop/Xamarin Studio)
 11 | *.userprefs
 12 | 
 13 | # Build results
 14 | [Dd]ebug/
 15 | [Dd]ebugPublic/
 16 | [Rr]elease/
 17 | [Rr]eleases/
 18 | x64/
 19 | x86/
 20 | build/
 21 | bld/
 22 | [Bb]in/
 23 | [Oo]bj/
 24 | 
 25 | # Visual Studio 2015 cache/options directory
 26 | .vs/
 27 | 
 28 | # MSTest test Results
 29 | [Tt]est[Rr]esult*/
 30 | [Bb]uild[Ll]og.*
 31 | 
 32 | # NUNIT
 33 | *.VisualState.xml
 34 | TestResult.xml
 35 | 
 36 | # Build Results of an ATL Project
 37 | [Dd]ebugPS/
 38 | [Rr]eleasePS/
 39 | dlldata.c
 40 | 
 41 | *_i.c
 42 | *_p.c
 43 | *_i.h
 44 | *.ilk
 45 | *.meta
 46 | *.obj
 47 | *.pch
 48 | *.pdb
 49 | *.pgc
 50 | *.pgd
 51 | *.rsp
 52 | *.sbr
 53 | *.tlb
 54 | *.tli
 55 | *.tlh
 56 | *.tmp
 57 | *.tmp_proj
 58 | *.log
 59 | *.vspscc
 60 | *.vssscc
 61 | .builds
 62 | *.pidb
 63 | *.svclog
 64 | *.scc
 65 | 
 66 | # Chutzpah Test files
 67 | _Chutzpah*
 68 | 
 69 | # Visual C++ cache files
 70 | ipch/
 71 | *.aps
 72 | *.ncb
 73 | *.opensdf
 74 | *.sdf
 75 | *.cachefile
 76 | 
 77 | # Visual Studio profiler
 78 | *.psess
 79 | *.vsp
 80 | *.vspx
 81 | 
 82 | # TFS 2012 Local Workspace
 83 | $tf/
 84 | 
 85 | # Guidance Automation Toolkit
 86 | *.gpState
 87 | 
 88 | # ReSharper is a .NET coding add-in
 89 | _ReSharper*/
 90 | *.[Rr]e[Ss]harper
 91 | *.DotSettings.user
 92 | 
 93 | # JustCode is a .NET coding add-in
 94 | .JustCode
 95 | 
 96 | # TeamCity is a build add-in
 97 | _TeamCity*
 98 | 
 99 | # DotCover is a Code Coverage Tool
100 | *.dotCover
101 | 
102 | # NCrunch
103 | _NCrunch_*
104 | .*crunch*.local.xml
105 | 
106 | # MightyMoose
107 | *.mm.*
108 | AutoTest.Net/
109 | 
110 | # Web workbench (sass)
111 | .sass-cache/
112 | 
113 | # Installshield output folder
114 | [Ee]xpress/
115 | 
116 | # DocProject is a documentation generator add-in
117 | DocProject/buildhelp/
118 | DocProject/Help/*.HxT
119 | DocProject/Help/*.HxC
120 | DocProject/Help/*.hhc
121 | DocProject/Help/*.hhk
122 | DocProject/Help/*.hhp
123 | DocProject/Help/Html2
124 | DocProject/Help/html
125 | 
126 | # Click-Once directory
127 | publish/
128 | 
129 | # Publish Web Output
130 | *.[Pp]ublish.xml
131 | *.azurePubxml
132 | # TODO: Comment the next line if you want to checkin your web deploy settings 
133 | # but database connection strings (with potential passwords) will be unencrypted
134 | *.pubxml
135 | *.publishproj
136 | 
137 | # NuGet Packages
138 | *.nupkg
139 | # The packages folder can be ignored because of Package Restore
140 | **/packages/*
141 | # except build/, which is used as an MSBuild target.
142 | !**/packages/build/
143 | # Uncomment if necessary however generally it will be regenerated when needed
144 | #!**/packages/repositories.config
145 | 
146 | # Windows Azure Build Output
147 | csx/
148 | *.build.csdef
149 | 
150 | # Windows Store app package directory
151 | AppPackages/
152 | 
153 | # Others
154 | *.[Cc]ache
155 | ClientBin/
156 | [Ss]tyle[Cc]op.*
157 | ~$*
158 | *~
159 | *.dbmdl
160 | *.dbproj.schemaview
161 | *.pfx
162 | *.publishsettings
163 | node_modules/
164 | bower_components/
165 | 
166 | # RIA/Silverlight projects
167 | Generated_Code/
168 | 
169 | # Backup & report files from converting an old project file
170 | # to a newer Visual Studio version. Backup files are not needed,
171 | # because we have git ;-)
172 | _UpgradeReport_Files/
173 | Backup*/
174 | UpgradeLog*.XML
175 | UpgradeLog*.htm
176 | 
177 | # SQL Server files
178 | *.mdf
179 | *.ldf
180 | 
181 | # Business Intelligence projects
182 | *.rdl.data
183 | *.bim.layout
184 | *.bim_*.settings
185 | 
186 | # Microsoft Fakes
187 | FakesAssemblies/
188 | 
189 | # Node.js Tools for Visual Studio
190 | .ntvs_analysis.dat
191 | 
192 | # Visual Studio 6 build log
193 | *.plg
194 | 
195 | # Visual Studio 6 workspace options file
196 | *.opt
197 | 


--------------------------------------------------------------------------------
/EEDI3 - Readme.txt:
--------------------------------------------------------------------------------
  1 |                                                                                                     |
  2 |                                    eedi3 for Avisynth v2.6.x                                        |
  3 |                                     v0.9.2.3 (March 23, 2018)                                       |
  4 |                                           by tritical                                               |
  5 |                                      modified by Firesledge                                         |
  6 |                                        modified by Elegant                                          |
  7 |                                        modified by pinterf                                          |
  8 |                                                                                                     |
  9 |                                            HELP FILE                                                |
 10 | -----------------------------------------------------------------------------------------------------
 11 | -----------------------------------------------------------------------------------------------------
 12 | 
 13 | 
 14 | 
 15 | INFO:
 16 | 
 17 |       eedi3 is an intra-field only deinterlacer. It takes in a frame, throws away one field, and
 18 |    then interpolates the missing pixels using only information from the kept field. It has same
 19 |    rate and double rate modes, and works with YV12, YUY2, and RGB24 input. eedi3 is also very good
 20 |    for enlarging images by powers of 2, and includes a function 'eedi3_rpow2' for that purpose.
 21 | 
 22 |      eedi3 works by finding the best non-decreasing (non-crossing) warping between two lines by minimizing a cost functional.
 23 |    The cost is based on neighborhood similarity (favor connecting regions that look similar), the vertical difference created
 24 |    by the interpolated values (favor small differences), the interpolation directions (favor short connections vs long), and
 25 |    the change in interpolation direction from pixel to pixel (favor small changes).
 26 | 
 27 |    *** Note: eedi3 is threaded using openmp, and is compiled with visual studio 2017.
 28 |    *** Therefore, it needs the visual studio 2017 redistributable package to be
 29 |    *** installed.
 30 |    *** https://www.visualstudio.com/downloads/ look for Microsoft Visual C++ Redistributable for Visual Studio 2017 
 31 | 
 32 | 
 33 | 
 34 | FUNCTIONS:
 35 | 
 36 | 
 37 |   "eedi3", "c[field]i[dh]b[Y]b[U]b[V]b[alpha]f[beta]f[gamma]f[nrad]i[mdis]i[hp]b[ucubic]b[cost3]b[vcheck]i[vthresh0]f[vthresh1]f[vthresh2]f[sclip]c[threads]i[mclip]c[opt]i"
 38 | 
 39 |   "eedi3_rpow2", "c[rfactor]i[alpha]f[beta]f[gamma]f[nrad]i[mdis]i[hp]b[ucubic]b[cost3]b[vcheck]i[vthresh0]f[vthresh1]f[vthresh2]f[cshift]s[fwidth]i[fheight]i[ep0]f[ep1]f[threads]i[opt]i"
 40 | 
 41 | 
 42 | 
 43 | PARAMETERS (eedi3):
 44 | 
 45 |    field -
 46 | 
 47 |       Controls the mode of operation (double vs same rate) and which field is kept.
 48 |       Possible settings:
 49 | 
 50 |          -2 = double rate (alternates each frame), uses avisynth's internal parity value to start
 51 |          -1 = same rate, uses avisynth's internal parity value
 52 |           0 = same rate, keep bottom field
 53 |           1 = same rate, keep top field
 54 |           2 = double rate (alternates each frame), starts with bottom
 55 |           3 = double rate (alternates each frame), starts with top
 56 | 
 57 |       If field is set to -1, then eedi3 calls child->GetParity(0) during initialization.
 58 |       If it returns true, then field is set to 1. If it returns false, then field is set to 0.
 59 |       If field is set to -2, then the same thing happens, but instead of setting field to 1
 60 |       or 0 it is set to 3 or 2.
 61 | 
 62 |       Default:  -1  (int)
 63 | 
 64 | 
 65 |    dh -
 66 | 
 67 |       Doubles the height of the input. Each line of the input is copied to every other line
 68 |       of the output and the missing lines are interpolated. If field=0, the input is copied
 69 |       to the odd lines of the output. If field=1, the input is copied to the even lines of
 70 |       the output. field must be set to either -1, 0, or 1 when using dh=true.
 71 | 
 72 |       Default:  false  (bool)
 73 | 
 74 | 
 75 |    Y, U, V -
 76 | 
 77 |       These control whether or not the specified plane is processed. Set to true to
 78 |       process or false to ignore. Ignored planes are not copied, zero'd, or even
 79 |       considered. So what the ignored planes happen to contain on output is unpredictable.
 80 |       For RGB24 input Y=B, U=G, V=R.
 81 | 
 82 |       Default:  Y = true  (bool)
 83 |                 U = true  (bool)
 84 |                 V = true  (bool)
 85 | 
 86 | 
 87 |    alpha/beta/gamma (defaults: 0.2,0.25,20.0):
 88 | 
 89 |      These trade off line/edge connection vs artifacts created. alpha and beta must be in the range [0,1], and the sum
 90 |      alpha+beta must be in the range [0,1]. alpha is the weight given to connecting similar neighborhoods.. the larger
 91 |      it is the more lines/edges should be connected. beta is the weight given to vertical difference created by the
 92 |      interpolation... the larger beta is the less edges/lines will be connected (at 1.0 you get no edge directedness at all). 
 93 |      The remaining weight (1.0-alpha-beta) is given to interpolation direction (large directions (away from vertical) 
 94 |      cost more)... so the more weight you have here the more shorter connections will be favored. Finally, gamma penalizes
 95 |      changes in interpolation direction, the larger gamma is the smoother the interpolation field between two lines 
 96 |      (range is [0,inf]).
 97 | 
 98 |      If lines aren't getting connected then increase alpha and maybe decrease beta/gamma. Go the other way if you are 
 99 |      getting unwanted artifacts.
100 | 
101 | 
102 |    nrad/mdis (defaults: 2,20):
103 | 
104 |      nrad sets the radius used for computing neighborhood similarity. Valid range is [0,3]. mdis sets the maximum
105 |      connection radius. Valid range is [1,40]. If mdis=20, then when interpolating pixel (50,10) (x,y), the farthest
106 |      connections allowed would be between (30,9)/(70,11) and (70,9)/(30,11). Larger mdis will allow connecting lines
107 |      of smaller slope, but also increases the chance of artifacts. Larger mdis will be slower. Larger nrad will be
108 |      slower.
109 | 
110 | 
111 |    hp/ucubic/cost3 (defaults: false, true, true):
112 | 
113 |       These are speed vs quality options. hp=true, use half pel steps, hp=false, use full pel steps. ucubic=true, use 
114 |       cubic 4 point interpolation, ucubic=false, use 2 point linear interpolation. cost3=true, use 3 neighborhood cost
115 |       function to define similarity, cost3=false, use 1 neighborhood cost function.
116 | 
117 | 
118 |    vcheck/vthresh0/vthresh1/vthresh2/sclip (defaults: 2, 32.0, 64.0, 4.0, NULL):
119 | 
120 |       vcheck settings:
121 | 
122 |           0 - no reliability check
123 |           1 - weak reliability check
124 |           2 - med reliability check
125 |           3 - strong reliability check
126 | 
127 |       If vcheck is greater than 0, then the resulting interpolation is checked for reliability/consistency. Assume
128 |       we interpolated pixel 'fh' below using dir=4 (i.e. averaging pixels bl and cd).
129 | 
130 |            aa ab ac ad ae af ag ah ai aj ak al am an ao ap
131 |                                 eh          el
132 |            ba bb bc bd be bf bg bh bi bj bk bl bm bn bo bp
133 |                     fd          fh          fl
134 |            ca cb cc cd ce cf cg ch ci cj ck cl cm cn co cp
135 |                     gd          gh
136 |            da db dc dd de df dg dh di dj dk dl dm dn do dp
137 | 
138 |       When checking pixel 'fh' the following is computed:
139 | 
140 |             d0 = abs((el+fd)/2 - bh)
141 |             d1 = abs((fl+gd)/2 - ch)
142 | 
143 |             q2 = abs(bh-fh)+abs(ch-fh)
144 |             q3 = abs(el-bl)+abs(fl-bl)
145 |             q4 = abs(fd-cd)+abs(gd-cd)
146 | 
147 |             d2 = abs(q2-q3)
148 |             d3 = abs(q2-q4)
149 | 
150 |             mdiff0 = vcheck == 1 ? min(d0,d1) : vcheck == 2 ? ((d0+d1+1)>>1) : max(d0,d1)
151 |             mdiff1 = vcheck == 1 ? min(d2,d3) : vcheck == 2 ? ((d2+d3+1)>>1) : max(d2,d3)
152 | 
153 |             a0 = mdiff0/vthresh0;
154 |             a1 = mdiff1/vthresh1;
155 |             a2 = max((vthresh2-abs(dir))/vthresh2,0.0f)
156 | 						
157 |             a = min(max(max(a0,a1),a2),1.0f)
158 | 						
159 |             final_value = (1.0-a)*fh + a*cint
160 | 
161 | 
162 |         ** If sclip is supplied, cint is the corresponding value from sclip. If sclip isn't supplied,
163 |            then vertical cubic interpolation is used to create it.
164 | 
165 | 
166 |    mclip (default: not set)
167 | 
168 |       A mask to use edge-directed interpolation only on specified pixels. The clip should have the
169 |       same format as the input clip. Pixels where the mask is 0 are generated using cubic linear or
170 |       bicubic interpolation. The main goal of the mask is to save calculations.
171 |       This parameter does not exist in eedi3_rpow2.
172 | 
173 | 
174 |    opt (default: 0)
175 | 
176 |       Sets which cpu optimizations to use. Possible settings:
177 | 
178 |          0 = auto detect
179 |          1 = use c
180 |          2 = use sse2
181 | 
182 | 
183 |    threads (default: 0):
184 | 
185 |       Sets the number of threads used by openmp.
186 | 
187 |          0 = default (environment variable OMP_NUM_THREADS)
188 |          > 0 = calls omp_set_num_threads(threads)
189 | 
190 | 
191 | 
192 | PARAMETERS (eedi3_rpow2):
193 | 
194 |    rfactor -
195 | 
196 |       Image enlargement factor. Must be a power of 2 in the range [2,1024].
197 | 
198 |       Default:  not set  (int)
199 | 
200 | 
201 |    cshift -
202 | 
203 |       Sets the resizer used for correcting the image center shift that eedi3_rpow2
204 |       introduces. This can be any of Avisynth's internal resizers, such as "spline36resize",
205 |       "lanczosresize", etc... If not specified the shift is not corrected. The correction
206 |       is accomplished by using the subpixel cropping capability of Avisynth's internal
207 |       resizers.
208 | 
209 |       Default:  not set  (string)
210 | 
211 | 
212 |    fwidth/fheight -
213 |    
214 |       If correcting the image center shift by using the 'cshift' parameter, fwidth/fheight
215 |       allow you to set a new output resolution. First, the image is enlarged by 'rfactor'
216 |       using eedi3. Once that is completed the image center shift is corrected, and the
217 |       image is resampled to fwidth x fheight resolution. The shifting and resampling
218 |       happen in one call using the internal Avisynth resizer you specify via the 'cshift'
219 |       string. If fwidth/fheight are not specified, then they are set equal to rfactor*width
220 |       and rfactor*height respectively (in other words they do nothing).
221 | 
222 |       Default:  not set  (int)
223 |                 not set  (int)
224 | 
225 | 
226 |    ep0/ep1 -
227 | 
228 |       Some Avisynth resizers take optional arguments, such as 'taps' for lanczosresize or
229 |       'p' for gaussresize. ep0/ep1 allow you to pass values for these optional arguments
230 |       when using the 'cshift' parameter. If the resizer only takes one optional argument
231 |       then ep0 is used. If the argument that the resizer takes is not a float value,
232 |       then ep0 gets rounded to an integer. If the resizer takes two optional arguments,
233 |       then ep0 corresponds to the first one, and ep1 corresponds to the second. The only
234 |       resizer that takes more than one optional argument is bicubicresize(), which takes
235 |       'b' and 'c'. So ep0 = b, and ep1 = c. If ep0/ep1 are not set then the default value
236 |       for the optional argument(s) of the resizer is used.
237 | 
238 |       Default:  not set  (float)
239 |                 not set  (float)
240 | 
241 | 
242 |    alpha/beta/gamma/nrad/mdis/hp/ucubic/cost3/vcheck/vthresh0/vthresh1/vthresh2/threads/opt -
243 | 
244 |       Same as corresponding parameters in eedi3.
245 | 
246 | 
247 | 
248 | eedi3_rpow2 EXAMPLES:
249 | 
250 | 
251 |    a.) enlarge image by 4x, don't correct for center shift.
252 | 
253 |           eedi3_rpow2(rfactor=4)
254 | 
255 | 
256 |    b.) enlarge image by 2x, correct for center shift using spline36resize.
257 | 
258 |           eedi3_rpow2(rfactor=2,cshift="spline36resize")
259 | 
260 | 
261 |    c.) enlarge image by 8x, correct for center shift and downsample from
262 |        8x to 7x using lanczosresize with 5 taps.
263 | 
264 |           eedi3_rpow2(rfactor=8,cshift="lanczosresize",fwidth=width*7,fheight=height*7,ep0=5)
265 | 
266 | 
267 | 
268 | CHANGE LIST:
269 |     v0.9.2.3 - March 23, 2018
270 |        - Y8, Y16 and Y24 colorspaces as input: go live.
271 | 
272 |     v0.9.2.2 - March 23, 2018
273 |        - Moved to Visual Studio 2017
274 |        - Removed external asm files (integrated/replaced with intrinsics)
275 |        - Update Avisynth headers
276 |        - fix YUY2 x64 artifacts
277 |        - https://github.com/pinterf/EEDI3 
278 | 
279 |     v0.9.2.1 - July 29th, 2015
280 |        - Switched to AVS 2.6 API
281 |        - Added x64 support
282 |        - Updated some variable types for clarity
283 |        - Compiled with Visual Studio 2013
284 |        - https://github.com/Elegant996/EEDI3
285 | 
286 |     v0.9.2 - November 11, 2013
287 |        - Added SSE2 optimizations for full-pel interpolation
288 |        - Added mclip masking
289 |        - Compiled with Visual Studio 2012
290 | 
291 |     v0.9.1 - July 23, 2010
292 | 
293 |        - fix field=0/1 flipped with rgb24 input
294 |        - fix possible reading off the edge of a frame with cost3=true
295 | 
296 | 


--------------------------------------------------------------------------------
/EEDI3/Eedi3Sse.cpp:
--------------------------------------------------------------------------------
  1 | /*****************************************************************************
  2 | 
  3 |         Eedi3Sse.cpp
  4 | 
  5 | Optimizations for SSE/SSE2:
  6 | 
  7 | Process 8 lines at once.
  8 | First make a buffer of a capacity of 8*N lines, where N is the number of source
  9 | lines actually needed for processing a single output line (here: 4).
 10 | The PlanarFrame thing could be bypassed here when vcheck is 0.
 11 | Fill the buffer in the following way:
 12 |   For each source line offset o (-3, -1, +1, +3)
 13 |     For each column x (in 0..width-1, with a margin if necessary)
 14 |       For each line k in (0..7)
 15 |         Put the pixel src (x, y + o + k) in location (x * 8 + k, o)
 16 |         We could do the hpel interpolation here too.
 17 | Then do the exact C++ processing, but use vectors instead of scalars,
 18 | and multiply each horizontal coordinate by 8.
 19 | For AA purpose, we could interpolate columns with no additional transpose
 20 | cost (but what about the vcheck?)
 21 | 
 22 | With 8-bit input, the connection cost could be computed in 16-bit integer.
 23 | We could use multipliers like 32 or even 16 to get alpha and beta as integer
 24 | (they probably don't need a high accuracy).
 25 | 
 26 | Question: The C++ code uses double precision in the path cost section.
 27 | Is it really needed? The results are stored as float anyway...
 28 | We could go just with float and use single precision SSE registers.
 29 | Float overflow shouldn't be a concern here.
 30 | The visible problem is that the path cost loop acts like an integrator
 31 | of positive values, so the loss of precision could be an issue.
 32 | Test to conduct: check what is the maximum pcost value for a 8K picture
 33 | full of very contrasted hard edges and important background noise.
 34 | 
 35 | The inner loop in the path cost calculation could benefit from unrolling
 36 | (index clipped by u-1 and u+1).
 37 | 
 38 | The backtrack can be done only in C++ because of the indexing.
 39 | 
 40 | The "block" part of the algorithm would benefit from AVX2 (everything done
 41 | in one pass).
 42 | 
 43 | Other notes:
 44 | 
 45 | With the C++ version, 16-bit data interpolation could be done almost for free,
 46 | just templatize the function to use uint8_t/short as input and output
 47 | data type and saturate the result to the correct values.
 48 | 
 49 | TO DO:
 50 | 
 51 | - Understand and modify the vcheck code in order to remove the intermediate
 52 |   copies if possible.
 53 | - 16-bit input/output
 54 | - hpel interpolation
 55 | - Check the amount of horizontal margins really required. Why 12?
 56 |   It seems that the margin doesn't need to be greater than nrad (max 3).
 57 | - Optimize the core code of prepare/copy_result_lines*() with SSE2 code
 58 |   (transpose matrix)
 59 | - Treat the (expanded) mask differently, making it work like if masked areas
 60 |   were picture boundaries (precompute umax for every x). This should save
 61 |   more CPU.
 62 | 
 63 | 
 64 | Copyright (C) 2010 Kevin Stone - some part by Laurent de Soras, 2013
 65 | 
 66 | This program is free software; you can redistribute it and/or modify
 67 | it under the terms of the GNU General Public License as published by
 68 | the Free Software Foundation; either version 2 of the License, or
 69 | (at your option) any later version.
 70 | 
 71 | This program is distributed in the hope that it will be useful,
 72 | but WITHOUT ANY WARRANTY; without even the implied warranty of
 73 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 74 | GNU General Public License for more details.
 75 | 
 76 | You should have received a copy of the GNU General Public License
 77 | along with this program; if not, write to the Free Software
 78 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 79 | 
 80 | *Tab=3***********************************************************************/
 81 | 
 82 | 
 83 | 
 84 | #if defined (_MSC_VER)
 85 | #pragma warning (1 : 4130 4223 4705 4706)
 86 | #pragma warning (4 : 4355 4786 4800)
 87 | #endif
 88 | 
 89 | 
 90 | 
 91 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 92 | 
 93 | #include	"Eedi3Sse.h"
 94 | 
 95 | #include	<algorithm>
 96 | 
 97 | #include	<cassert>
 98 | #include	<cfloat>
 99 | #include	<cstring>
100 | 
101 | // Per-lane |a - b| on unsigned 16-bit values: of the two saturating differences one is always 0, so OR yields the other.
102 | static __forceinline __m128i	difabs16(__m128i a, __m128i b)
103 | {
104 |   const __m128i a_minus_b = _mm_subs_epu16(a, b);
105 |   return _mm_or_si128(a_minus_b, _mm_subs_epu16(b, a));
106 | }
107 | 
108 | 
109 | 
110 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
111 | 
112 | 
113 | 
114 | /*
115 | ==============================================================================
116 | Name: prepare_lines_8bits
117 | Description:
118 |   Reorders data before processing a set of lines.
119 |   At most COL_H (8) lines will be processed per call.
120 | Input parameters:
121 |   - dst_pitch: In columns. One column = 8 pixels from 8 interpolated lines
122 |     on the same abscissa.
123 |   - src_ptr: Points on the top first *existing* reference line.
124 |   - src_pitch: In bytes. Strides from one reference line to the next one.
125 |   - bpp: Bytes per pixel (source). 1 for planar, or the horizontal step for
126 |     interleaved formats.
127 |   - width: Source width, in pixels
128 |   - height: Number of existing reference lines
129 |   - src_y: third reference line of the group of 4*8 to prepare.
130 |     The interpolated line is located between the second and the third.
131 | Output parameters:
132 |   - dst_ptr: contains 4 reference lines (existing or mirrored) made of
133 |     width + MARGIN_H * 2 columns. Points on the left margin.
134 | Throws: Nothing
135 | ==============================================================================
136 | */
137 | 
138 | void	Eedi3Sse::prepare_lines_8bits(uint16_t *dst_ptr, int dst_pitch, const uint8_t *src_ptr, int src_pitch, int bpp, int width, int height, int src_y)
139 | {
140 |   assert(dst_ptr != 0);
141 |   assert(dst_pitch > 0);
142 |   assert(src_ptr != 0);
143 |   assert(src_pitch > 0);
144 |   assert(bpp > 0);
145 |   assert(width > 0);
146 |   assert(width * bpp <= src_pitch);
147 |   assert(width + MARGIN_H * 2 <= dst_pitch);
148 |   assert(height > 0);
149 |   assert(src_y >= 0);
150 |   assert(src_y <= height);
151 | 
152 |   // Four packed reference lines are produced, for source offsets -2..+1 around src_y.
153 |   // Each of them occupies dst_pitch columns of COL_H 16-bit values.
154 |   uint16_t *     group_ptr = dst_ptr;
155 |   for (int ref = -2; ref < 2; ++ref, group_ptr += dst_pitch * COL_H)
156 |   {
157 |     // Lane k of every column receives the pixel read from line src_y + ref + k.
158 |     for (int lane = 0; lane < COL_H; ++lane)
159 |     {
160 |       const int      y2 = src_y + ref + lane;
161 |       int            row = (y2 < 0) ? -1 - y2 : y2;   // reflect above the first line
162 |       if (row >= height)
163 |       {
164 |         row = height * 2 - 1 - row;                    // reflect below the last line
165 |       }
166 |       row = std::max(row, 0);                          // keep the index non-negative
167 | 
168 |       const uint8_t* row_ptr = src_ptr + row * src_pitch;
169 |       uint16_t *     out_ptr = group_ptr + lane;
170 | 
171 |       // Left margin: the first pixels in reverse order, clamped to the line width.
172 |       for (int m = 0; m < MARGIN_H; ++m)
173 |       {
174 |         const int      src_x = std::min(MARGIN_H - 1 - m, width - 1);
175 |         out_ptr[m * COL_H] = static_cast <uint16_t> (row_ptr[src_x * bpp] << 8);
176 |       }
177 |       // Picture area: each 8-bit sample is moved to the upper byte of a 16-bit value.
178 |       for (int x = 0; x < width; ++x)
179 |       {
180 |         out_ptr[(MARGIN_H + x) * COL_H] = static_cast <uint16_t> (row_ptr[x * bpp] << 8);
181 |       }
182 |       // Right margin: the last pixels in reverse order, clamped to column 0.
183 |       for (int m = 0; m < MARGIN_H; ++m)
184 |       {
185 |         const int      src_x = std::max(width - 1 - m, 0);
186 |         out_ptr[(MARGIN_H + width + m) * COL_H] = static_cast <uint16_t> (row_ptr[src_x * bpp] << 8);
187 |       }
188 |     }
189 |   }
190 | }
191 | 
192 | 
193 | 
194 | void	Eedi3Sse::prepare_mask_8bits(uint8_t *dst_ptr, const uint8_t *src_ptr, int src_pitch, int bpp, int width, int height, int src_y)
195 | {
196 |   assert(dst_ptr != 0);
197 |   assert(src_ptr != 0);
198 |   assert(src_pitch > 0);
199 |   assert(bpp > 0);
200 |   assert(width > 0);
201 |   assert(width * bpp <= src_pitch);
202 |   assert(height > 0);
203 |   assert(src_y >= 0);
204 |   assert(src_y <= height);
205 | 
206 |   // Packs COL_H mask lines starting at src_y: lane k of column x receives the
207 |   // mask byte of pixel x on line src_y + k.
208 |   for (int lane = 0; lane < COL_H; ++lane)
209 |   {
210 |     const int      y = src_y + lane;
211 | 
212 |     // Lines past the bottom are reflected back; the result is kept non-negative.
213 |     const int      reflected = (y >= height) ? height * 2 - 1 - y : y;
214 |     const int      row = std::max(reflected, 0);
215 | 
216 |     const uint8_t* row_ptr = src_ptr + row * src_pitch;
217 |     uint8_t *      out_ptr = dst_ptr + lane;
218 | 
219 |     for (int x = 0; x < width; ++x)
220 |     {
221 |       out_ptr[x * COL_H] = row_ptr[x * bpp];
222 |     }
223 |   }
224 | }
225 | 
226 | 
227 | 
228 | /*
229 | ==============================================================================
230 | Name: copy_result_lines_8bits
231 | Description:
232 |   Unpack the result lines to the destination frame.
233 |   At most COL_H (8) lines are processed per call.
234 |   16-bit data is rounded to 8 bits for implementation simplicity.
235 | Input parameters:
236 |   - dst_pitch: In bytes. Strides from one line of the destination frame to
237 |     the next one.
238 |   - src_ptr: Pointer on the interpolation result data.
239 |   - src_pitch: In columns. One column = 8 pixels from 8 interpolated lines
240 |     on the same abscissa.
241 |   - bpp: Bytes per pixel (destination). 1 for planar, or the horizontal step
242 |     for interleaved formats.
243 |   - width: Destination frame width, in pixels
244 |   - height: Destination frame height, in pixels
245 |   - dst_y: Position of the first line to unpack.
246 | Output parameters:
247 |   - dst_ptr: Pointer on the top left of the destination frame (no margin).
248 | Throws: Nothing
249 | ==============================================================================
250 | */
251 | 
252 | void	Eedi3Sse::copy_result_lines_8bits(uint8_t *dst_ptr, int dst_pitch, const uint16_t *src_ptr, int src_pitch, int bpp, int width, int height, int dst_y)
253 | {
254 |   assert(dst_ptr != 0);
255 |   assert(dst_pitch > 0);
256 |   assert(src_ptr != 0);
257 |   assert(src_pitch > 0);
258 |   assert(bpp > 0);
259 |   assert(width > 0);
260 |   assert(width * bpp <= dst_pitch);
261 |   assert(width <= src_pitch);
262 |   assert(height > 0);
263 |   assert(dst_y >= 0);
264 |   assert(dst_y < height);
265 | 
266 |   // Lane k of each packed column goes to line dst_y + k; stop at the bottom of the frame.
267 |   const int      last_y = std::min(dst_y + COL_H, height);
268 |   for (int lane = 0; dst_y + lane < last_y; ++lane)
269 |   {
270 |     const uint16_t *  in_ptr = src_ptr + lane;
271 |     uint8_t *         out_ptr = dst_ptr + (dst_y + lane) * dst_pitch;
272 |     for (int x = 0; x < width; ++x)
273 |     {
274 |       // Round to the nearest 8-bit value; inputs from 0xFF80 up would give 256, hence the clamp.
275 |       const int      rounded = (in_ptr[x * COL_H] + 0x80) >> 8;
276 |       out_ptr[x * bpp] = std::min(std::max(rounded, 0), 255);
277 |     }
278 |   }
279 | }
280 | 
281 | 
282 | 
283 | void	Eedi3Sse::copy_result_dmap(int16_t *dst_ptr, int dst_pitch, const int16_t *src_ptr, int src_pitch, int width, int height, int dst_y)
284 | {
285 |   assert(dst_ptr != 0);
286 |   assert(dst_pitch > 0);
287 |   assert(src_ptr != 0);
288 |   assert(src_pitch > 0);
289 |   assert(width > 0);
290 |   assert(width <= dst_pitch);
291 |   assert(width <= src_pitch);
292 |   assert(height > 0);
293 |   assert(dst_y >= 0);
294 |   assert(dst_y < height);
295 | 
296 |   // Unpacks up to COL_H dmap lines: lane k of each packed column goes to line dst_y + k.
297 |   const int      last_y = std::min(dst_y + COL_H, height);
298 |   for (int lane = 0; dst_y + lane < last_y; ++lane)
299 |   {
300 |     const int16_t *   in_ptr = src_ptr + lane;
301 |     int16_t *         out_ptr = dst_ptr + (dst_y + lane) * dst_pitch;
302 | 
303 |     for (int x = 0; x < width; ++x)
304 |     {
305 |       out_ptr[x] = in_ptr[x * COL_H];
306 |     }
307 |   }
308 | }
309 | 
310 | 
311 | 
312 | /*
313 | ==============================================================================
314 | Name: interp_lines_full_pel
315 | Description:
316 |   Interpolates COL_H lines at once, full-pixel precision.
317 |   Data must be packed with prepare_lines*() and the result unpacked with
318 |   copy_result_lines*().
319 | Input parameters:
320 |   - src_ptr: Pointer on an array of 4 source lines with their margins
321 |     (MARGIN_H pixels on both sides)
322 |     Each line actually contains 8 packed lines, so each __m128i vector is
323 |     made of 8 16-bit unsigned pixels from 8 different lines, on the same
324 |     abscissa.
325 |     It must point to the first left pixel of the left margin.
326 |     The interpolated line will be located between the second and the third
327 |     source lines.
328 |   - msk_ptr: A pointer on mask data, or 0 if all pixels must be processed.
329 |     Mask data contains 8 lines of boolean bytes (0 or != 0), packed by 8
330 |     pixels to follow the output format.
331 |     The mask hasn't any margin.
332 |   - width: Number of pixels (or __m128 units) to process.
333 |   - pitch: Pitch of the source in pixels (or __m128 units). Should obviously
334 |     take the left and right margins into account.
335 |   - alpha: See user documentation.
336 |   - beta: See user documentation.
337 |   - gamma: See user documentation.
338 |   - nrad: See user documentation.
339 |   - mdis: See user documentation.
340 |   - ucubic: See user documentation.
341 |   - cost3: See user documentation.
342 | Output parameters:
343 |   - dst_ptr: Pointer on a buffer receiving the interpolated line.
344 |     8 packed unsigned 16-bit pixels per vector.
345 |     Only unmasked pixels are valid.
346 |   - dmap_ptr: A buffer of signed 16-bit int corresponding to the interpolated
347 |     pixels, in packed form again.
348 |     The values give the horizontal slope of the interpolation direction for
349 |     each pixel and are later used in the vcheck pass.
350 |     Only unmasked pixels are valid.
351 | Input/output parameters:
352 |   - tmp_ptr: A temporary buffer. Its size in bytes is at least:
353 |     ((4 * (mdis * 2 + 1) + 1) * VECTSIZE * 4 + 1) * width
354 | Throws: Nothing
355 | ==============================================================================
356 | */
357 | 
358 | void	Eedi3Sse::interp_lines_full_pel(const __m128i *src_ptr, __m128i *dst_ptr, const uint8_t *msk_ptr, uint8_t *tmp_ptr, __m128i *dmap_ptr, int width, int pitch, float alpha, float beta, float gamma, int nrad, int mdis, bool ucubic, bool cost3)
359 | {
360 |   assert(src_ptr != 0);
361 |   assert(dst_ptr != 0);
362 |   assert(tmp_ptr != 0);
363 |   assert(dmap_ptr != 0);
364 |   assert(width > 0);
365 |   assert(pitch > 0);
366 |   assert(alpha >= 0);
367 |   assert(beta >= 0);
368 |   assert(alpha + beta <= 1.0f);
369 |   assert(gamma >= 0);
370 |   assert(nrad >= 0);
371 |   assert(nrad <= 3);
372 |   assert(mdis > 0);
373 | 
374 |   // First, skips the left margin so that x = 0 addresses actual data.
375 |   src_ptr += MARGIN_H;
376 |   // 3p/1p: the two lines above the interpolated one; 1n/3n: the two below.
377 |   const __m128i* src3p_ptr = src_ptr;
378 |   const __m128i* src1p_ptr = src_ptr + 1 * pitch;
379 |   const __m128i* src1n_ptr = src_ptr + 2 * pitch;
380 |   const __m128i* src3n_ptr = src_ptr + 3 * pitch;
381 | 
382 |   /*** Debugging code *********************************************************/
383 | #if 0 // EDI bypass (simple cubic interpolation)
384 |   const __m128i  zero = _mm_setzero_si128();
385 |   const __m128i	nine16 = _mm_set1_epi16(9);
386 |   const __m128i	sign16 = _mm_set1_epi16(-0x8000);
387 |   const __m128i	cubic_cst = _mm_set1_epi32(-0x8000 * 8 + 4); // Rounding and sign change
388 |   for (int x = 0; x < width; ++x)
389 |   {
390 |     __m128i s1p = _mm_load_si128(src1p_ptr + x);
391 |     __m128i s1n = _mm_load_si128(src1n_ptr + x);
392 |     __m128i s3p = _mm_load_si128(src3p_ptr + x);
393 |     __m128i s3n = _mm_load_si128(src3n_ptr + x);
394 |     _mm_store_si128(dst_ptr + x, interp_cubic8(
395 |       s1p, s1n, s3p, s3n, nine16, sign16, cubic_cst, zero
396 |     ));
397 |     _mm_store_si128(dmap_ptr + x, zero);
398 |   }
399 | #else
400 | /****************************************************************************/
401 | 
402 |   const int      tpitch = mdis * 2 + 1;  // Candidate directions -mdis..+mdis per column
403 |   int            tmpofs = 0;  // Running byte offset into tmp_ptr
404 |   // Declares array N made of S elements of type T, carved out of tmp_ptr.
405 | #define Eedi3Sse_DECL( T, N, S) \
406 | 	T  *          N = reinterpret_cast <T *> (tmp_ptr + tmpofs); \
407 | 	tmpofs += S * sizeof (T);
408 | 
409 |   // ccosts is grouped in 2 separate chunks of 4 packed lines,
410 |   // pcosts, pbackt and fpath are chunks of 4 packed lines,
411 |   // bmask contains a single boolean for each 8-line column
412 |   Eedi3Sse_DECL(float, ccosts, 2 * width * tpitch * VECTSIZE); // Array of mdis*2+1 costs for each pixel of the line
413 |   Eedi3Sse_DECL(float, pcosts, width * tpitch * VECTSIZE);
414 |   Eedi3Sse_DECL(int32_t, pbackt, width * tpitch * VECTSIZE);
415 |   Eedi3Sse_DECL(int32_t, fpath, width          * VECTSIZE);
416 |   Eedi3Sse_DECL(bool, bmask, width);
417 | 
418 | #undef Eedi3Sse_DECL
419 | 
420 |   if (msk_ptr != 0)
421 |   {
422 |     memset(ccosts, 0, 2 * tpitch * width * VECTSIZE * sizeof(float));  // Columns skipped below keep a zero cost
423 |     expand_mask(bmask, msk_ptr, width, mdis);  // bmask [x] != 0: column x must be processed
424 |   }
425 | 
426 |   const __m128i  zero = _mm_setzero_si128();
427 |   const __m128   alpha_4 = _mm_set1_ps((cost3) ? alpha / 3.f : alpha);  // cost3 adds up 3 sums, hence the / 3
428 |   const __m128   ab_4 = _mm_set1_ps(1.0f - alpha - beta);
429 | 
430 |   const int      tpitch_v = tpitch * VECTSIZE;  // Number of 32-bit elements per column
431 |   const int      ofs_p4 = width * tpitch_v;  // Offset of the ccosts chunk storing the 4 high lanes
432 | 
433 |   // -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
434 |   // Calculate all connection costs
435 | 
436 |   // beta is calibrated for 8-bit content. We have to scale it because it is
437 |   // not multiplied by a pixel value in the formula, contrary to the global
438 |   // result which depends on the data scale.
439 |   const float    beta16 = beta * 256;
440 | 
441 |   if (!cost3)
442 |   {
443 |     for (int x = 0; x < width; ++x)
444 |     {
445 |       if (msk_ptr == 0 || bmask[x] != 0)
446 |       {
447 |         const int      umax = std::min(std::min(x, width - 1 - x), mdis);  // Keeps x - u and x + u in [0 ; width - 1]
448 |         for (int u = -umax; u <= umax; ++u)
449 |         {
450 |           __m128i        s_0;
451 |           __m128i			s_1;
452 |           sum_nrad(
453 |             s_0, s_1, zero, nrad, x - u, x + u,
454 |             src3p_ptr, src1p_ptr, src1n_ptr, src3n_ptr
455 |           );
456 | 
457 |           // should use cubic if ucubic=true
458 |           const __m128i  x1pr = _mm_load_si128(src1p_ptr + x + u);
459 |           const __m128i  x1nl = _mm_load_si128(src1n_ptr + x - u);
460 |           const __m128i  ip = _mm_avg_epu16(x1pr, x1nl);
461 |           // Distance of the directional average to the pixels right above and below.
462 |           const __m128i  x1p = _mm_load_si128(src1p_ptr + x);
463 |           const __m128i  x1n = _mm_load_si128(src1n_ptr + x);
464 |           const __m128i  vdp = difabs16(x1p, ip);
465 |           const __m128i  vdn = difabs16(x1n, ip);
466 |           const __m128i  v0 = _mm_add_epi32(
467 |             _mm_unpacklo_epi16(vdp, zero),
468 |             _mm_unpacklo_epi16(vdn, zero)
469 |           );
470 |           const __m128i  v1 = _mm_add_epi32(
471 |             _mm_unpackhi_epi16(vdp, zero),
472 |             _mm_unpackhi_epi16(vdn, zero)
473 |           );
474 | 
475 |           const __m128   cc_a0 = _mm_mul_ps(_mm_cvtepi32_ps(s_0), alpha_4);
476 |           const __m128   cc_a1 = _mm_mul_ps(_mm_cvtepi32_ps(s_1), alpha_4);
477 | 
478 |           const __m128   cc_b = _mm_set1_ps(beta16 * std::abs(u));
479 | 
480 |           const __m128   cc_c0 = _mm_mul_ps(_mm_cvtepi32_ps(v0), ab_4);
481 |           const __m128   cc_c1 = _mm_mul_ps(_mm_cvtepi32_ps(v1), ab_4);
482 | 
483 |           const __m128   cc0 = _mm_add_ps(_mm_add_ps(cc_a0, cc_b), cc_c0);
484 |           const __m128   cc1 = _mm_add_ps(_mm_add_ps(cc_a1, cc_b), cc_c1);
485 |           const int      dpos = (x * tpitch + mdis + u) * VECTSIZE;
486 |           _mm_store_ps(ccosts + dpos, cc0);
487 |           _mm_store_ps(ccosts + dpos + ofs_p4, cc1);
488 |         }
489 |       }
490 |     }
491 |   }
492 |   else	// cost3
493 |   {
494 |     for (int x = 0; x < width; ++x)
495 |     {
496 |       if (msk_ptr == 0 || bmask[x] != 0)
497 |       {
498 |         const int      umax = std::min(std::min(x, width - 1 - x), mdis);
499 |         for (int u = -umax; u <= umax; ++u)
500 |         {
501 |           const bool     s1_flag = ((u >= 0 && x >= u * 2)
502 |             || (u <= 0 && x < width + u * 2));
503 |           const bool     s2_flag = ((u <= 0 && x >= u * -2)
504 |             || (u >= 0 && x < width + u * 2));
505 |           // NOTE(review): for u >= 0 the second s2_flag test is always true (x < width); "width - u * 2" may have been intended - confirm.
506 |           __m128i        s0_0;
507 |           __m128i        s0_1;
508 |           sum_nrad(
509 |             s0_0, s0_1, zero, nrad, x - u, x + u,
510 |             src3p_ptr, src1p_ptr, src1n_ptr, src3n_ptr
511 |           );
512 | 
513 |           __m128i        s1_0;
514 |           __m128i        s1_1;
515 |           if (s1_flag)
516 |           {
517 |             sum_nrad(
518 |               s1_0, s1_1, zero, nrad, x - 2 * u, x,
519 |               src3p_ptr, src1p_ptr, src1n_ptr, src3n_ptr
520 |             );
521 |           }
522 | 
523 |           __m128i        s2_0;
524 |           __m128i        s2_1;
525 |           if (s2_flag)
526 |           {
527 |             sum_nrad(
528 |               s2_0, s2_1, zero, nrad, x, x + 2 * u,
529 |               src3p_ptr, src1p_ptr, src1n_ptr, src3n_ptr
530 |             );
531 |           }
532 |           // A side sum that was not computed falls back on the other side, else on the centre sum (s0).
533 |           s1_0 = (s1_flag) ? s1_0 : ((s2_flag) ? s2_0 : s0_0);
534 |           s1_1 = (s1_flag) ? s1_1 : ((s2_flag) ? s2_1 : s0_1);
535 |           s2_0 = (s2_flag) ? s2_0 : ((s1_flag) ? s1_0 : s0_0);
536 |           s2_1 = (s2_flag) ? s2_1 : ((s1_flag) ? s1_1 : s0_1);
537 | 
538 |           const __m128i  s_0 = _mm_add_epi32(_mm_add_epi32(s0_0, s1_0), s2_0);
539 |           const __m128i  s_1 = _mm_add_epi32(_mm_add_epi32(s0_1, s1_1), s2_1);
540 | 
541 |           // should use cubic if ucubic=true
542 |           const __m128i  x1pr = _mm_load_si128(src1p_ptr + x + u);
543 |           const __m128i  x1nl = _mm_load_si128(src1n_ptr + x - u);
544 |           const __m128i  ip = _mm_avg_epu16(x1pr, x1nl);
545 | 
546 |           const __m128i  x1p = _mm_load_si128(src1p_ptr + x);
547 |           const __m128i  x1n = _mm_load_si128(src1n_ptr + x);
548 |           const __m128i  vdp = difabs16(x1p, ip);
549 |           const __m128i  vdn = difabs16(x1n, ip);
550 |           const __m128i  v0 = _mm_add_epi32(
551 |             _mm_unpacklo_epi16(vdp, zero),
552 |             _mm_unpacklo_epi16(vdn, zero)
553 |           );
554 |           const __m128i  v1 = _mm_add_epi32(
555 |             _mm_unpackhi_epi16(vdp, zero),
556 |             _mm_unpackhi_epi16(vdn, zero)
557 |           );
558 | 
559 |           const __m128   cc_a0 = _mm_mul_ps(_mm_cvtepi32_ps(s_0), alpha_4);
560 |           const __m128   cc_a1 = _mm_mul_ps(_mm_cvtepi32_ps(s_1), alpha_4);
561 | 
562 |           const __m128   cc_b = _mm_set1_ps(beta16 * std::abs(u));
563 | 
564 |           const __m128   cc_c0 = _mm_mul_ps(_mm_cvtepi32_ps(v0), ab_4);
565 |           const __m128   cc_c1 = _mm_mul_ps(_mm_cvtepi32_ps(v1), ab_4);
566 | 
567 |           const __m128   cc0 = _mm_add_ps(_mm_add_ps(cc_a0, cc_b), cc_c0);
568 |           const __m128   cc1 = _mm_add_ps(_mm_add_ps(cc_a1, cc_b), cc_c1);
569 |           const int      dpos = (x * tpitch + mdis + u) * VECTSIZE;
570 |           _mm_store_ps(ccosts + dpos, cc0);
571 |           _mm_store_ps(ccosts + dpos + ofs_p4, cc1);
572 |         }
573 |       }
574 |     }
575 |   }
576 | 
577 |   const __m128   fltmax = _mm_set1_ps(FLT_MAX);
578 |   const __m128   fltmax9 = _mm_set1_ps(FLT_MAX * 0.9f);
579 |   const __m128i	nine16 = _mm_set1_epi16(9);
580 |   const __m128i	sign16 = _mm_set1_epi16(-0x8000);
581 |   const __m128i	cubic_cst = _mm_set1_epi32(-0x8000 * 8 + 4); // Rounding and sign change
582 | 
583 |   // Same reason as beta16
584 |   const float    gamma16 = gamma * 256;
585 | 
586 |   // The following operations are done in 2 passes (the "blocks"), because
587 |   // we can process only VECTSIZE pixels at once (FP32 data).
588 |   // Note: ccosts pointer is shifted at the end of the block
589 |   for (int block = 0; block < 2; ++block)
590 |   {
591 |     // -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
592 |     // Calculate path costs
593 |     // Column 0 only offers the straight direction (u = 0).
594 |     for (int k = 0; k < VECTSIZE; ++k)
595 |     {
596 |       const int      p = mdis * VECTSIZE + k;
597 |       pcosts[p] = ccosts[p];
598 |     }
599 | 
600 |     for (int x = 1; x < width; ++x)
601 |     {
602 |       float *        tT = ccosts + x * tpitch_v;  // Connection costs of column x
603 |       float *        ppT = pcosts + (x - 1) * tpitch_v;  // Path costs of column x - 1
604 |       float *        pT = pcosts + x * tpitch_v;  // Path costs of column x
605 |       __m128i *      piT = reinterpret_cast <__m128i *> (pbackt + (x - 1) * tpitch_v);  // Back-pointers from column x to x - 1
606 |       // Masked-out column: no search, costs and back-pointers are copied.
607 |       if (msk_ptr != 0 && bmask[x] == 0)
608 |       {
609 |         if (x == 1)
610 |         {
611 |           const int      umax = std::min(std::min(x, width - 1 - x), mdis);
612 |           const int      p = (mdis - umax) * VECTSIZE;
613 |           memcpy(pT + p, tT + p, (umax * 2 + 1) * VECTSIZE * sizeof(*pT));
614 |           memset(piT, 0, tpitch * sizeof(*piT));
615 |         }
616 |         else
617 |         {
618 |           memcpy(pT, ppT, tpitch_v * sizeof(*pT));
619 |           memcpy(piT, piT - tpitch, tpitch * sizeof(*piT));
620 |           const int      pumax = std::min(x - 1, width - x);
621 |           if (pumax < mdis)
622 |           {
623 |             const __m128i  a = _mm_set1_epi32(1 - pumax);
624 |             const __m128i  b = _mm_set1_epi32(pumax - 1);
625 |             _mm_store_si128(piT + mdis - pumax, a);
626 |             _mm_store_si128(piT + mdis + pumax, b);
627 |           }
628 |         }
629 |       }
630 | 
631 |       else
632 |       {
633 |         const int      umax = std::min(std::min(x, width - 1 - x), mdis);
634 |         for (int u = -umax; u <= umax; ++u)
635 |         {
636 |           __m128i        idx = _mm_setzero_si128(); // 32-bit signed int
637 |           __m128         bval = fltmax;
638 |           const int      umax2 = std::min(std::min(x - 1, width - x), mdis);
639 |           const int      vmax = std::min(umax2, u + 1);
640 |           for (int v = std::max(-umax2, u - 1); v <= vmax; ++v)  // Predecessors within one step of u
641 |           {
642 |             __m128         y = _mm_load_ps(ppT + (mdis + v) * VECTSIZE);
643 |             const __m128   a = _mm_set1_ps(gamma16 * std::abs(u - v));
644 |             y = _mm_add_ps(y, a);
645 |             const __m128   ccost = _mm_min_ps(y, fltmax9);  // Stays below the FLT_MAX initial value of bval
646 |             const __m128i  v4 = _mm_set1_epi32(v);
647 |             const __m128i  tst =  // if (ccost < bval)
648 |               _mm_castps_si128(_mm_cmplt_ps(ccost, bval));
649 |             idx = select(tst, v4, idx);
650 |             bval = _mm_min_ps(ccost, bval);
651 |           }
652 |           const int      mu = (mdis + u) * VECTSIZE;
653 |           __m128         y = _mm_add_ps(bval, _mm_load_ps(tT + mu));
654 |           y = _mm_min_ps(y, fltmax9);
655 |           _mm_store_ps(pT + mu, y);
656 |           _mm_store_si128(piT + mdis + u, idx);  // Cheapest predecessor direction for (x, u)
657 |         }
658 |       }
659 |     }
660 | 
661 |     // -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
662 |     // Backtrack
663 | 
664 |     _mm_store_si128(reinterpret_cast <__m128i *> (fpath) + width - 1, zero);  // The path ends on direction 0
665 |     for (int x = width - 2; x >= 0; --x)
666 |     {
667 |       const int      idx_n = (x + 1)             * VECTSIZE;
668 |       const int      idx_c = x * VECTSIZE;
669 |       const int      idx_p = (x * tpitch + mdis) * VECTSIZE;
670 |       for (int k = 0; k < VECTSIZE; ++k)
671 |       {
672 |         const int      n = fpath[idx_n + k];
673 |         fpath[idx_c + k] = pbackt[idx_p + n * VECTSIZE + k];
674 |       }
675 |     }
676 | 
677 |     // -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
678 |     // Interpolate
679 | 
680 |     const int      bv = block * VECTSIZE;  // 16-bit lane offset of the 4 lines of this block
681 |     for (int x = 0; x < width; ++x)
682 |     {
683 |       if (msk_ptr != 0 && bmask[x] == 0)
684 |       {
685 |         // Does both blocks at once.
686 |         if (block == 0)
687 |         {
688 |           _mm_store_si128(dmap_ptr + x, zero);
689 | 
690 |           __m128i        res;
691 |           if (ucubic)
692 |           {
693 |             res = interp_cubic8(
694 |               _mm_load_si128(src1p_ptr + x),
695 |               _mm_load_si128(src1n_ptr + x),
696 |               _mm_load_si128(src3p_ptr + x),
697 |               _mm_load_si128(src3n_ptr + x),
698 |               nine16, sign16, cubic_cst, zero
699 |             );
700 |           }
701 |           else
702 |           {
703 |             res = _mm_avg_epu16(
704 |               _mm_load_si128(src1p_ptr + x),
705 |               _mm_load_si128(src1n_ptr + x)
706 |             );
707 |           }
708 |           _mm_store_si128(dst_ptr + x, res);
709 |         }
710 |       }
711 | 
712 |       else
713 |       {
714 |         uint16_t * const       dst16_ptr =
715 |           reinterpret_cast <uint16_t *> (dst_ptr + x) + bv;
716 | 
717 |         assert(sizeof(*dmap_ptr) == sizeof(int16_t) * VECTSIZE * 2);
718 |         assert(sizeof(*fpath) == sizeof(int32_t));
719 |         __m128i        dir4 =
720 |           _mm_load_si128(reinterpret_cast <const __m128i *> (fpath) + x);
721 |         dir4 = _mm_packs_epi32(dir4, zero);   // Contains 16-bit data
722 |         _mm_storel_epi64(reinterpret_cast <__m128i *> (
723 |           reinterpret_cast <int64_t *> (dmap_ptr + x) + block
724 |           ), dir4);
725 | 
726 |         /*** To do: use interp_cubic4() and check if it's faster ***/
727 | 
728 |         for (int k = 0; k < VECTSIZE; ++k)
729 |         {
730 |           // Cast to int16_t because _mm_extract_epi16 extends with 0s
731 |           // and we need the sign.
732 |           const int      dir = int16_t(_mm_extract_epi16(dir4, 0));
733 | 
734 |           const uint16_t * const src1p16_ptr =
735 |             reinterpret_cast <const uint16_t *> (src1p_ptr + x + dir) + bv;
736 |           const uint16_t * const src1n16_ptr =
737 |             reinterpret_cast <const uint16_t *> (src1n_ptr + x - dir) + bv;
738 |           const int      sum_1 = src1p16_ptr[k] + src1n16_ptr[k];
739 | 
740 |           const int      ad = std::abs(dir);
741 |           if (ucubic && x >= ad * 3 && x <= width - 1 - ad * 3)
742 |           {
743 |             const uint16_t * const src3p16_ptr =
744 |               reinterpret_cast <const uint16_t *> (src3p_ptr + x + dir * 3) + bv;
745 |             const uint16_t * const src3n16_ptr =
746 |               reinterpret_cast <const uint16_t *> (src3n_ptr + x - dir * 3) + bv;
747 |             const int      sum_3 = src3p16_ptr[k] + src3n16_ptr[k];
748 | 
749 |             const int      interp = (9 * sum_1 - sum_3 + 8) >> 4;
750 |             dst16_ptr[k] =
751 |               uint16_t(std::min(std::max(interp, 0), 65535));
752 |           }
753 |           else
754 |           {
755 |             dst16_ptr[k] = uint16_t((sum_1 + 1) >> 1);
756 |           }
757 | 
758 |           dir4 = _mm_srli_si128(dir4, 2);  // Next lane moves down to position 0
759 |         }
760 |       }
761 |     }  // for x
762 | 
763 |     ccosts += ofs_p4;  // The second block reads the other ccosts chunk
764 | 
765 |   }  // for block
766 | /****************************************************************************/
767 | #endif // EDI bypass
768 | /****************************************************************************/
769 | }
770 | 
771 | 
772 | 
773 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
774 | 
775 | 
776 | 
777 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
778 | 
779 | 
780 | 
781 | void	Eedi3Sse::expand_mask(bool dst_ptr[], const uint8_t msk_ptr[], int width, int mdis)
782 | {
783 |   assert(dst_ptr != 0);
784 |   assert(msk_ptr != 0);
785 |   assert(width > 0);
786 |   assert(mdis > 0);
787 | 
788 |   // The mask is read one 64-bit word (8 bytes) per column; a column counts
789 |   // as active as soon as its word is non-zero.
790 |   const int64_t * const   col_ptr = reinterpret_cast <const int64_t *> (msk_ptr);
791 |   const int      head_len = std::min(width, mdis);
792 |   const int      body_len = width - head_len;
793 | 
794 |   // Rightmost output column still reached by an active column seen so far.
795 |   int            reach = -666999;
796 | 
797 |   // Active columns located less than mdis columns from the left edge.
798 |   for (int x = 0; x < head_len; ++x)
799 |   {
800 |     reach = (col_ptr[x] != 0) ? x + mdis : reach;
801 |   }
802 | 
803 |   // Sliding part: the column entering the window lies mdis columns ahead.
804 |   for (int x = 0; x < body_len; ++x)
805 |   {
806 |     reach = (col_ptr[x + mdis] != 0) ? x + mdis * 2 : reach;
807 |     dst_ptr[x] = (x <= reach);
808 |   }
809 | 
810 |   // Tail: no further mask column is read.
811 |   for (int x = body_len; x < width; ++x)
812 |   {
813 |     dst_ptr[x] = (x <= reach);
814 |   }
815 | }
816 | 
817 | 
818 | 
819 | void	Eedi3Sse::sum_nrad(__m128i &s_0, __m128i &s_1, const __m128i &zero, int nrad, int xl, int xr, const __m128i *src3p, const __m128i *src1p, const __m128i *src1n, const __m128i *src3n)
820 | {
821 |   assert(nrad >= 0);
822 |   assert(src3p != 0);
823 |   assert(src1p != 0);
824 |   assert(src1n != 0);
825 |   assert(src3n != 0);
826 | 
827 |   // For each of the 2 * nrad + 1 taps, three line pairs are compared:
828 |   // (3p, 1p), (1p, 1n) and (1n, 3n), the first line of each pair being read
829 |   // around xr and the second one around xl. The absolute differences are
830 |   // widened to 32 bits: s_0 sums the 4 low lanes, s_1 the 4 high ones.
831 |   s_0 = zero;
832 |   s_1 = zero;
833 | 
834 |   const int      nbr_taps = nrad * 2 + 1;
835 |   for (int tap = 0; tap < nbr_taps; ++tap)
836 |   {
837 |     const int      pos_l = xl - nrad + tap;
838 |     const int      pos_r = xr - nrad + tap;
839 |     const __m128i  dif_arr [3] =
840 |     {
841 |       difabs16(_mm_load_si128(src3p + pos_r), _mm_load_si128(src1p + pos_l)),
842 |       difabs16(_mm_load_si128(src1p + pos_r), _mm_load_si128(src1n + pos_l)),
843 |       difabs16(_mm_load_si128(src1n + pos_r), _mm_load_si128(src3n + pos_l))
844 |     };
845 |     for (int pair = 0; pair < 3; ++pair)
846 |     {
847 |       s_0 = _mm_add_epi32(s_0, _mm_unpacklo_epi16(dif_arr [pair], zero));
848 |       s_1 = _mm_add_epi32(s_1, _mm_unpackhi_epi16(dif_arr [pair], zero));
849 |     }
850 |   }
851 | }
852 | 
853 | 
854 | 
855 | __m128i	Eedi3Sse::interp_cubic8(const __m128i &src1p, const __m128i &src1n, const __m128i &src3p, const __m128i &src3n, const __m128i &nine16, const __m128i &sign16, const __m128i &cubic_cst, const __m128i &zero)
856 | {
857 |   assert(&src1p != 0);
858 |   assert(&src1n != 0);
859 |   assert(&src3p != 0);
860 |   assert(&src3n != 0);
861 |   assert(&nine16 != 0);
862 |   assert(&sign16 != 0);
863 |   assert(&cubic_cst != 0);
864 |   assert(&zero != 0);
865 | 
866 |   // 32-bit products avg1 * nine16, rebuilt from the 16-bit low and high halves.
867 |   const __m128i  avg1 = _mm_avg_epu16(src1p, src1n);
868 |   const __m128i  mul_l = _mm_mullo_epi16(avg1, nine16);
869 |   const __m128i  mul_h = _mm_mulhi_epu16(avg1, nine16);
870 |   const __m128i  prod_0 = _mm_unpacklo_epi16(mul_l, mul_h);
871 |   const __m128i  prod_1 = _mm_unpackhi_epi16(mul_l, mul_h);
872 | 
873 |   // Average of the outer pair, zero-extended to 32 bits.
874 |   const __m128i  avg3 = _mm_avg_epu16(src3p, src3n);
875 |   const __m128i  out_0 = _mm_unpacklo_epi16(avg3, zero);
876 |   const __m128i  out_1 = _mm_unpackhi_epi16(avg3, zero);
877 | 
878 |   // (product - outer + cubic_cst) >> 3, using an arithmetic shift.
879 |   const __m128i  sum_0 = _mm_add_epi32(_mm_sub_epi32(prod_0, out_0), cubic_cst);
880 |   const __m128i  sum_1 = _mm_add_epi32(_mm_sub_epi32(prod_1, out_1), cubic_cst);
881 |   const __m128i  packed = _mm_packs_epi32(_mm_srai_epi32(sum_0, 3), _mm_srai_epi32(sum_1, 3));
882 | 
883 |   return (_mm_xor_si128(packed, sign16));  // Signed saturation, then XOR with sign16
884 | }
885 | 
886 | 
887 | 
888 | // src13p and src13n each hold:
889 | // - 4 int16 from the 1p/1n line in the lowest 64 bits, and
890 | // - 4 int16 from the 3p/3n line in the highest 64 bits.
891 | // The 4 results are packed in the lowest 64 bits, the highest 64 bits are garbage.
892 | __m128i	Eedi3Sse::interp_cubic4(const __m128i &src13p, const __m128i &src13n, const __m128i &nine16, const __m128i &sign16, const __m128i &cubic_cst, const __m128i &zero)
893 | {
894 |   assert(&src13p != 0);
895 |   assert(&src13n != 0);
896 |   assert(&nine16 != 0);
897 |   assert(&sign16 != 0);
898 |   assert(&cubic_cst != 0);
899 |   assert(&zero != 0);
900 | 
901 |   // A single average covers both pairs: inner (1p/1n) low, outer (3p/3n) high.
902 |   const __m128i  avg = _mm_avg_epu16(src13p, src13n);
903 | 
904 |   // 32-bit avg * nine16 for the 4 low (inner) lanes.
905 |   const __m128i  prod = _mm_unpacklo_epi16(
906 |     _mm_mullo_epi16(avg, nine16), _mm_mulhi_epu16(avg, nine16));
907 | 
908 |   // (product - outer + cubic_cst) >> 3, then signed pack and XOR with sign16.
909 |   const __m128i  outer = _mm_unpackhi_epi16(avg, zero);
910 |   const __m128i  sum = _mm_add_epi32(_mm_sub_epi32(prod, outer), cubic_cst);
911 | 
912 |   return (_mm_xor_si128(_mm_packs_epi32(_mm_srai_epi32(sum, 3), zero), sign16));
913 | }
914 | 
915 | 
916 | 
917 | __m128i	Eedi3Sse::select(const __m128i &cond, const __m128i &v_t, const __m128i &v_f)
918 | {
919 |   // Bitwise blend: bits set in cond are taken from v_t, the others from v_f.
920 |   return (_mm_or_si128(
921 |     _mm_andnot_si128(cond, v_f),
922 |     _mm_and_si128(cond, v_t)
923 |   ));
924 | }
925 | 
926 | 
927 | 
928 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
929 | 


--------------------------------------------------------------------------------
/EEDI3/Eedi3Sse.h:
--------------------------------------------------------------------------------
  1 | /*****************************************************************************
  2 | 
  3 |         Eedi3Sse.h
  4 | 
  5 | Copyright (C) 2010 Kevin Stone - some part by Laurent de Soras, 2013
  6 | 
  7 | This program is free software; you can redistribute it and/or modify
  8 | it under the terms of the GNU General Public License as published by
  9 | the Free Software Foundation; either version 2 of the License, or
 10 | (at your option) any later version.
 11 | 
 12 | This program is distributed in the hope that it will be useful,
 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 | GNU General Public License for more details.
 16 | 
 17 | You should have received a copy of the GNU General Public License
 18 | along with this program; if not, write to the Free Software
 19 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 20 | 
 21 | *Tab=3***********************************************************************/
 22 | 
 23 | 
 24 | 
 25 | #if ! defined (Eedi3Sse_HEADER_INCLUDED)
 26 | #define	Eedi3Sse_HEADER_INCLUDED
 27 | 
 28 | #if defined (_MSC_VER)
 29 | 	#pragma once
 30 | 	#pragma warning (4 : 4250)
 31 | #endif
 32 | 
 33 | 
 34 | 
 35 | /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 36 | 
 37 | #include <emmintrin.h>
 38 | #include <stdint.h>
 39 | 
 40 | 
 41 | class Eedi3Sse
 42 | {
 43 | // Collection of static SSE2 routines; the constructors are declared private (see the forbidden section).
 44 | /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 45 | 
 46 | public:
 47 | 
 48 | 	enum {         VECTSIZE =  4 };  // Vector size for internal processing (32-bit data)
 49 | 	enum {         COL_H    = VECTSIZE * 2 }; // Number of simultaneously processed lines (vector size at the API level).
 50 | 	enum {         MARGIN_H = 12 };  // Left and right margins for the virtual source frame
 51 | 
 52 | 	virtual        ~Eedi3Sse () {}
 53 | 	// Packing of the source lines and mask, unpacking of the results, and the interpolation itself.
 54 | 	static void    prepare_lines_8bits (uint16_t *dst_ptr, int dst_pitch, const uint8_t *src_ptr, int src_pitch, int bpp, int width, int height, int src_y);
 55 | 	static void    prepare_mask_8bits (uint8_t *dst_ptr, const uint8_t *src_ptr, int src_pitch, int bpp, int width, int height, int src_y);
 56 | 	static void    copy_result_lines_8bits (uint8_t *dst_ptr, int dst_pitch, const uint16_t *src_ptr, int src_pitch, int bpp, int width, int height, int dst_y);
 57 | 	static void    copy_result_dmap (int16_t *dst_ptr, int dst_pitch, const int16_t *src_ptr, int src_pitch, int width, int height, int dst_y);
 58 | 	static void    interp_lines_full_pel (const __m128i *src_ptr, __m128i *dst_ptr, const uint8_t *msk_ptr, uint8_t *tmp_ptr, __m128i *dmap_ptr, int width, int pitch, float alpha, float beta, float gamma, int nrad, int mdis, bool ucubic, bool cost3);
 59 | 
 60 | 
 61 | 
 62 | /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 63 | 
 64 | protected:
 65 | 
 66 | 
 67 | 
 68 | /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 69 | 
 70 | private:
 71 | 	// Helpers used by the implementation (Eedi3Sse.cpp).
 72 | 	static void    expand_mask (bool dst_ptr [], const uint8_t msk_ptr [], int width, int mdis);
 73 | 	static __forceinline void
 74 | 	               sum_nrad (__m128i &s_0, __m128i &s_1, const __m128i &zero, int nrad, int xl, int xr, const __m128i *src3p, const __m128i *src1p, const __m128i *src1n, const __m128i *src3n);
 75 | 	static __forceinline __m128i
 76 | 	               interp_cubic8 (const __m128i &src1p, const __m128i &src1n, const __m128i &src3p, const __m128i &src3n, const __m128i &nine16, const __m128i &sign16, const __m128i &cubic_cst, const __m128i &zero);
 77 | 	static __forceinline __m128i
 78 | 	               interp_cubic4 (const __m128i &src13p, const __m128i &src13n, const __m128i &nine16, const __m128i &sign16, const __m128i &cubic_cst, const __m128i &zero);
 79 | 	// Bitwise blend: result takes v_t where cond bits are set, v_f elsewhere.
 80 | 	static __forceinline __m128i
 81 | 	               select (const __m128i &cond, const __m128i &v_t, const __m128i &v_f);
 82 | 
 83 | 
 84 | 
 85 | /*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 86 | 
 87 | private:
 88 | 	// Declared private so that outside code cannot construct, copy or compare objects of this class.
 89 | 	               Eedi3Sse ();
 90 | 	               Eedi3Sse (const Eedi3Sse &other);
 91 | 	Eedi3Sse &     operator = (const Eedi3Sse &other);
 92 | 	bool           operator == (const Eedi3Sse &other) const;
 93 | 	bool           operator != (const Eedi3Sse &other) const;
 94 | 
 95 | };	// class Eedi3Sse
 96 | 
 97 | 
 98 | 
 99 | //#include	"Eedi3Sse.hpp"
100 | 
101 | 
102 | 
103 | #endif	// Eedi3Sse_HEADER_INCLUDED
104 | 
105 | 
106 | 
107 | /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
108 | 


--------------------------------------------------------------------------------
/EEDI3/PlanarFrame.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | **   My PlanarFrame class... fast mmx/sse2 YUY2 packed to planar and planar
  3 | **   to packed conversions, and always gives 16 bit alignment for all
  4 | **   planes.  Supports YV12/YUY2/RGB24 frames from avisynth, can do any planar
  5 | **   format internally.
  6 | **
  7 | **   Copyright (C) 2005-2010 Kevin Stone
  8 | **
  9 | **   This program is free software; you can redistribute it and/or modify
 10 | **   it under the terms of the GNU General Public License as published by
 11 | **   the Free Software Foundation; either version 2 of the License, or
 12 | **   (at your option) any later version.
 13 | **
 14 | **   This program is distributed in the hope that it will be useful,
 15 | **   but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 | **   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17 | **   GNU General Public License for more details.
 18 | **
 19 | **   You should have received a copy of the GNU General Public License
 20 | **   along with this program; if not, write to the Free Software
 21 | **   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 22 | */
 23 | 
 24 | #include "PlanarFrame.h"
 25 | #include <stdint.h>
 26 | #include <emmintrin.h>
 27 | 
 28 | int modnpf(const int m, const int n)
 29 | {
 30 |   // Returns m when it is a multiple of n, else m + n - (m % n), i.e. the next multiple for m >= 0 and n > 0.
 31 |   const int rem = m % n;
 32 |   return (rem == 0) ? m : m + n - rem;
 33 | }
 34 | 
 35 | PlanarFrame::PlanarFrame(int cpuFlags)
 36 | {
 37 |   // Starts as an empty frame: no plane allocated, all geometry zeroed.
 38 |   y = NULL; u = NULL; v = NULL;
 39 |   ywidth = 0; yheight = 0; ypitch = 0;
 40 |   uvwidth = 0; uvheight = 0; uvpitch = 0;
 41 |   cpu = cpuFlags;
 42 |   useSIMD = true;
 43 | }
 44 | 
 45 | PlanarFrame::PlanarFrame(VideoInfo &viInfo, int cpuFlags)
 46 | {
 47 |   // Starts from the empty state, then allocates the planes for viInfo.
 48 |   // The result of allocSpace() is not checked here.
 49 |   y = NULL; u = NULL; v = NULL;
 50 |   ywidth = 0; yheight = 0; ypitch = 0;
 51 |   uvwidth = 0; uvheight = 0; uvpitch = 0;
 52 |   cpu = cpuFlags; useSIMD = true;
 53 |   allocSpace(viInfo);
 54 | }
 55 | 
 56 | PlanarFrame::~PlanarFrame()
 57 | {
 58 |   if (y != NULL) _aligned_free(y);
 59 |   if (u != NULL) _aligned_free(u);
 60 |   if (v != NULL) _aligned_free(v);
 61 |   y = u = v = NULL;  // Allocated planes are freed, then every pointer is cleared
 62 | }
 62 | 
 63 | bool PlanarFrame::allocSpace(VideoInfo &viInfo)
 64 | {
 65 |   // (Re)allocates the planes for the format described by viInfo, after
 66 |   // releasing any previous allocation. Returns false when the frame is
 67 |   // empty, when the colour format is not handled below, or when an
 68 |   // allocation fails (planes obtained before the failure stay owned by the
 69 |   // object and are freed by the destructor or by the next call).
 70 |   if (y != NULL) { _aligned_free(y); y = NULL; }
 71 |   if (u != NULL) { _aligned_free(u); u = NULL; }
 72 |   if (v != NULL) { _aligned_free(v); v = NULL; }
 73 |   ypitch = uvpitch = 0;
 74 |   ywidth = uvwidth = 0;
 75 |   yheight = uvheight = 0;
 76 | 
 77 |   const int height = viInfo.height;
 78 |   const int width = viInfo.width;
 79 |   if ((height == 0) || (width == 0)) return false;
 80 | 
 81 |   // Chroma subsampling, as right shifts applied to the luma dimensions.
 82 |   // has_chroma is cleared for luma-only content (Y8).
 83 |   int  sub_w = 0;
 84 |   int  sub_h = 0;
 85 |   bool has_chroma = true;
 86 |   if (viInfo.IsYV12())
 87 |   {
 88 |     sub_w = 1;
 89 |     sub_h = 1;
 90 |   }
 91 |   else if (viInfo.IsYUY2() || viInfo.IsYV16())
 92 |   {
 93 |     sub_w = 1;
 94 |   }
 95 |   else if (viInfo.IsRGB24() || viInfo.IsYV24())
 96 |   {
 97 |     // Three planes sharing the same geometry: both shifts stay at 0.
 98 |   }
 99 |   else if (viInfo.IsY8())
100 |   {
101 |     has_chroma = false;
102 |   }
103 |   else
104 |   {
105 |     // Unsupported format: bail out instead of requesting a zero-byte block,
106 |     // which _aligned_malloc() rejects as an invalid parameter.
107 |     return false;
108 |   }
109 | 
110 |   ypitch = modnpf(width + MIN_PAD, MIN_ALIGNMENT);
111 |   ywidth = width;
112 |   yheight = height;
113 |   if (has_chroma)
114 |   {
115 |     uvwidth = width >> sub_w;
116 |     uvheight = height >> sub_h;
117 |     uvpitch = modnpf(uvwidth + MIN_PAD, MIN_ALIGNMENT);
118 |   }
119 | 
120 |   y = (unsigned char*)_aligned_malloc(ypitch*yheight, MIN_ALIGNMENT);
121 |   if (y == NULL) return false;
122 |   if ((uvpitch != 0) && (uvheight != 0))
123 |   {
124 |     u = (unsigned char*)_aligned_malloc(uvpitch*uvheight, MIN_ALIGNMENT);
125 |     if (u == NULL) return false;
126 |     v = (unsigned char*)_aligned_malloc(uvpitch*uvheight, MIN_ALIGNMENT);
127 |     if (v == NULL) return false;
128 |   }
129 |   return true;
130 | }
131 | 
132 | bool PlanarFrame::allocSpace(int specs[4])
133 | {
134 |   if (y != NULL) { _aligned_free(y); y = NULL; }
135 |   if (u != NULL) { _aligned_free(u); u = NULL; }
136 |   if (v != NULL) { _aligned_free(v); v = NULL; }
137 |   ypitch = uvpitch = 0;
138 |   ywidth = uvwidth = 0;
139 |   yheight = uvheight = 0;
140 | 
141 |   int height = specs[0];
142 |   int width = specs[2];
143 |   if ((height == 0) || (width == 0)) return false;
144 |   ypitch = modnpf(width + MIN_PAD, MIN_ALIGNMENT);
145 |   ywidth = width;
146 |   yheight = height;
147 |   height = specs[1];
148 |   width = specs[3];
149 |   if ((width != 0) && (height != 0))
150 |   {
151 |     uvpitch = modnpf(width + MIN_PAD, MIN_ALIGNMENT);
152 |     uvwidth = width;
153 |     uvheight = height;
154 |   }
155 |   y = (unsigned char*)_aligned_malloc(ypitch*yheight, MIN_ALIGNMENT);
156 |   if (y == NULL) return false;
157 |   if ((uvpitch != 0) && (uvheight != 0))
158 |   {
159 |     u = (unsigned char*)_aligned_malloc(uvpitch*uvheight, MIN_ALIGNMENT);
160 |     if (u == NULL) return false;
161 |     v = (unsigned char*)_aligned_malloc(uvpitch*uvheight, MIN_ALIGNMENT);
162 |     if (v == NULL) return false;
163 |   }
164 |   return true;
165 | }
166 | 
167 | void PlanarFrame::createPlanar(int yheight, int uvheight, int ywidth, int uvwidth)
168 | {
169 |   int specs[4] = { yheight,uvheight,ywidth,uvwidth };
170 |   allocSpace(specs);
171 | }
172 | 
173 | void PlanarFrame::createPlanar(int height, int width, uint8_t chroma_format)
174 | {
175 |   int specs[4];
176 |   switch (chroma_format)
177 |   {
178 |   case 0:
179 |   case 1:
180 |     specs[0] = height; specs[1] = height >> 1;
181 |     specs[2] = width; specs[3] = width >> 1;
182 |     break;
183 |   case 2:
184 |     specs[0] = height; specs[1] = height;
185 |     specs[2] = width; specs[3] = width >> 1;
186 |     break;
187 |   default:
188 |     specs[0] = height; specs[1] = height;
189 |     specs[2] = width; specs[3] = width;
190 |     break;
191 |   }
192 |   allocSpace(specs);
193 | }
194 | 
195 | void PlanarFrame::createFromProfile(VideoInfo &viInfo)
196 | {
197 |   allocSpace(viInfo);
198 | }
199 | 
200 | void PlanarFrame::createFromFrame(PVideoFrame &frame, VideoInfo &viInfo)
201 | {
202 |   allocSpace(viInfo);
203 |   copyInternalFrom(frame, viInfo);
204 | }
205 | 
206 | void PlanarFrame::createFromPlanar(PlanarFrame &frame)
207 | {
208 |   int specs[4] = { frame.yheight,frame.uvheight,frame.ywidth,frame.uvwidth };
209 |   allocSpace(specs);
210 |   copyInternalFrom(frame);
211 | }
212 | 
213 | void PlanarFrame::copyFrom(PVideoFrame &frame, VideoInfo &viInfo)
214 | {
215 |   copyInternalFrom(frame, viInfo);
216 | }
217 | 
218 | void PlanarFrame::copyFrom(PlanarFrame &frame)
219 | {
220 |   copyInternalFrom(frame);
221 | }
222 | 
223 | void PlanarFrame::copyTo(PVideoFrame &frame, VideoInfo &viInfo)
224 | {
225 |   copyInternalTo(frame, viInfo);
226 | }
227 | 
228 | void PlanarFrame::copyTo(PlanarFrame &frame)
229 | {
230 |   copyInternalTo(frame);
231 | }
232 | 
233 | void PlanarFrame::copyPlaneTo(PlanarFrame &frame, uint8_t plane)
234 | {
235 |   copyInternalPlaneTo(frame, plane);
236 | }
237 | 
238 | uint8_t* PlanarFrame::GetPtr(uint8_t plane)
239 | {
240 |   switch (plane)
241 |   {
242 |   case 0: return y; break;
243 |   case 1: return u; break;
244 |   default: return v; break;
245 |   }
246 | }
247 | 
248 | int PlanarFrame::GetWidth(uint8_t plane)
249 | {
250 |   switch (plane)
251 |   {
252 |   case 0: return ywidth; break;
253 |   default: return uvwidth; break;
254 |   }
255 | }
256 | 
257 | int PlanarFrame::GetHeight(uint8_t plane)
258 | {
259 |   switch (plane)
260 |   {
261 |   case 0: return yheight; break;
262 |   default: return uvheight; break;
263 |   }
264 | }
265 | 
266 | int PlanarFrame::GetPitch(uint8_t plane)
267 | {
268 |   switch (plane)
269 |   {
270 |   case 0: return ypitch; break;
271 |   default: return uvpitch; break;
272 |   }
273 | }
274 | 
275 | void PlanarFrame::freePlanar()
276 | {
277 |   if (y != NULL) { _aligned_free(y); y = NULL; }
278 |   if (u != NULL) { _aligned_free(u); u = NULL; }
279 |   if (v != NULL) { _aligned_free(v); v = NULL; }
280 |   ypitch = uvpitch = 0;
281 |   ywidth = uvwidth = 0;
282 |   yheight = uvheight = 0;
283 |   cpu = 0;
284 | }
285 | 
286 | void PlanarFrame::copyInternalFrom(PVideoFrame &frame, VideoInfo &viInfo)
287 | {
288 |   if ((y == NULL) || (!viInfo.IsY8() && ((u == NULL) || (v == NULL)))) return;
289 | 
290 |   if (viInfo.IsYV12() || viInfo.IsYV16() || viInfo.IsYV24())
291 |   {
292 |     BitBlt(y, ypitch, frame->GetReadPtr(PLANAR_Y), frame->GetPitch(PLANAR_Y),
293 |       frame->GetRowSize(PLANAR_Y), frame->GetHeight(PLANAR_Y));
294 |     BitBlt(u, uvpitch, frame->GetReadPtr(PLANAR_U), frame->GetPitch(PLANAR_U),
295 |       frame->GetRowSize(PLANAR_U), frame->GetHeight(PLANAR_U));
296 |     BitBlt(v, uvpitch, frame->GetReadPtr(PLANAR_V), frame->GetPitch(PLANAR_V),
297 |       frame->GetRowSize(PLANAR_V), frame->GetHeight(PLANAR_V));
298 |   }
299 |   else if (viInfo.IsY8())
300 |   {
301 |     BitBlt(y, ypitch, frame->GetReadPtr(PLANAR_Y), frame->GetPitch(PLANAR_Y),
302 |       frame->GetRowSize(PLANAR_Y), frame->GetHeight(PLANAR_Y));
303 |   }
304 |   else if (viInfo.IsYUY2())
305 |   {
306 |     convYUY2to422(frame->GetReadPtr(), y, u, v, frame->GetPitch(), ypitch, uvpitch,
307 |       viInfo.width, viInfo.height);
308 |   }
309 |   else
310 |   {
311 |     if (viInfo.IsRGB24())
312 |     {
313 |       convRGB24to444(frame->GetReadPtr(), y, u, v, frame->GetPitch(), ypitch, uvpitch,
314 |         viInfo.width, viInfo.height);
315 |     }
316 |   }
317 | }
318 | 
319 | void PlanarFrame::copyInternalFrom(PlanarFrame &frame)
320 | {
321 |   if ((y == NULL) || ((uvpitch != 0) && ((u == NULL) || (v == NULL)))) return;
322 | 
323 |   BitBlt(y, ypitch, frame.y, frame.ypitch, frame.ywidth, frame.yheight);
324 |   if (uvpitch != 0)
325 |   {
326 |     BitBlt(u, uvpitch, frame.u, frame.uvpitch, frame.uvwidth, frame.uvheight);
327 |     BitBlt(v, uvpitch, frame.v, frame.uvpitch, frame.uvwidth, frame.uvheight);
328 |   }
329 | }
330 | 
331 | void PlanarFrame::copyInternalTo(PVideoFrame &frame, VideoInfo &viInfo)
332 | {
333 |   if ((y == NULL) || (!viInfo.IsY8() && ((u == NULL) || (v == NULL)))) return;
334 | 
335 |   if (viInfo.IsYV12() || viInfo.IsYV16() || viInfo.IsYV24())
336 |   {
337 |     BitBlt(frame->GetWritePtr(PLANAR_Y), frame->GetPitch(PLANAR_Y), y, ypitch, ywidth, yheight);
338 |     BitBlt(frame->GetWritePtr(PLANAR_U), frame->GetPitch(PLANAR_U), u, uvpitch, uvwidth, uvheight);
339 |     BitBlt(frame->GetWritePtr(PLANAR_V), frame->GetPitch(PLANAR_V), v, uvpitch, uvwidth, uvheight);
340 |   }
341 |   else if (viInfo.IsY8())
342 |   {
343 |     BitBlt(frame->GetWritePtr(PLANAR_Y), frame->GetPitch(PLANAR_Y), y, ypitch, ywidth, yheight);
344 |   }
345 |   else if (viInfo.IsYUY2())
346 |   {
347 |     conv422toYUY2(y, u, v, frame->GetWritePtr(), ypitch, uvpitch, frame->GetPitch(), ywidth, yheight);
348 |   }
349 |   else if (viInfo.IsRGB24())
350 |   {
351 |     conv444toRGB24(y, u, v, frame->GetWritePtr(), ypitch, uvpitch, frame->GetPitch(), ywidth, yheight);
352 |   }
353 | }
354 | 
355 | void PlanarFrame::copyInternalTo(PlanarFrame &frame)
356 | {
357 |   if ((y == NULL) || ((uvpitch != 0) && ((u == NULL) || (v == NULL)))) return;
358 | 
359 |   BitBlt(frame.y, frame.ypitch, y, ypitch, ywidth, yheight);
360 |   if (uvpitch != 0)
361 |   {
362 |     BitBlt(frame.u, frame.uvpitch, u, uvpitch, uvwidth, uvheight);
363 |     BitBlt(frame.v, frame.uvpitch, v, uvpitch, uvwidth, uvheight);
364 |   }
365 | }
366 | 
367 | void PlanarFrame::copyInternalPlaneTo(PlanarFrame &frame, uint8_t plane)
368 | {
369 |   switch (plane)
370 |   {
371 |   case 0: if (y != NULL) BitBlt(frame.y, frame.ypitch, y, ypitch, ywidth, yheight); break;
372 |   case 1: if (u != NULL) BitBlt(frame.u, frame.uvpitch, u, uvpitch, uvwidth, uvheight); break;
373 |   case 2: if (v != NULL) BitBlt(frame.v, frame.uvpitch, v, uvpitch, uvwidth, uvheight); break;
374 |   }
375 | }
376 | 
377 | void PlanarFrame::copyChromaTo(PlanarFrame &dst)
378 | {
379 |   if (uvpitch != 0)
380 |   {
381 |     BitBlt(dst.u, dst.uvpitch, u, uvpitch, dst.uvwidth, dst.uvheight);
382 |     BitBlt(dst.v, dst.uvpitch, v, uvpitch, dst.uvwidth, dst.uvheight);
383 |   }
384 | }
385 | 
386 | 
387 | PlanarFrame& PlanarFrame::operator=(PlanarFrame &ob2)
388 | {
389 |   cpu = ob2.cpu;
390 |   ypitch = ob2.ypitch;
391 |   yheight = ob2.yheight;
392 |   ywidth = ob2.ywidth;
393 |   uvpitch = ob2.uvpitch;
394 |   uvheight = ob2.uvheight;
395 |   uvwidth = ob2.uvwidth;
396 |   this->copyFrom(ob2);
397 |   return *this;
398 | }
399 | 
400 | #ifndef _M_X64
401 | 
// 0x00FF in every 16-bit lane: keeps the low byte of each word. Only the first
// 8 bytes are loaded (movq) by the MMX routine below.
__declspec(align(16)) const __int64 Ymask[2] = { 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF };

// MMX (32-bit builds only) de-interleave of packed YUY2 rows into separate
// Y, U and V planes.
//
//   src               packed source rows, pitch1 bytes apart
//   py / pu / pv      destination planes; rows are pitch2Y / pitch2UV bytes apart
//   width, height     luma dimensions in pixels
//
// Each inner iteration reads 16 source bytes and stores 8 Y, 4 U and 4 V
// bytes; the loop advances the chroma index by 4 while it is below width/2,
// so rows are handled in groups of 8 pixels and a width that is not a
// multiple of 8 is processed past `width`.
// The row loop decrements `height` and tests it afterwards, so it executes at
// least once; callers must pass height >= 1.
// emms is issued before returning.
void convYUY2to422_MMX(const unsigned char *src, unsigned char *py, unsigned char *pu,
  unsigned char *pv, int pitch1, int pitch2Y, int pitch2UV, int width, int height)
{
  __asm
  {
    mov edi, src
    mov ebx, py
    mov edx, pu
    mov esi, pv
    mov ecx, width
    shr ecx, 1
    movq mm5, Ymask
    yloop :
    xor eax, eax
      align 16
      xloop :
      movq mm0, [edi + eax * 4]; VYUYVYUY
      movq mm1, [edi + eax * 4 + 8]; VYUYVYUY
      movq mm2, mm0; VYUYVYUY
      movq mm3, mm1; VYUYVYUY
      pand mm0, mm5; 0Y0Y0Y0Y
      psrlw mm2, 8; 0V0U0V0U
      pand mm1, mm5; 0Y0Y0Y0Y
      psrlw mm3, 8; 0V0U0V0U
      packuswb mm0, mm1; YYYYYYYY
      packuswb mm2, mm3; VUVUVUVU
      movq mm4, mm2; VUVUVUVU
      pand mm2, mm5; 0U0U0U0U
      psrlw mm4, 8; 0V0V0V0V
      packuswb mm2, mm2; xxxxUUUU
      packuswb mm4, mm4; xxxxVVVV
      movq[ebx + eax * 2], mm0; store y
      movd[edx + eax], mm2; store u
      movd[esi + eax], mm4; store v
      add eax, 4
      cmp eax, ecx
      jl xloop
      add edi, pitch1
      add ebx, pitch2Y
      add edx, pitch2UV
      add esi, pitch2UV
      dec height
      jnz yloop
      emms
  }
}
450 | 
// MMX (32-bit builds only) interleave of separate Y, U and V planes into
// packed YUY2 rows.
//
//   py / pu / pv      source planes; rows are pitch1Y / pitch1UV bytes apart
//   dst               packed destination rows, pitch2 bytes apart
//   width, height     luma dimensions in pixels
//
// Each inner iteration loads 8 Y, 4 U and 4 V bytes and stores 16 packed
// bytes; the loop advances the chroma index by 4 while it is below width/2,
// so rows are handled in groups of 8 pixels and a width that is not a
// multiple of 8 is processed past `width`.
// The row loop decrements `height` and tests it afterwards, so it executes at
// least once; callers must pass height >= 1.
// emms is issued before returning.
static void conv422toYUY2_MMX(unsigned char *py, unsigned char *pu, unsigned char *pv,
  unsigned char *dst, int pitch1Y, int pitch1UV, int pitch2, int width, int height)
{
  __asm
  {
    mov ebx, py
    mov edx, pu
    mov esi, pv
    mov edi, dst
    mov ecx, width
    shr ecx, 1
    yloop:
    xor eax, eax
      align 16
      xloop :
      movq mm0, [ebx + eax * 2]; YYYYYYYY
      movd mm1, [edx + eax]; 0000UUUU
      movd mm2, [esi + eax]; 0000VVVV
      movq mm3, mm0; YYYYYYYY
      punpcklbw mm1, mm2; VUVUVUVU
      punpcklbw mm0, mm1; VYUYVYUY
      punpckhbw mm3, mm1; VYUYVYUY
      movq[edi + eax * 4], mm0; store
      movq[edi + eax * 4 + 8], mm3; store
      add eax, 4
      cmp eax, ecx
      jl xloop
      add ebx, pitch1Y
      add edx, pitch1UV
      add esi, pitch1UV
      add edi, pitch2
      dec height
      jnz yloop
      emms
  }
}
487 | #endif
488 | 
// SSE2 de-interleave of packed YUY2 rows into separate Y, U and V planes.
//
//   src               packed source rows, pitch1 bytes apart; every row start
//                     must be 16-byte aligned (aligned loads are used)
//   py / pu / pv      destination planes; rows are pitch2Y / pitch2UV bytes apart
//   width, height     luma dimensions in pixels
//
// Each inner iteration consumes 16 source bytes (8 pixels) and stores 8 Y,
// 4 U and 4 V bytes, so a width that is not a multiple of 8 is processed
// past `width`.
static void convYUY2to422_SSE2_simd(const unsigned char *src, unsigned char *py, unsigned char *pu,
  unsigned char *pv, int pitch1, int pitch2Y, int pitch2UV, int width, int height)
{
  const int chromaWidth = width >> 1;
  const __m128i lowByteMask = _mm_set1_epi16(0x00FF);

  for (int row = 0; row < height; ++row)
  {
    for (int cx = 0; cx < chromaWidth; cx += 4)
    {
      const __m128i packed = _mm_load_si128(reinterpret_cast<const __m128i *>(src + cx * 4)); // VYUYVYUYVYUYVYUY

      // Even bytes are luma, odd bytes alternate U and V.
      const __m128i luma16 = _mm_and_si128(packed, lowByteMask);     // 0Y0Y0Y0Y0Y0Y0Y0Y
      const __m128i chroma16 = _mm_srli_epi16(packed, 8);            // 0V0U0V0U0V0U0V0U
      const __m128i luma8 = _mm_packus_epi16(luma16, luma16);        // xxxxxxxxYYYYYYYY
      const __m128i chroma8 = _mm_packus_epi16(chroma16, chroma16);  // xxxxxxxxVUVUVUVU

      // Split the interleaved chroma the same way.
      const __m128i u16 = _mm_and_si128(chroma8, lowByteMask);       // xxxxxxxx0U0U0U0U
      const __m128i v16 = _mm_srli_epi16(chroma8, 8);                // xxxxxxxx0V0V0V0V
      const __m128i u8 = _mm_packus_epi16(u16, u16);                 // xxxxxxxxxxxxUUUU
      const __m128i v8 = _mm_packus_epi16(v16, v16);                 // xxxxxxxxxxxxVVVV

      _mm_storel_epi64(reinterpret_cast<__m128i *>(py + cx * 2), luma8);
      *reinterpret_cast<uint32_t *>(pu + cx) = _mm_cvtsi128_si32(u8);
      *reinterpret_cast<uint32_t *>(pv + cx) = _mm_cvtsi128_si32(v8);
    }
    src += pitch1;
    py += pitch2Y;
    pu += pitch2UV;
    pv += pitch2UV;
  }
}
515 | 
// SSE2 interleave of separate Y, U and V planes into packed YUY2 rows.
//
//   py / pu / pv      source planes; rows are pitch1Y / pitch1UV bytes apart
//   dst               packed destination rows, pitch2 bytes apart; every row
//                     start must be 16-byte aligned (aligned stores are used)
//   width, height     luma dimensions in pixels
//
// Each inner iteration loads 8 Y, 4 U and 4 V bytes and stores 16 packed
// bytes (8 pixels), so a width that is not a multiple of 8 is processed past
// `width`.
static void conv422toYUY2_SSE2(unsigned char *py, unsigned char *pu, unsigned char *pv,
  unsigned char *dst, int pitch1Y, int pitch1UV, int pitch2, int width, int height)
{
  const int chromaWidth = width >> 1;

  for (int row = 0; row < height; ++row)
  {
    for (int cx = 0; cx < chromaWidth; cx += 4)
    {
      const __m128i luma = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(py + cx * 2));         // YYYYYYYY
      const __m128i uQuad = _mm_castps_si128(_mm_load_ss(reinterpret_cast<float *>(pu + cx)));      // 000000000000UUUU
      const __m128i vQuad = _mm_castps_si128(_mm_load_ss(reinterpret_cast<float *>(pv + cx)));      // 000000000000VVVV
      const __m128i chroma = _mm_unpacklo_epi8(uQuad, vQuad);                                       // 00000000VUVUVUVU
      const __m128i packed = _mm_unpacklo_epi8(luma, chroma);                                       // VYUYVYUYVYUYVYUY
      _mm_store_si128(reinterpret_cast<__m128i *>(dst + cx * 4), packed);
    }
    dst += pitch2;
    py += pitch1Y;
    pu += pitch1UV;
    pv += pitch1UV;
  }
}
535 | 
536 | void PlanarFrame::convYUY2to422(const uint8_t *src, uint8_t *py, uint8_t *pu, uint8_t *pv, int pitch1, int pitch2Y, int pitch2UV,
537 |   int width, int height)
538 | {
539 |   if (((cpu&CPUF_SSE2) != 0) && useSIMD && (((size_t(src) | pitch1) & 15) == 0))
540 |     convYUY2to422_SSE2_simd(src, py, pu, pv, pitch1, pitch2Y, pitch2UV, width, height);
541 |   else
542 |   {
543 | #ifndef _M_X64
544 |     if (((cpu&CPUF_MMX) != 0) && useSIMD)
545 |       convYUY2to422_MMX(src, py, pu, pv, pitch1, pitch2Y, pitch2UV, width, height);
546 |     else
547 | #endif
548 |     {
549 |       width >>= 1;
550 |       for (int y = 0; y < height; ++y)
551 |       {
552 |         int x_1 = 0, x_2 = 0;
553 | 
554 |         for (int x = 0; x < width; ++x)
555 |         {
556 |           py[x_1] = src[x_2];
557 |           pu[x] = src[x_2 + 1];
558 |           py[x_1 + 1] = src[x_2 + 2];
559 |           pv[x] = src[x_2 + 3];
560 |           x_1 += 2;
561 |           x_2 += 4;
562 |         }
563 |         py += pitch2Y;
564 |         pu += pitch2UV;
565 |         pv += pitch2UV;
566 |         src += pitch1;
567 |       }
568 |     }
569 |   }
570 | }
571 | 
572 | 
573 | void PlanarFrame::conv422toYUY2(uint8_t *py, uint8_t *pu, uint8_t *pv, uint8_t *dst, int pitch1Y, int pitch1UV, int pitch2,
574 |   int width, int height)
575 | {
576 |   if (((cpu&CPUF_SSE2) != 0) && useSIMD && ((size_t(dst) & 15) == 0))
577 |     conv422toYUY2_SSE2(py, pu, pv, dst, pitch1Y, pitch1UV, pitch2, width, height);
578 |   else
579 |   {
580 | #ifndef _M_X64
581 |     if (((cpu&CPUF_MMX) != 0) && useSIMD) 
582 |       conv422toYUY2_MMX(py, pu, pv, dst, pitch1Y, pitch1UV, pitch2, width, height);
583 |     else
584 | #endif
585 |     {
586 |       width >>= 1;
587 |       for (int y = 0; y < height; ++y)
588 |       {
589 |         int x_1 = 0, x_2 = 0;
590 | 
591 |         for (int x = 0; x < width; ++x)
592 |         {
593 |           dst[x_2] = py[x_1];
594 |           dst[x_2 + 1] = pu[x];
595 |           dst[x_2 + 2] = py[x_1 + 1];
596 |           dst[x_2 + 3] = pv[x];
597 |           x_1 += 2;
598 |           x_2 += 4;
599 |         }
600 |         py += pitch1Y;
601 |         pu += pitch1UV;
602 |         pv += pitch1UV;
603 |         dst += pitch2;
604 |       }
605 |     }
606 |   }
607 | }
608 | 
609 | 
610 | void PlanarFrame::convRGB24to444(const uint8_t *src, uint8_t *py, uint8_t *pu, uint8_t *pv, int pitch1, int pitch2Y, int pitch2UV,
611 |   int width, int height)
612 | {
613 |   for (int y = 0; y < height; ++y)
614 |   {
615 |     int x_3 = 0;
616 | 
617 |     for (int x = 0; x < width; ++x)
618 |     {
619 |       py[x] = src[x_3];
620 |       pu[x] = src[x_3 + 1];
621 |       pv[x] = src[x_3 + 2];
622 |       x_3 += 3;
623 |     }
624 |     src += pitch1;
625 |     py += pitch2Y;
626 |     pu += pitch2UV;
627 |     pv += pitch2UV;
628 |   }
629 | }
630 | 
631 | void PlanarFrame::conv444toRGB24(uint8_t *py, uint8_t *pu, uint8_t *pv, uint8_t *dst, int pitch1Y, int pitch1UV, int pitch2,
632 |   int width, int height)
633 | {
634 |   dst += (height - 1)*pitch2;
635 |   for (int y = 0; y < height; ++y)
636 |   {
637 |     int x_3 = 0;
638 | 
639 |     for (int x = 0; x < width; ++x)
640 |     {
641 |       dst[x_3] = py[x];
642 |       dst[x_3 + 1] = pu[x];
643 |       dst[x_3 + 2] = pv[x];
644 |       x_3 += 3;
645 |     }
646 |     py += pitch1Y;
647 |     pu += pitch1UV;
648 |     pv += pitch1UV;
649 |     dst -= pitch2;
650 |   }
651 | }
652 | 
653 | // Avisynth v2.5.  Copyright 2002 Ben Rudiak-Gould et al.
654 | // http://www.avisynth.org
655 | 
656 | // This program is free software; you can redistribute it and/or modify
657 | // it under the terms of the GNU General Public License as published by
658 | // the Free Software Foundation; either version 2 of the License, or
659 | // (at your option) any later version.
660 | //
661 | // This program is distributed in the hope that it will be useful,
662 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
663 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
664 | // GNU General Public License for more details.
665 | //
666 | // You should have received a copy of the GNU General Public License
667 | // along with this program; if not, write to the Free Software
668 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
669 | // http://www.gnu.org/copyleft/gpl.html .
670 | //
671 | // Linking Avisynth statically or dynamically with other modules is making a
672 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
673 | // General Public License cover the whole combination.
674 | //
675 | // As a special exception, the copyright holders of Avisynth give you
676 | // permission to link Avisynth with independent modules that communicate with
677 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
678 | // terms of these independent modules, and to copy and distribute the
679 | // resulting combined work under terms of your choice, provided that
680 | // every copy of the combined work is accompanied by a complete copy of
681 | // the source code of Avisynth (the version of Avisynth used to produce the
682 | // combined work), being distributed under the terms of the GNU General
683 | // Public License plus this exception.  An independent module is a module
684 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
685 | // import and export plugins, or graphical user interfaces.
686 | 
687 | // from AviSynth 2.55 source...
688 | // copied so we don't need an
689 | // IScriptEnvironment pointer 
690 | // to call it
691 | 
692 | void PlanarFrame::BitBlt(uint8_t *dstp, int dst_pitch, const uint8_t *srcp, int src_pitch, int row_size, int height)
693 | {
694 |   if ((height == 0) || (row_size == 0) || (dst_pitch == 0) || (src_pitch == 0)) return;
695 | 
696 |   if ((height == 1) || ((dst_pitch == src_pitch) && (src_pitch == row_size))) memcpy(dstp, srcp, src_pitch*height);
697 |   else
698 |   {
699 |     for (int y = height; y > 0; --y)
700 |     {
701 |       memcpy(dstp, srcp, row_size);
702 |       dstp += dst_pitch;
703 |       srcp += src_pitch;
704 |     }
705 |   }
706 | }
707 | 


--------------------------------------------------------------------------------
/EEDI3/PlanarFrame.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | **   My PlanarFrame class... fast mmx/sse2 YUY2 packed to planar and planar
 3 | **   to packed conversions, and always gives 16-byte alignment for all
 4 | **   planes.  Supports YV12/YUY2 frames from avisynth, can do any planar format
 5 | **   internally.
 6 | **
 7 | **   Copyright (C) 2005-2006 Kevin Stone
 8 | **
 9 | **   This program is free software; you can redistribute it and/or modify
10 | **   it under the terms of the GNU General Public License as published by
11 | **   the Free Software Foundation; either version 2 of the License, or
12 | **   (at your option) any later version.
13 | **
14 | **   This program is distributed in the hope that it will be useful,
15 | **   but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | **   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | **   GNU General Public License for more details.
18 | **
19 | **   You should have received a copy of the GNU General Public License
20 | **   along with this program; if not, write to the Free Software
21 | **   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22 | */
23 | 
24 | #ifndef __PlanarFrame_H__
25 | #define __PlanarFrame_H__
26 | 
27 | #include <windows.h>
28 | #include <malloc.h>
29 | #include <stdint.h>
30 | #include "avisynth.h"
31 | 
32 | #define MIN_PAD 10
33 | #define MIN_ALIGNMENT 16
34 | 
35 | #define PLANAR_420 1
36 | #define PLANAR_422 2
37 | #define PLANAR_444 3
38 | 
39 | 
40 | class PlanarFrame
41 | {
42 | private:
43 |   bool useSIMD;
44 |   int ypitch, uvpitch;
45 |   int ywidth, uvwidth;
46 |   int yheight, uvheight;
47 |   uint8_t *y, *u, *v;
48 |   bool PlanarFrame::allocSpace(VideoInfo &viInfo);
49 |   bool PlanarFrame::allocSpace(int specs[4]);
50 |   int PlanarFrame::getCPUInfo();
51 |   int PlanarFrame::checkCPU();
52 |   void PlanarFrame::checkSSEOSSupport(int &cput);
53 |   void PlanarFrame::checkSSE2OSSupport(int &cput);
54 |   void PlanarFrame::copyInternalFrom(PVideoFrame &frame, VideoInfo &viInfo);
55 |   void PlanarFrame::copyInternalFrom(PlanarFrame &frame);
56 |   void PlanarFrame::copyInternalTo(PVideoFrame &frame, VideoInfo &viInfo);
57 |   void PlanarFrame::copyInternalTo(PlanarFrame &frame);
58 |   void PlanarFrame::copyInternalPlaneTo(PlanarFrame &frame, uint8_t plane);
59 |   void PlanarFrame::conv422toYUY2(uint8_t *py, uint8_t *pu, uint8_t *pv, uint8_t *dst, int pitch1Y, int pitch1UV, int pitch2,
60 |     int width, int height);
61 |   void PlanarFrame::conv444toRGB24(uint8_t *py, uint8_t *pu, uint8_t *pv, uint8_t *dst, int pitch1Y, int pitch1UV, int pitch2,
62 |     int width, int height);
63 | 
64 | public:
65 |   int cpu;
66 |   PlanarFrame::PlanarFrame(int cpuFlags);
67 |   PlanarFrame::PlanarFrame(VideoInfo &viInfo, int cpuFlags);
68 |   PlanarFrame::~PlanarFrame();
69 |   void PlanarFrame::createPlanar(int yheight, int uvheight, int ywidth, int uvwidth);
70 |   void PlanarFrame::createPlanar(int height, int width, uint8_t chroma_format);
71 |   void PlanarFrame::createFromProfile(VideoInfo &viInfo);
72 |   void PlanarFrame::createFromFrame(PVideoFrame &frame, VideoInfo &viInfo);
73 |   void PlanarFrame::createFromPlanar(PlanarFrame &frame);
74 |   void PlanarFrame::copyFrom(PVideoFrame &frame, VideoInfo &viInfo);
75 |   void PlanarFrame::copyTo(PVideoFrame &frame, VideoInfo &viInfo);
76 |   void PlanarFrame::copyFrom(PlanarFrame &frame);
77 |   void PlanarFrame::copyTo(PlanarFrame &frame);
78 |   void PlanarFrame::copyChromaTo(PlanarFrame &dst);
79 |   void PlanarFrame::copyPlaneTo(PlanarFrame &dst, uint8_t plane);
80 |   void PlanarFrame::freePlanar();
81 |   uint8_t* PlanarFrame::GetPtr(uint8_t plane);
82 |   int PlanarFrame::GetWidth(uint8_t plane);
83 |   int PlanarFrame::GetHeight(uint8_t plane);
84 |   int PlanarFrame::GetPitch(uint8_t plane);
85 |   void PlanarFrame::BitBlt(uint8_t *dstp, int dst_pitch, const uint8_t *srcp, int src_pitch, int row_size, int height);
86 |   PlanarFrame& PlanarFrame::operator=(PlanarFrame &ob2);
87 |   void PlanarFrame::convYUY2to422(const uint8_t *src, uint8_t *py, uint8_t *pu, uint8_t *pv, int pitch1, int pitch2Y, int pitch2UV,
88 |     int width, int height);
89 |   void PlanarFrame::convRGB24to444(const uint8_t *src, uint8_t *py, uint8_t *pu, uint8_t *pv, int pitch1, int pitch2Y, int pitch2UV,
90 |     int width, int height);
91 | };
92 | 
93 | #endif


--------------------------------------------------------------------------------
/EEDI3/avs/alignment.h:
--------------------------------------------------------------------------------
  1 | // Avisynth C Interface Version 0.20
  2 | // Copyright 2003 Kevin Atkinson
  3 | 
  4 | // This program is free software; you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License as published by
  6 | // the Free Software Foundation; either version 2 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // This program is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | // GNU General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU General Public License
 15 | // along with this program; if not, write to the Free Software
 16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
 17 | // http://www.gnu.org/copyleft/gpl.html .
 18 | //
 19 | // As a special exception, I give you permission to link to the
 20 | // Avisynth C interface with independent modules that communicate with
 21 | // the Avisynth C interface solely through the interfaces defined in
 22 | // avisynth_c.h, regardless of the license terms of these independent
 23 | // modules, and to copy and distribute the resulting combined work
 24 | // under terms of your choice, provided that every copy of the
 25 | // combined work is accompanied by a complete copy of the source code
 26 | // of the Avisynth C interface and Avisynth itself (with the version
 27 | // used to produce the combined work), being distributed under the
 28 | // terms of the GNU General Public License plus this exception.  An
 29 | // independent module is a module which is not derived from or based
 30 | // on Avisynth C Interface, such as 3rd-party filters, import and
 31 | // export plugins, or graphical user interfaces.
 32 | 
 33 | #ifndef AVS_ALIGNMENT_H
 34 | #define AVS_ALIGNMENT_H
 35 | 
 36 | // Functions and macros to help work with alignment requirements.
 37 | 
 38 | // Tells if a number is a power of two.
 39 | #define IS_POWER2(n) ((n) && !((n) & ((n) - 1)))
 40 | 
 41 | // Tells if the pointer "ptr" is aligned to "align" bytes.
 42 | #define IS_PTR_ALIGNED(ptr, align) (((uintptr_t)ptr & ((uintptr_t)(align-1))) == 0)
 43 | 
 44 | // Rounds up the number "n" to the next greater multiple of "align"
 45 | #define ALIGN_NUMBER(n, align) (((n) + (align)-1) & (~((align)-1)))
 46 | 
 47 | // Rounds up the pointer address "ptr" to the next greater multiple of "align"
 48 | #define ALIGN_POINTER(ptr, align) (((uintptr_t)(ptr) + (align)-1) & (~(uintptr_t)((align)-1)))
 49 | 
 50 | #ifdef __cplusplus
 51 | 
 52 | #include <cassert>
 53 | #include <cstdlib>
 54 | #include <cstdint>
 55 | #include <avs/config.h>
 56 | 
 57 | #if defined(MSVC)
 58 |     // needed for VS2013, otherwise C++11 'alignas' works
 59 |     #define avs_alignas(x) __declspec(align(x))
 60 | #else
 61 |     // assumes C++11 support
 62 |     #define avs_alignas(x) alignas(x)
 63 | #endif
 64 | 
 65 | template<typename T>
 66 | static bool IsPtrAligned(T* ptr, size_t align)
 67 | {
 68 |   assert(IS_POWER2(align));
 69 |   return (bool)IS_PTR_ALIGNED(ptr, align);
 70 | }
 71 | 
 72 | template<typename T>
 73 | static T AlignNumber(T n, T align)
 74 | {
 75 |   assert(IS_POWER2(align));
 76 |   return ALIGN_NUMBER(n, align);
 77 | }
 78 | 
 79 | template<typename T>
 80 | static T* AlignPointer(T* ptr, size_t align)
 81 | {
 82 |   assert(IS_POWER2(align));
 83 |   return (T*)ALIGN_POINTER(ptr, align);
 84 | }
 85 | 
 86 | extern "C"
 87 | {
 88 | #else
 89 | #include <stdlib.h>
 90 | #endif  // __cplusplus
 91 | 
 92 | // Returns a new buffer that is at least the size "nbytes".
 93 | // The buffer will be aligned to "align" bytes.
 94 | // Returns NULL on error. On successful allocation,
 95 | // the returned buffer must be freed using "avs_free".
 96 | inline void* avs_malloc(size_t nbytes, size_t align)
 97 | {
 98 |   if (!IS_POWER2(align))
 99 |     return NULL;
100 | 
101 |   size_t offset = sizeof(void*) + align - 1;
102 | 
103 |   void *orig = malloc(nbytes + offset);
104 |   if (orig == NULL)
105 |    return NULL;
106 | 
107 |   void **aligned = (void**)(((uintptr_t)orig + (uintptr_t)offset) & (~(uintptr_t)(align-1)));
108 |   aligned[-1] = orig;
109 |   return aligned;
110 | }
111 | 
// Releases a buffer obtained from "avs_malloc"; plain "free" must not be used
// on such buffers. Like free(), a NULL argument is accepted and ignored.
// The pointer handed to free() is read from the pointer-sized slot located
// immediately before "ptr".
inline void avs_free(void *ptr)
{
  if (ptr != NULL)
  {
    void **slots = (void**)ptr;
    free(slots[-1]);
  }
}
122 | 
123 | #ifdef __cplusplus
124 | } // extern "C"
125 | 
126 | // The point of these undef's is to force using the template functions
127 | // if we are in C++ mode. For C, the user can rely only on the macros.
128 | #undef IS_PTR_ALIGNED
129 | #undef ALIGN_NUMBER
130 | #undef ALIGN_POINTER
131 | 
132 | #endif  // __cplusplus
133 | 
134 | #endif  //AVS_ALIGNMENT_H
135 | 


--------------------------------------------------------------------------------
/EEDI3/avs/capi.h:
--------------------------------------------------------------------------------
 1 | // Avisynth C Interface Version 0.20
 2 | // Copyright 2003 Kevin Atkinson
 3 | 
 4 | // This program is free software; you can redistribute it and/or modify
 5 | // it under the terms of the GNU General Public License as published by
 6 | // the Free Software Foundation; either version 2 of the License, or
 7 | // (at your option) any later version.
 8 | //
 9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program; if not, write to the Free Software
16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
17 | // http://www.gnu.org/copyleft/gpl.html .
18 | //
19 | // As a special exception, I give you permission to link to the
20 | // Avisynth C interface with independent modules that communicate with
21 | // the Avisynth C interface solely through the interfaces defined in
22 | // avisynth_c.h, regardless of the license terms of these independent
23 | // modules, and to copy and distribute the resulting combined work
24 | // under terms of your choice, provided that every copy of the
25 | // combined work is accompanied by a complete copy of the source code
26 | // of the Avisynth C interface and Avisynth itself (with the version
27 | // used to produce the combined work), being distributed under the
28 | // terms of the GNU General Public License plus this exception.  An
29 | // independent module is a module which is not derived from or based
30 | // on Avisynth C Interface, such as 3rd-party filters, import and
31 | // export plugins, or graphical user interfaces.
32 | 
#ifndef AVS_CAPI_H
#define AVS_CAPI_H

// EXTERN_C: requests C linkage when compiled as C++, expands to nothing in C.
#ifdef __cplusplus
#  define EXTERN_C extern "C"
#else
#  define EXTERN_C
#endif

// AVSC_CC: calling convention applied to the C API entry points.
// Only set when the MSVC macro is defined: __cdecl by default, __stdcall when
// AVSC_USE_STDCALL is defined. Expands to nothing when MSVC is not defined.
#ifdef MSVC
#ifndef AVSC_USE_STDCALL
#  define AVSC_CC __cdecl
#else
#  define AVSC_CC __stdcall
#endif
#else
#  define AVSC_CC
#endif

// AVSC_INLINE: qualifier for helper functions defined inside the headers.
#define AVSC_INLINE static __inline

// AVSC_EXPORT / AVSC_API(ret, name): how an API function is declared.
//  - BUILDING_AVSCORE defined: the function is exported (dllexport).
//  - otherwise: the function is imported (dllimport), unless
//    AVSC_NO_DECLSPEC is defined, in which case AVSC_API declares a
//    function-pointer typedef called <name>_func instead of a prototype.
#ifdef BUILDING_AVSCORE
#  define AVSC_EXPORT __declspec(dllexport)
#  define AVSC_API(ret, name) EXTERN_C AVSC_EXPORT ret AVSC_CC name
#else
#  define AVSC_EXPORT EXTERN_C __declspec(dllimport)
#  ifndef AVSC_NO_DECLSPEC
#    define AVSC_API(ret, name) EXTERN_C AVSC_EXPORT ret AVSC_CC name
#  else
#    define AVSC_API(ret, name) typedef ret (AVSC_CC *name##_func)
#  endif
#endif

#endif //AVS_CAPI_H
67 | 


--------------------------------------------------------------------------------
/EEDI3/avs/config.h:
--------------------------------------------------------------------------------
 1 | // Avisynth C Interface Version 0.20
 2 | // Copyright 2003 Kevin Atkinson
 3 | 
 4 | // This program is free software; you can redistribute it and/or modify
 5 | // it under the terms of the GNU General Public License as published by
 6 | // the Free Software Foundation; either version 2 of the License, or
 7 | // (at your option) any later version.
 8 | //
 9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program; if not, write to the Free Software
16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
17 | // http://www.gnu.org/copyleft/gpl.html .
18 | //
19 | // As a special exception, I give you permission to link to the
20 | // Avisynth C interface with independent modules that communicate with
21 | // the Avisynth C interface solely through the interfaces defined in
22 | // avisynth_c.h, regardless of the license terms of these independent
23 | // modules, and to copy and distribute the resulting combined work
24 | // under terms of your choice, provided that every copy of the
25 | // combined work is accompanied by a complete copy of the source code
26 | // of the Avisynth C interface and Avisynth itself (with the version
27 | // used to produce the combined work), being distributed under the
28 | // terms of the GNU General Public License plus this exception.  An
29 | // independent module is a module which is not derived from or based
30 | // on Avisynth C Interface, such as 3rd-party filters, import and
31 | // export plugins, or graphical user interfaces.
32 | 
#ifndef AVS_CONFIG_H
#define AVS_CONFIG_H

// Undefine this to get cdecl calling convention
#define AVSC_USE_STDCALL 1

// NOTE TO PLUGIN AUTHORS:
// Because FRAME_ALIGN can be substantially higher than the alignment
// a plugin actually needs, plugins should not use FRAME_ALIGN to check for
// alignment. They should always request the exact alignment value they need.
// This is to make sure that plugins work over the widest range of AviSynth
// builds possible.
#define FRAME_ALIGN 64

// Target architecture: defines exactly one of X86_64 / X86_32 from the
// compiler's predefined macros, and refuses to build for anything else.
#if   defined(_M_AMD64) || defined(__x86_64)
#   define X86_64
#elif defined(_M_IX86) || defined(__i386__)
#   define X86_32
#else
#   error Unsupported CPU architecture.
#endif

// Compiler family: defines the first matching one of MSVC / GCC / CLANG.
// NOTE(review): __GNUC__ is tested before __clang__; compilers that define
// both would be reported as GCC and never reach the CLANG branch -- confirm
// whether that is intended.
#if   defined(_MSC_VER)
#   define MSVC
#elif defined(__GNUC__)
#   define GCC
#elif defined(__clang__)
#   define CLANG
#else
#   error Unsupported compiler.
#endif

// When GCC is defined, map the MSVC-style __forceinline keyword to plain
// inline so that shared sources using it still compile.
#if   defined(GCC)
#   undef __forceinline
#   define __forceinline inline
#endif

#endif //AVS_CONFIG_H
71 | 


--------------------------------------------------------------------------------
/EEDI3/avs/cpuid.h:
--------------------------------------------------------------------------------
 1 | // This program is free software; you can redistribute it and/or modify
 2 | // it under the terms of the GNU General Public License as published by
 3 | // the Free Software Foundation; either version 2 of the License, or
 4 | // (at your option) any later version.
 5 | //
 6 | // This program is distributed in the hope that it will be useful,
 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception.  An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 | 
32 | #ifndef AVSCORE_CPUID_H
33 | #define AVSCORE_CPUID_H
34 | 
35 | // For GetCPUFlags.  These are backwards-compatible with those in VirtualDub.
36 | // ending with SSE4_2
37 | // For emulation see https://software.intel.com/en-us/articles/intel-software-development-emulator
// Bit flags describing CPU capabilities. Each flag is a distinct bit except
// where noted (CPUF_X86_64 is a combination, CPUF_SSE4 is an alias).
enum {
                    /* oldest CPU to support extension */
  CPUF_FORCE        =  0x01,   //  N/A
  CPUF_FPU          =  0x02,   //  386/486DX
  CPUF_MMX          =  0x04,   //  P55C, K6, PII
  CPUF_INTEGER_SSE  =  0x08,   //  PIII, Athlon
  CPUF_SSE          =  0x10,   //  PIII, Athlon XP/MP
  CPUF_SSE2         =  0x20,   //  PIV, K8
  CPUF_3DNOW        =  0x40,   //  K6-2
  CPUF_3DNOW_EXT    =  0x80,   //  Athlon
  CPUF_X86_64       =  0xA0,   //  Hammer (note: 0xA0 == CPUF_3DNOW_EXT | CPUF_SSE2,
                               //          i.e. not a dedicated bit; the original
                               //          note says only Hammer has that combination)
  CPUF_SSE3         = 0x100,   //  PIV+, K8 Venice
  CPUF_SSSE3        = 0x200,   //  Core 2
  CPUF_SSE4         = 0x400,   //  same value as CPUF_SSE4_1
  CPUF_SSE4_1       = 0x400,   //  Penryn, Wolfdale, Yorkfield
  CPUF_AVX          = 0x800,   //  Sandy Bridge, Bulldozer
  CPUF_SSE4_2       = 0x1000,  //  Nehalem
  // AVS+
  CPUF_AVX2         = 0x2000,   //  Haswell
  CPUF_FMA3         = 0x4000,
  CPUF_F16C         = 0x8000,
  CPUF_MOVBE        = 0x10000,  // Big Endian move
  CPUF_POPCNT       = 0x20000,
  CPUF_AES          = 0x40000,
  CPUF_FMA4         = 0x80000,

  CPUF_AVX512F      = 0x100000,  // AVX-512 Foundation.
  CPUF_AVX512DQ     = 0x200000,  // AVX-512 DQ (Double/Quad granular) Instructions
  CPUF_AVX512PF     = 0x400000,  // AVX-512 Prefetch
  CPUF_AVX512ER     = 0x800000,  // AVX-512 Exponential and Reciprocal
  CPUF_AVX512CD     = 0x1000000, // AVX-512 Conflict Detection
  CPUF_AVX512BW     = 0x2000000, // AVX-512 BW (Byte/Word granular) Instructions
  CPUF_AVX512VL     = 0x4000000, // AVX-512 VL (128/256 Vector Length) Extensions
  CPUF_AVX512IFMA   = 0x8000000, // AVX-512 IFMA integer 52 bit
  CPUF_AVX512VBMI   = 0x10000000,// AVX-512 VBMI
};
75 | 
76 | #ifdef BUILDING_AVSCORE
77 | int GetCPUFlags();
78 | #endif
79 | 
80 | #endif // AVSCORE_CPUID_H
81 | 


--------------------------------------------------------------------------------
/EEDI3/avs/minmax.h:
--------------------------------------------------------------------------------
 1 | // This program is free software; you can redistribute it and/or modify
 2 | // it under the terms of the GNU General Public License as published by
 3 | // the Free Software Foundation; either version 2 of the License, or
 4 | // (at your option) any later version.
 5 | //
 6 | // This program is distributed in the hope that it will be useful,
 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception.  An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 | 
32 | #ifndef AVSCORE_MINMAX_H
33 | #define AVSCORE_MINMAX_H
34 | 
// Returns the smaller of two values of the same type.
// When neither compares less than the other, the second argument is returned.
template<typename T>
T min(T v1, T v2)
{
  if (v1 < v2)
    return v1;
  return v2;
}
40 | 
// Returns the larger of two values of the same type.
// When neither compares greater than the other, the second argument is returned.
template<typename T>
T max(T v1, T v2)
{
  if (v1 > v2)
    return v1;
  return v2;
}
46 | 
// Limits n to the closed range [min, max].
// The upper bound is applied first, then the lower bound, so if the bounds
// are inverted (min > max) the result is min.
template<typename T>
T clamp(T n, T min, T max)
{
    if (n > max)
        n = max;
    if (n < min)
        n = min;
    return n;
}
53 | 
54 | #endif // AVSCORE_MINMAX_H
55 | 


--------------------------------------------------------------------------------
/EEDI3/avs/types.h:
--------------------------------------------------------------------------------
 1 | // Avisynth C Interface Version 0.20
 2 | // Copyright 2003 Kevin Atkinson
 3 | 
 4 | // This program is free software; you can redistribute it and/or modify
 5 | // it under the terms of the GNU General Public License as published by
 6 | // the Free Software Foundation; either version 2 of the License, or
 7 | // (at your option) any later version.
 8 | //
 9 | // This program is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this program; if not, write to the Free Software
16 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
17 | // http://www.gnu.org/copyleft/gpl.html .
18 | //
19 | // As a special exception, I give you permission to link to the
20 | // Avisynth C interface with independent modules that communicate with
21 | // the Avisynth C interface solely through the interfaces defined in
22 | // avisynth_c.h, regardless of the license terms of these independent
23 | // modules, and to copy and distribute the resulting combined work
24 | // under terms of your choice, provided that every copy of the
25 | // combined work is accompanied by a complete copy of the source code
26 | // of the Avisynth C interface and Avisynth itself (with the version
27 | // used to produce the combined work), being distributed under the
28 | // terms of the GNU General Public License plus this exception.  An
29 | // independent module is a module which is not derived from or based
30 | // on Avisynth C Interface, such as 3rd-party filters, import and
31 | // export plugins, or graphical user interfaces.
32 | 
33 | #ifndef AVS_TYPES_H
34 | #define AVS_TYPES_H
35 | 
36 | // Define all types necessary for interfacing with avisynth.dll
37 | 
38 | #ifdef __cplusplus
39 |   #include <cstddef>
40 | #else
41 |   #include <stddef.h>
42 | #endif
43 | 
// Raster types used by VirtualDub & Avisynth
// Pixel32: an unsigned int, presumably holding one packed 32-bit pixel
//          (assumes a 32-bit unsigned int -- confirm for the target ABI).
// BYTE:    a single unsigned 8-bit sample / raw byte.
typedef unsigned int    Pixel32;
typedef unsigned char   BYTE;

// Audio Sample information
// SFLOAT: a floating-point audio sample.
typedef float SFLOAT;

// INT64: signed 64-bit integer. __int64 is used by default; when __GNUC__ is
// defined the standard "long long int" is used instead.
#ifdef __GNUC__
typedef long long int INT64;
#else
typedef __int64 INT64;
#endif
56 | 
57 | #endif //AVS_TYPES_H
58 | 


--------------------------------------------------------------------------------
/EEDI3/avs/win.h:
--------------------------------------------------------------------------------
 1 | // This program is free software; you can redistribute it and/or modify
 2 | // it under the terms of the GNU General Public License as published by
 3 | // the Free Software Foundation; either version 2 of the License, or
 4 | // (at your option) any later version.
 5 | //
 6 | // This program is distributed in the hope that it will be useful,
 7 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 8 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 9 | // GNU General Public License for more details.
10 | //
11 | // You should have received a copy of the GNU General Public License
12 | // along with this program; if not, write to the Free Software
13 | // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
14 | // http://www.gnu.org/copyleft/gpl.html .
15 | //
16 | // Linking Avisynth statically or dynamically with other modules is making a
17 | // combined work based on Avisynth.  Thus, the terms and conditions of the GNU
18 | // General Public License cover the whole combination.
19 | //
20 | // As a special exception, the copyright holders of Avisynth give you
21 | // permission to link Avisynth with independent modules that communicate with
22 | // Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
23 | // terms of these independent modules, and to copy and distribute the
24 | // resulting combined work under terms of your choice, provided that
25 | // every copy of the combined work is accompanied by a complete copy of
26 | // the source code of Avisynth (the version of Avisynth used to produce the
27 | // combined work), being distributed under the terms of the GNU General
28 | // Public License plus this exception.  An independent module is a module
29 | // which is not derived from or based on Avisynth, such as 3rd-party filters,
30 | // import and export plugins, or graphical user interfaces.
31 | 
32 | #ifndef AVSCORE_WIN_H
33 | #define AVSCORE_WIN_H
34 | 
35 | // Whenever you need windows headers, start by including this file, then the rest.
36 | 
37 | // WWUUT? We require XP now?
38 | #if !defined(NTDDI_VERSION) && !defined(_WIN32_WINNT)
39 |   #define NTDDI_VERSION 0x05020000
40 |   #define _WIN32_WINNT  0x0502
41 | #endif
42 | 
43 | #define WIN32_LEAN_AND_MEAN
44 | #define STRICT
45 | #if !defined(NOMINMAX)
46 |     #define NOMINMAX
47 | #endif
48 | 
49 | #include <windows.h>
50 | 
51 | // Provision for UTF-8 max 4 bytes per code point
52 | #define AVS_MAX_PATH MAX_PATH*4
53 | 
54 | #endif // AVSCORE_WIN_H
55 | 


--------------------------------------------------------------------------------
/EEDI3/eedi3.cpp:
--------------------------------------------------------------------------------
   1 | /*
   2 | **   eedi3 (enhanced edge directed interpolation 3). Works by finding the
   3 | **   best non-decreasing (non-crossing) warping between two lines according to
   4 | **   a cost functional. Doesn't really have anything to do with eedi2 aside
   5 | **   from doing edge-directed interpolation (they use different techniques).
   6 | **
   7 | **   Copyright (C) 2015 Shane Panke
   8 | **
   9 | **   Copyright (C) 2010 Kevin Stone
  10 | **
  11 | **   This program is free software; you can redistribute it and/or modify
  12 | **   it under the terms of the GNU General Public License as published by
  13 | **   the Free Software Foundation; either version 2 of the License, or
  14 | **   (at your option) any later version.
  15 | **
  16 | **   This program is distributed in the hope that it will be useful,
  17 | **   but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 | **   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 | **   GNU General Public License for more details.
  20 | **
  21 | **   You should have received a copy of the GNU General Public License
  22 | **   along with this program; if not, write to the Free Software
  23 | **   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24 | */
  25 | 
  26 | #include "eedi3.h"
  27 | 
  28 | eedi3::eedi3(PClip _child, int _field, bool _dh, bool _Y, bool _U, bool _V, float _alpha,
  29 |   float _beta, float _gamma, int _nrad, int _mdis, bool _hp, bool _ucubic, bool _cost3,
  30 |   int _vcheck, float _vthresh0, float _vthresh1, float _vthresh2, PClip _sclip, int _threads,
  31 |   PClip _mclip, int opt, IScriptEnvironment *env)
  32 |   : GenericVideoFilter(_child), field(_field), dh(_dh), Y(_Y), U(_U),
  33 |   V(_V), alpha(_alpha), beta(_beta), gamma(_gamma), nrad(_nrad), mdis(_mdis), hp(_hp),
  34 |   ucubic(_ucubic), cost3(_cost3), vcheck(_vcheck), vthresh0(_vthresh0), vthresh1(_vthresh1),
  35 |   vthresh2(_vthresh2), sclip(_sclip), mclip(_mclip), _sse2_flag(false)
  36 | {
  37 |   if (field < -2 || field > 3)
  38 |     env->ThrowError("eedi3:  field must be set to -2, -1, 0, 1, 2, or 3!");
  39 |   if (dh && (field < -1 || field > 1))
  40 |     env->ThrowError("eedi3:  field must be set to -1, 0, or 1 when dh=true!");
  41 |   if (alpha < 0.0f || alpha > 1.0f)
  42 |     env->ThrowError("eedi3:  0 <= alpha <= 1!\n");
  43 |   if (beta < 0.0f || beta > 1.0f)
  44 |     env->ThrowError("eedi3:  0 <= beta <= 1!\n");
  45 |   if (alpha + beta > 1.0f)
  46 |     env->ThrowError("eedi3:  0 <= alpha+beta <= 1!\n");
  47 |   if (gamma < 0.0f)
  48 |     env->ThrowError("eedi3:  0 <= gamma!\n");
  49 |   if (nrad < 0 || nrad > 3)
  50 |     env->ThrowError("eedi3:  0 <= nrad <= 3!\n");
  51 |   if (mdis < 1 || mdis > 40)
  52 |     env->ThrowError("eedi3:  1 <= mdis <= 40!\n");
  53 |   if (vcheck < 0 || vcheck > 3)
  54 |     env->ThrowError("eedi3:  0 <= vcheck <= 3!\n");
  55 |   if (vcheck > 0 && (vthresh0 <= 0.0f || vthresh1 <= 0.0f || vthresh2 <= 0.0f))
  56 |     env->ThrowError("eedi3:  0 < vthresh0 , 0 < vthresh1 , 0 < vthresh2!\n");
  57 |   if (field == -2)
  58 |     field = child->GetParity(0) ? 3 : 2;
  59 |   else if (field == -1)
  60 |     field = child->GetParity(0) ? 1 : 0;
  61 | 
  62 |   if (mclip)
  63 |   {
  64 |     const ::VideoInfo &	vi2 = mclip->GetVideoInfo();
  65 |     if (vi.height != vi2.height
  66 |       || vi.width != vi2.width
  67 |       || vi.num_frames != vi2.num_frames
  68 |       || !vi.IsSameColorspace(vi2))
  69 |     {
  70 |       env->ThrowError("eedi3:  mclip doesn't match source clip!\n");
  71 |     }
  72 |   }
  73 | 
  74 |   if (opt == 2)
  75 |   {
  76 |     _sse2_flag = true;
  77 |   }
  78 |   else if (opt != 1)
  79 |   {
  80 |     _sse2_flag = ((env->GetCPUFlags() & CPUF_SSE2) != 0);
  81 |   }
  82 |   _sse2_flag = (_sse2_flag && !hp);	// Half-pel SSE2 not implemented yet
  83 | 
  84 |   if (field > 1)
  85 |   {
  86 |     vi.num_frames *= 2;
  87 |     vi.SetFPS(vi.fps_numerator * 2, vi.fps_denominator);
  88 |   }
  89 |   if (dh)
  90 |     vi.height *= 2;
  91 |   vi.SetFieldBased(false);
  92 |   child->SetCacheHints(CACHE_GET_RANGE, 3);
  93 |   mcpPF = 0;
  94 | 
  95 |   const int cpuFlags = env->GetCPUFlags();
  96 | 
  97 |   if (mclip)
  98 |   {
  99 |     ::VideoInfo	vi2 = vi;
 100 |     vi2.height /= 2;
 101 |     mcpPF = new PlanarFrame(vi2, cpuFlags);
 102 |   }
 103 |   srcPF = new PlanarFrame(cpuFlags);
 104 |   if(vi.IsY8())
 105 |     srcPF->createPlanar(vi.height + MARGIN_V * 2, 0,
 106 |       vi.width + MARGIN_H * 2, 0);
 107 |   else
 108 |     srcPF->createPlanar(vi.height + MARGIN_V * 2, (vi.IsYV12() ? (vi.height >> 1) : vi.height) + MARGIN_V * 2,
 109 |       vi.width + MARGIN_H * 2, ((vi.IsYV24() || vi.IsRGB24()) ? vi.width : (vi.width >> 1)) + MARGIN_H * 2);
 110 | 
 111 |   dstPF = new PlanarFrame(vi, cpuFlags);
 112 |   scpPF = new PlanarFrame(vi, cpuFlags);
 113 |   if (_threads > 0)
 114 |     omp_set_num_threads(_threads);
 115 |   const int nthreads = omp_get_max_threads();
 116 |   workspace = (uint8_t**)calloc(nthreads, sizeof(*workspace));
 117 |   dmapa = (int16_t*)_aligned_malloc(dstPF->GetPitch(0)*dstPF->GetHeight(0) * sizeof(*dmapa), 16);
 118 |   if (!workspace || !dmapa)
 119 |     env->ThrowError("eedi3:  malloc failure!\n");
 120 |   const int tpitch = max(mdis * ((hp) ? 4 : 2) + 1, 16);
 121 |   int workspace_size = vi.width * tpitch * 4 * sizeof(float);
 122 |   if (_sse2_flag)
 123 |   {
 124 |     workspace_size = (vi.width + 2 * Eedi3Sse::MARGIN_H) * 4 * sizeof(uint16_t) * Eedi3Sse::COL_H; // src
 125 |     workspace_size += vi.width * 2 * sizeof(int16_t) * Eedi3Sse::COL_H; // dst + dmap
 126 |     workspace_size += (vi.width * sizeof(uint8_t) * Eedi3Sse::COL_H + 15) & -16; // mask
 127 |     workspace_size += vi.width * tpitch * 5 * sizeof(float) * Eedi3Sse::VECTSIZE; // temp
 128 |   }
 129 |   for (int i = 0; i < nthreads; ++i)
 130 |   {
 131 |     workspace[i] = (uint8_t*)_aligned_malloc(workspace_size, 16);
 132 |     if (!workspace[i])
 133 |       env->ThrowError("eedi3:  malloc failure!\n");
 134 |   }
 135 |   if (vcheck > 0 && sclip)
 136 |   {
 137 |     VideoInfo vi2 = sclip->GetVideoInfo();
 138 |     if (vi.height != vi2.height ||
 139 |       vi.width != vi2.width ||
 140 |       vi.num_frames != vi2.num_frames ||
 141 |       !vi.IsSameColorspace(vi2))
 142 |       env->ThrowError("eedi3:  sclip doesn't match!\n");
 143 |   }
 144 | }
 145 | 
 146 | eedi3::~eedi3()
 147 | {
 148 |   delete srcPF;
 149 |   delete dstPF;
 150 |   delete scpPF;
 151 |   delete mcpPF;
 152 |   const int nthreads = omp_get_num_threads();
 153 |   for (int i = 0; i < nthreads; ++i)
 154 |     _aligned_free(workspace[i]);
 155 |   free(workspace);
 156 |   _aligned_free(dmapa);
 157 | }
 158 | 
 159 | void expand_mask(bool bmask[], const uint8_t maskp[], int width, int mdis)
 160 | {
 161 |   const int	minmdis = (width < mdis) ? width : mdis;
 162 | 
 163 |   int			last = -666999;
 164 | 
 165 |   for (int x = 0; x < minmdis; ++x)
 166 |   {
 167 |     if (maskp[x] != 0)
 168 |     {
 169 |       last = x + mdis;
 170 |     }
 171 |   }
 172 | 
 173 |   for (int x = 0; x < width - minmdis; ++x)
 174 |   {
 175 |     if (maskp[x + mdis] != 0)
 176 |     {
 177 |       last = x + mdis * 2;
 178 |     }
 179 |     bmask[x] = (x <= last);
 180 |   }
 181 | 
 182 |   for (int x = width - minmdis; x < width; ++x)
 183 |   {
 184 |     bmask[x] = (x <= last);
 185 |   }
 186 | }
 187 | 
 188 | // Full-pel steps
 189 | void interpLineFP(const uint8_t *srcp, const int width, const int pitch,
 190 |   const float alpha, const float beta, const float gamma, const int nrad,
 191 |   const int mdis, float *temp, uint8_t *dstp, int16_t *dmap, const bool ucubic,
 192 |   const bool cost3, const uint8_t *maskp)
 193 | {
 194 |   const uint8_t *src3p = srcp - 3 * pitch;
 195 |   const uint8_t *src1p = srcp - 1 * pitch;
 196 |   const uint8_t *src1n = srcp + 1 * pitch;
 197 |   const uint8_t *src3n = srcp + 3 * pitch;
 198 |   const int tpitch = mdis * 2 + 1;
 199 |   float *ccosts = temp;	// Array of mdis*2+1 costs for each pixel of the line
 200 |   float *pcosts = ccosts + width * tpitch;
 201 |   int *pbackt = (int*)(pcosts + width * tpitch);
 202 |   int *fpath = pbackt + width * tpitch;
 203 |   bool *bmask = (bool *)(fpath + width);
 204 |   if (maskp != 0)
 205 |   {
 206 |     memset(ccosts, 0, sizeof(ccosts[0]) * tpitch * width);
 207 |     expand_mask(bmask, maskp, width, mdis);
 208 |   }
 209 |   // calculate all connection costs
 210 |   if (!cost3)
 211 |   {
 212 |     for (int x = 0; x < width; ++x)
 213 |     {
 214 |       if (maskp == 0 || bmask[x])
 215 |       {
 216 |         const int umax = min(min(x, width - 1 - x), mdis);
 217 |         for (int u = -umax; u <= umax; ++u)
 218 |         {
 219 |           int s = 0;
 220 |           for (int k = -nrad; k <= nrad; ++k)
 221 |             s +=
 222 |             abs(src3p[x + u + k] - src1p[x - u + k]) +
 223 |             abs(src1p[x + u + k] - src1n[x - u + k]) +
 224 |             abs(src1n[x + u + k] - src3n[x - u + k]);
 225 |           const int ip = (src1p[x + u] + src1n[x - u] + 1) >> 1; // should use cubic if ucubic=true
 226 |           const int v = abs(src1p[x] - ip) + abs(src1n[x] - ip);
 227 |           ccosts[x*tpitch + mdis + u] = alpha * s + beta * abs(u) + (1.0f - alpha - beta)*v;
 228 |         }
 229 |       }
 230 |     }
 231 |   }
 232 |   else
 233 |   {
 234 |     for (int x = 0; x < width; ++x)
 235 |     {
 236 |       if (maskp == 0 || bmask[x])
 237 |       {
 238 |         const int umax = min(min(x, width - 1 - x), mdis);
 239 |         for (int u = -umax; u <= umax; ++u)
 240 |         {
 241 |           int s0 = 0, s1 = -1, s2 = -1;
 242 |           for (int k = -nrad; k <= nrad; ++k)
 243 |             s0 +=
 244 |             abs(src3p[x + u + k] - src1p[x - u + k]) +
 245 |             abs(src1p[x + u + k] - src1n[x - u + k]) +
 246 |             abs(src1n[x + u + k] - src3n[x - u + k]);
 247 |           if ((u >= 0 && x >= u * 2) || (u <= 0 && x < width + u * 2))
 248 |           {
 249 |             s1 = 0;
 250 |             for (int k = -nrad; k <= nrad; ++k)
 251 |               s1 +=
 252 |               abs(src3p[x + k] - src1p[x - u * 2 + k]) +
 253 |               abs(src1p[x + k] - src1n[x - u * 2 + k]) +
 254 |               abs(src1n[x + k] - src3n[x - u * 2 + k]);
 255 |           }
 256 |           if ((u <= 0 && x >= -u * 2) || (u >= 0 && x < width + u * 2)) // LDS: fixed u -> -u
 257 |           {
 258 |             s2 = 0;
 259 |             for (int k = -nrad; k <= nrad; ++k)
 260 |               s2 +=
 261 |               abs(src3p[x + u * 2 + k] - src1p[x + k]) +
 262 |               abs(src1p[x + u * 2 + k] - src1n[x + k]) +
 263 |               abs(src1n[x + u * 2 + k] - src3n[x + k]);
 264 |           }
 265 |           s1 = s1 >= 0 ? s1 : (s2 >= 0 ? s2 : s0);
 266 |           s2 = s2 >= 0 ? s2 : (s1 >= 0 ? s1 : s0);
 267 |           const int ip = (src1p[x + u] + src1n[x - u] + 1) >> 1; // should use cubic if ucubic=true
 268 |           const int v = abs(src1p[x] - ip) + abs(src1n[x] - ip);
 269 |           ccosts[x*tpitch + mdis + u] = alpha * (s0 + s1 + s2)*0.333333f + beta * abs(u) + (1.0f - alpha - beta)*v;
 270 |         }
 271 |       }
 272 |     }
 273 |   }
 274 |   // calculate path costs
 275 |   pcosts[mdis] = ccosts[mdis];
 276 |   for (int x = 1; x < width; ++x)
 277 |   {
 278 |     float *tT = ccosts + x * tpitch;
 279 |     float *ppT = pcosts + (x - 1)*tpitch;
 280 |     float *pT = pcosts + x * tpitch;
 281 |     int   *piT = pbackt + (x - 1)*tpitch;
 282 |     if (maskp != 0 && !bmask[x])
 283 |     {
 284 |       if (x == 1)
 285 |       {
 286 |         const int umax = min(min(x, width - 1 - x), mdis);
 287 |         for (int u = -umax; u <= umax; ++u)
 288 |         {
 289 |           pT[mdis + u] = tT[mdis + u];
 290 |         }
 291 |         memset(piT, 0, sizeof(*piT) * tpitch);
 292 |       }
 293 |       else
 294 |       {
 295 |         memcpy(pT, ppT, sizeof(*ppT) * tpitch);
 296 |         memcpy(piT, piT - tpitch, sizeof(*piT) * tpitch);
 297 |         const int pumax = min(x - 1, width - x);
 298 |         if (pumax < mdis)
 299 |         {
 300 |           piT[mdis - pumax] = 1 - pumax;
 301 |           piT[mdis + pumax] = pumax - 1;
 302 |         }
 303 |       }
 304 |     }
 305 |     else
 306 |     {
 307 |       const int umax = min(min(x, width - 1 - x), mdis);
 308 |       for (int u = -umax; u <= umax; ++u)
 309 |       {
 310 |         int idx;
 311 |         float bval = FLT_MAX;
 312 |         const int umax2 = min(min(x - 1, width - x), mdis);
 313 |         for (int v = max(-umax2, u - 1); v <= min(umax2, u + 1); ++v)
 314 |         {
 315 |           const double y = ppT[mdis + v] + gamma * abs(u - v);
 316 |           const float ccost = (float)min(y, FLT_MAX*0.9);
 317 |           if (ccost < bval)
 318 |           {
 319 |             bval = ccost;
 320 |             idx = v;
 321 |           }
 322 |         }
 323 |         const double y = bval + tT[mdis + u];
 324 |         pT[mdis + u] = (float)min(y, FLT_MAX*0.9);
 325 |         piT[mdis + u] = idx;
 326 |       }
 327 |     }
 328 |   }
 329 |   // backtrack
 330 |   fpath[width - 1] = 0;
 331 |   for (int x = width - 2; x >= 0; --x)
 332 |     fpath[x] = pbackt[x*tpitch + mdis + fpath[x + 1]];
 333 |   // interpolate
 334 |   for (int x = 0; x < width; ++x)
 335 |   {
 336 |     if (maskp != 0 && !bmask[x])
 337 |     {
 338 |       dmap[x] = 0;
 339 |       if (ucubic)
 340 |       {
 341 |         dstp[x] = min(max((9 * (src1p[x] + src1n[x]) -
 342 |           (src3p[x] + src3n[x]) + 8) >> 4, 0), 255);
 343 |       }
 344 |       else
 345 |       {
 346 |         dstp[x] = (src1p[x] + src1n[x] + 1) >> 1;
 347 |       }
 348 |     }
 349 |     else
 350 |     {
 351 |       const int dir = fpath[x];
 352 |       dmap[x] = dir;
 353 |       const int ad = abs(dir);
 354 |       if (ucubic && x >= ad * 3 && x <= width - 1 - ad * 3)
 355 |         dstp[x] = min(max((9 * (src1p[x + dir] + src1n[x - dir]) -
 356 |         (src3p[x + dir * 3] + src3n[x - dir * 3]) + 8) >> 4, 0), 255);
 357 |       else
 358 |         dstp[x] = (src1p[x + dir] + src1n[x - dir] + 1) >> 1;
 359 |     }
 360 |   }
 361 | }
 362 | 
 363 | // Half-pel steps: interpolates one missing line, searching connection directions u in half-pixel units (u in [-2*mdis, 2*mdis]).
 364 | void interpLineHP(const uint8_t *srcp, const int width, const int pitch,
 365 |   const float alpha, const float beta, const float gamma, const int nrad,
 366 |   const int mdis, float *temp, uint8_t *dstp, int16_t *dmap, const bool ucubic,
 367 |   const bool cost3, const uint8_t *maskp)
 368 | { // srcp: position of the missing line; temp: scratch carved up below; dstp/dmap: output pixels / chosen directions; maskp: optional mask (0 = none)
 369 |   const uint8_t *src3p = srcp - 3 * pitch; // rows 3 and 1 above, then 1 and 3 below the missing line
 370 |   const uint8_t *src1p = srcp - 1 * pitch;
 371 |   const uint8_t *src1n = srcp + 1 * pitch;
 372 |   const uint8_t *src3n = srcp + 3 * pitch;
 373 |   const int tpitch = mdis * 4 + 1; // table entries per column: one per half-pel direction, direction 0 at index mdis * 2
 374 |   float *ccosts = temp; // temp layout: connection costs | path costs | backtrack table | final path
 375 |   float *pcosts = ccosts + width * tpitch;
 376 |   int *pbackt = (int*)(pcosts + width * tpitch);
 377 |   int *fpath = pbackt + width * tpitch;
 378 |   uint8_t *hp3p = (uint8_t*)fpath; // the half-pel rows share storage with fpath; they are only read before the backtrack step writes fpath
 379 |   uint8_t *hp1p = hp3p + width;
 380 |   uint8_t *hp1n = hp1p + width;
 381 |   uint8_t *hp3n = hp1n + width;
 382 |   bool *bmask = (bool *)(hp3n + width); // expanded mask is stored right after the half-pel rows
 383 |   // calculate half pel values
 384 |   for (int x = 0; x < width - 1; ++x)
 385 |   {
 386 |     if (!ucubic || (x == 0 || x == width - 2)) // 2-tap average without ucubic and at the first/last position
 387 |     {
 388 |       hp3p[x] = (src3p[x] + src3p[x + 1] + 1) >> 1;
 389 |       hp1p[x] = (src1p[x] + src1p[x + 1] + 1) >> 1;
 390 |       hp1n[x] = (src1n[x] + src1n[x + 1] + 1) >> 1;
 391 |       hp3n[x] = (src3n[x] + src3n[x + 1] + 1) >> 1;
 392 |     }
 393 |     else // 4-tap (-1, 9, 9, -1) / 16 filter, clamped to 0..255
 394 |     {
 395 |       hp3p[x] = min(max((9 * (src3p[x] + src3p[x + 1]) - (src3p[x - 1] + src3p[x + 2]) + 8) >> 4, 0), 255);
 396 |       hp1p[x] = min(max((9 * (src1p[x] + src1p[x + 1]) - (src1p[x - 1] + src1p[x + 2]) + 8) >> 4, 0), 255);
 397 |       hp1n[x] = min(max((9 * (src1n[x] + src1n[x + 1]) - (src1n[x - 1] + src1n[x + 2]) + 8) >> 4, 0), 255);
 398 |       hp3n[x] = min(max((9 * (src3n[x] + src3n[x + 1]) - (src3n[x - 1] + src3n[x + 2]) + 8) >> 4, 0), 255);
 399 |     }
 400 |   }
 401 |   if (maskp != 0)
 402 |   {
 403 |     memset(ccosts, 0, sizeof(ccosts[0]) * tpitch * width); // costs of pixels skipped below stay 0
 404 |     expand_mask(bmask, maskp, width, mdis); // fills bmask from maskp (helper defined elsewhere in this file)
 405 |   }
 406 |   // calculate all connection costs
 407 |   if (!cost3)
 408 |   {
 409 |     for (int x = 0; x < width; ++x)
 410 |     {
 411 |       if (maskp == 0 || bmask[x]) // with a mask, only pixels flagged in bmask get costs
 412 |       {
 413 |         const int umax = min(min(x, width - 1 - x), mdis); // shift limited by the distance to both line ends and by mdis
 414 |         for (int u = -umax * 2; u <= umax * 2; ++u)
 415 |         {
 416 |           int s = 0, ip;
 417 |           const int u2 = u >> 1; // whole-pixel part of u
 418 |           if (!(u & 1)) // even u: whole-pixel shift, compare the source rows directly
 419 |           {
 420 |             for (int k = -nrad; k <= nrad; ++k)
 421 |               s +=
 422 |               abs(src3p[x + u2 + k] - src1p[x - u2 + k]) +
 423 |               abs(src1p[x + u2 + k] - src1n[x - u2 + k]) +
 424 |               abs(src1n[x + u2 + k] - src3n[x - u2 + k]);
 425 |             ip = (src1p[x + u2] + src1n[x - u2] + 1) >> 1; // should use cubic if ucubic=true
 426 |           }
 427 |           else // odd u: compare the precomputed half-pel rows
 428 |           {
 429 |             for (int k = -nrad; k <= nrad; ++k)
 430 |               s +=
 431 |               abs(hp3p[x + u2 + k] - hp1p[x - u2 - 1 + k]) +
 432 |               abs(hp1p[x + u2 + k] - hp1n[x - u2 - 1 + k]) +
 433 |               abs(hp1n[x + u2 + k] - hp3n[x - u2 - 1 + k]);
 434 |             ip = (hp1p[x + u2] + hp1n[x - u2 - 1] + 1) >> 1; // should use cubic if ucubic=true
 435 |           }
 436 |           const int v = abs(src1p[x] - ip) + abs(src1n[x] - ip); // how far the candidate value ip is from the pixels directly above and below
 437 |           ccosts[x*tpitch + mdis * 2 + u] = alpha * s + beta * abs(u)*0.5f + (1.0f - alpha - beta)*v;
 438 |         }
 439 |       }
 440 |     }
 441 |   }
 442 |   else // cost3: average the neighborhood difference over three alignments (s0, s1, s2)
 443 |   {
 444 |     for (int x = 0; x < width; ++x)
 445 |     {
 446 |       if (maskp == 0 || bmask[x])
 447 |       {
 448 |         const int umax = min(min(x, width - 1 - x), mdis);
 449 |         for (int u = -umax * 2; u <= umax * 2; ++u)
 450 |         {
 451 |           int s0 = 0, s1 = -1, s2 = -1, ip; // -1 marks s1/s2 as not computed
 452 |           const int u2 = u >> 1;
 453 |           if (!(u & 1))
 454 |           {
 455 |             for (int k = -nrad; k <= nrad; ++k)
 456 |               s0 +=
 457 |               abs(src3p[x + u2 + k] - src1p[x - u2 + k]) +
 458 |               abs(src1p[x + u2 + k] - src1n[x - u2 + k]) +
 459 |               abs(src1n[x + u2 + k] - src3n[x - u2 + k]);
 460 |             ip = (src1p[x + u2] + src1n[x - u2] + 1) >> 1; // should use cubic if ucubic=true
 461 |           }
 462 |           else
 463 |           {
 464 |             for (int k = -nrad; k <= nrad; ++k)
 465 |               s0 +=
 466 |               abs(hp3p[x + u2 + k] - hp1p[x - u2 - 1 + k]) +
 467 |               abs(hp1p[x + u2 + k] - hp1n[x - u2 - 1 + k]) +
 468 |               abs(hp1n[x + u2 + k] - hp3n[x - u2 - 1 + k]);
 469 |             ip = (hp1p[x + u2] + hp1n[x - u2 - 1] + 1) >> 1; // should use cubic if ucubic=true
 470 |           }
 471 |           if ((u >= 0 && x >= u) || (u <= 0 && x < width + u)) // s1: upper row of each pair fixed at x, lower row shifted by -u
 472 |           {
 473 |             s1 = 0;
 474 |             for (int k = -nrad; k <= nrad; ++k)
 475 |               s1 +=
 476 |               abs(src3p[x + k] - src1p[x - u + k]) +
 477 |               abs(src1p[x + k] - src1n[x - u + k]) +
 478 |               abs(src1n[x + k] - src3n[x - u + k]);
 479 |           }
 480 |           if ((u <= 0 && x >= -u) || (u >= 0 && x < width + u)) // LDS: fixed u -> -u
 481 |           {
 482 |             s2 = 0; // s2: upper row of each pair shifted by +u, lower row fixed at x
 483 |             for (int k = -nrad; k <= nrad; ++k)
 484 |               s2 +=
 485 |               abs(src3p[x + u + k] - src1p[x + k]) +
 486 |               abs(src1p[x + u + k] - src1n[x + k]) +
 487 |               abs(src1n[x + u + k] - src3n[x + k]);
 488 |           }
 489 |           s1 = s1 >= 0 ? s1 : (s2 >= 0 ? s2 : s0); // a sum that was not computed falls back to the other one, then to s0
 490 |           s2 = s2 >= 0 ? s2 : (s1 >= 0 ? s1 : s0);
 491 |           const int v = abs(src1p[x] - ip) + abs(src1n[x] - ip);
 492 |           ccosts[x*tpitch + mdis * 2 + u] = alpha * (s0 + s1 + s2)*0.333333f + beta * abs(u)*0.5f + (1.0f - alpha - beta)*v;
 493 |         }
 494 |       }
 495 |     }
 496 |   }
 497 |   // calculate path costs
 498 |   pcosts[mdis * 2] = ccosts[mdis * 2]; // column 0 only allows direction 0
 499 |   for (int x = 1; x < width; ++x)
 500 |   {
 501 |     float *tT = ccosts + x * tpitch;
 502 |     float *ppT = pcosts + (x - 1)*tpitch;
 503 |     float *pT = pcosts + x * tpitch;
 504 |     int *piT = pbackt + (x - 1)*tpitch; // backtrack row x-1: indexed by the direction at x, holds the direction chosen at x-1
 505 |     if (maskp != 0 && !bmask[x]) // pixel skipped by the mask: no search, reuse earlier data
 506 |     {
 507 |       if (x == 1)
 508 |       {
 509 |         const int umax = min(min(x, width - 1 - x), mdis);
 510 |         for (int u = -umax * 2; u <= umax * 2; ++u)
 511 |         {
 512 |           pT[mdis * 2 + u] = tT[mdis * 2 + u];
 513 |         }
 514 |         memset(piT, 0, sizeof(*piT) * tpitch); // every direction at x=1 leads back to direction 0
 515 |       }
 516 |       else
 517 |       {
 518 |         memcpy(pT, ppT, sizeof(*ppT) * tpitch); // carry the previous column's path costs forward unchanged
 519 |         memcpy(piT, piT - tpitch, sizeof(*piT) * tpitch); // and duplicate the previous backtrack row
 520 |         const int pumax = min(x - 1, width - x);
 521 |         if (pumax < mdis) // near a line end: pull the outermost directions one full pixel inward
 522 |         {
 523 |           piT[mdis * 2 - pumax * 2] = (1 - pumax) * 2; // fixed: index relative to the table center mdis * 2 (was mdis, which hit unrelated or negative slots)
 524 |           piT[mdis * 2 - pumax * 2 + 1] = (1 - pumax) * 2;
 525 |           piT[mdis * 2 + pumax * 2 - 1] = (pumax - 1) * 2;
 526 |           piT[mdis * 2 + pumax * 2] = (pumax - 1) * 2;
 527 |         }
 528 |       }
 529 |     }
 530 |     else
 531 |     {
 532 |       const int umax = min(min(x, width - 1 - x), mdis);
 533 |       for (int u = -umax * 2; u <= umax * 2; ++u)
 534 |       {
 535 |         int idx;
 536 |         float bval = FLT_MAX;
 537 |         const int umax2 = min(min(x - 1, width - x), mdis); // direction limit of the previous column
 538 |         for (int v = max(-umax2 * 2, u - 2); v <= min(umax2 * 2, u + 2); ++v) // predecessors within 2 half-pel steps of u
 539 |         {
 540 |           const double y = ppT[mdis * 2 + v] + gamma * abs(u - v)*0.5f; // gamma penalizes a change of direction
 541 |           const float ccost = (float)min(y, FLT_MAX*0.9); // clamp so the cost always stays below FLT_MAX
 542 |           if (ccost < bval)
 543 |           {
 544 |             bval = ccost;
 545 |             idx = v;
 546 |           }
 547 |         }
 548 |         const double y = bval + tT[mdis * 2 + u];
 549 |         pT[mdis * 2 + u] = (float)min(y, FLT_MAX*0.9);
 550 |         piT[mdis * 2 + u] = idx;
 551 |       }
 552 |     }
 553 |   }
 554 |   // backtrack
 555 |   fpath[width - 1] = 0; // the path is pinned to direction 0 at the last column
 556 |   for (int x = width - 2; x >= 0; --x)
 557 |     fpath[x] = pbackt[x*tpitch + mdis * 2 + fpath[x + 1]];
 558 |   // interpolate
 559 |   for (int x = 0; x < width; ++x)
 560 |   {
 561 |     if (maskp != 0 && !bmask[x]) // pixel skipped by the mask: direction 0, purely vertical interpolation
 562 |     {
 563 |       dmap[x] = 0;
 564 |       if (ucubic)
 565 |         dstp[x] = min(max((9 * (src1p[x] + src1n[x]) -
 566 |         (src3p[x] + src3n[x]) + 8) >> 4, 0), 255);
 567 |       else
 568 |         dstp[x] = (src1p[x] + src1n[x] + 1) >> 1;
 569 |     }
 570 |     else
 571 |     {
 572 |       const int dir = fpath[x];
 573 |       dmap[x] = dir; // stored in half-pel units
 574 |       if (!(dir & 1)) // even direction: whole-pixel offsets
 575 |       {
 576 |         const int d2 = dir >> 1;
 577 |         const int ad = abs(d2);
 578 |         if (ucubic && x >= ad * 3 && x <= width - 1 - ad * 3) // cubic only when the 3x offsets stay inside the line
 579 |           dstp[x] = min(max((9 * (src1p[x + d2] + src1n[x - d2]) -
 580 |           (src3p[x + d2 * 3] + src3n[x - d2 * 3]) + 8) >> 4, 0), 255);
 581 |         else
 582 |           dstp[x] = (src1p[x + d2] + src1n[x - d2] + 1) >> 1;
 583 |       }
 584 |       else // odd direction: each tap is the sum of the two pixels around the half-pel position
 585 |       {
 586 |         const int d20 = dir >> 1;
 587 |         const int d21 = (dir + 1) >> 1;
 588 |         const int d30 = (dir * 3) >> 1;
 589 |         const int d31 = (dir * 3 + 1) >> 1;
 590 |         const int ad = max(abs(d30), abs(d31));
 591 |         if (ucubic && x >= ad && x <= width - 1 - ad)
 592 |         {
 593 |           const int c0 = src3p[x + d30] + src3p[x + d31];
 594 |           const int c1 = src1p[x + d20] + src1p[x + d21]; // should use cubic if ucubic=true
 595 |           const int c2 = src1n[x - d20] + src1n[x - d21]; // should use cubic if ucubic=true
 596 |           const int c3 = src3n[x - d30] + src3n[x - d31];
 597 |           dstp[x] = min(max((9 * (c1 + c2) - (c0 + c3) + 16) >> 5, 0), 255); // taps are pair sums, hence +16 and >> 5
 598 |         }
 599 |         else
 600 |           dstp[x] = (src1p[x + d20] + src1p[x + d21] + src1n[x - d20] + src1n[x - d21] + 2) >> 2;
 601 |       }
 602 |     }
 603 |   }
 604 | }
 605 | 
 606 | PVideoFrame __stdcall eedi3::GetFrame(int n, IScriptEnvironment *env) // builds output frame n: one field's lines are copied, the other field's lines are interpolated
 607 | {
 608 |   int field_n;
 609 |   int field_s = n; // index of the source frame to read
 610 |   if (field > 1) // field 2 or 3: two output frames are produced per source frame
 611 |   {
 612 |     if (n & 1) field_n = field == 3 ? 0 : 1;
 613 |     else field_n = field == 3 ? 1 : 0; // field_n alternates with the parity of n; field == 3 starts with 1
 614 |     field_s >>= 1;
 615 |   }
 616 |   else
 617 |     field_n = field;
 618 |   copyPad(field_s, field_n, env); // loads the source frame into the padded buffer srcPF
 619 |   if (mclip)
 620 |   {
 621 |     copyMask(field_s, field_n, env); // loads the mask frame into mcpPF
 622 |   }
 623 |   if (vcheck > 0 && sclip)
 624 |     scpPF->copyFrom(sclip->GetFrame(n, env), vi);
 625 |   int planecount = vi.IsY8() ? 1 : 3;
 626 |   for (int b = 0; b < planecount; ++b)
 627 |   {
 628 |     if ((b == 0 && !Y) ||
 629 |       (b == 1 && !U) ||
 630 |       (b == 2 && !V))
 631 |       continue; // plane disabled through the Y/U/V switches
 632 |     const uint8_t *srcp = srcPF->GetPtr(b);
 633 |     const int spitch = srcPF->GetPitch(b);
 634 |     const int width = srcPF->GetWidth(b); // srcPF dimensions include the MARGIN_H / MARGIN_V padding
 635 |     const int height = srcPF->GetHeight(b);
 636 |     uint8_t *dstp = dstPF->GetPtr(b);
 637 |     const int dpitch = dstPF->GetPitch(b);
 638 |     env->BitBlt(dstp + (1 - field_n)*dpitch, // copy the existing lines (rows 1 - field_n, step 2) into dst unchanged
 639 |       dpitch * 2, srcp + (MARGIN_V + 1 - field_n)*spitch + MARGIN_H,
 640 |       spitch * 2, width - MARGIN_H * 2, (height - MARGIN_V * 2) >> 1);
 641 |     uint8_t *   maskp_base = 0;
 642 |     int               mpitch = 0;
 643 |     if (mclip)
 644 |     {
 645 |       maskp_base = mcpPF->GetPtr(b);
 646 |       mpitch = mcpPF->GetPitch(b);
 647 |     }
 648 | 
 649 |     // SSE2
 650 |     if (_sse2_flag)
 651 |     {
 652 |       assert(!hp); // this path only calls the full-pel SSE routine
 653 | 
 654 |       srcp += MARGIN_V * spitch;
 655 |       dstp += field_n * dpitch; // dstp now points at the first line to interpolate
 656 | 
 657 |       const int   plane_w = width - MARGIN_H * 2;
 658 |       const int   plane_h = height - MARGIN_V * 2;
 659 |       const int   plane_hs = (plane_h + field_n) >> 1; // Number of existing source lines
 660 |       const int   plane_hi = plane_h - plane_hs;       // Number of interpolated lines
 661 |       const int   packedline_stride_pix = plane_w + 2 * Eedi3Sse::MARGIN_H;
 662 |       const int   packedline_stride =
 663 |         packedline_stride_pix * sizeof(uint16_t) * Eedi3Sse::COL_H;
 664 | 
 665 |       // ~99% of the processing time is spent in this loop
 666 | #pragma omp parallel for
 667 |       for (int y = field_n; y < plane_h; y += 2 * Eedi3Sse::COL_H) // each iteration handles a group of Eedi3Sse::COL_H interpolated lines
 668 |       {
 669 |         const int      tidx = omp_get_thread_num(); // selects this thread's private workspace
 670 |         const int      off = (y - field_n) >> 1; // index of the first interpolated line of the group
 671 |         uint8_t* maskp = 0;
 672 |         uint8_t *      src_ptr = workspace[tidx]; // workspace layout: 4 packed source lines | result | direction map | mask | scratch
 673 |         uint8_t *      dst_ptr = src_ptr + 4 * packedline_stride;
 674 |         uint8_t *      dma_ptr = dst_ptr + plane_w * Eedi3Sse::COL_H * sizeof(uint16_t);
 675 |         uint8_t *      msk_ptr = dma_ptr + ((plane_w * Eedi3Sse::COL_H * sizeof(uint8_t) + 15) & -16);
 676 |         uint8_t *      tmp_ptr = msk_ptr + plane_w * Eedi3Sse::COL_H * sizeof(int16_t);
 677 |         if (maskp_base == 0)
 678 |         {
 679 |           msk_ptr = 0; // a null mask pointer is passed on when no mask clip is set
 680 |         }
 681 |         else
 682 |         {
 683 |           Eedi3Sse::prepare_mask_8bits(
 684 |             msk_ptr,
 685 |             maskp_base,
 686 |             mpitch,
 687 |             1,
 688 |             plane_w,
 689 |             plane_hs,
 690 |             off
 691 |           );
 692 |         }
 693 |         Eedi3Sse::prepare_lines_8bits(
 694 |           reinterpret_cast <uint16_t *> (src_ptr),
 695 |           packedline_stride_pix,
 696 |           srcp + spitch * (1 - field_n) + MARGIN_H,  // +spitch* because the C++ version points on the interpolated line. We need the next one.
 697 |           spitch * 2,
 698 |           1,
 699 |           plane_w,
 700 |           plane_hs,
 701 |           off + field_n
 702 |         );
 703 |         Eedi3Sse::interp_lines_full_pel(
 704 |           reinterpret_cast <const __m128i *> (src_ptr),
 705 |           reinterpret_cast <__m128i *> (dst_ptr),
 706 |           msk_ptr,
 707 |           tmp_ptr,
 708 |           reinterpret_cast <__m128i *> (dma_ptr),
 709 |           plane_w,
 710 |           packedline_stride_pix,
 711 |           alpha, beta, gamma,
 712 |           nrad, mdis, ucubic, cost3
 713 |         );
 714 |         Eedi3Sse::copy_result_lines_8bits(
 715 |           dstp,
 716 |           dpitch * 2,
 717 |           reinterpret_cast <const uint16_t *> (dst_ptr),
 718 |           plane_w,
 719 |           1,
 720 |           plane_w,
 721 |           plane_hi,
 722 |           off
 723 |         );
 724 |         if (vcheck > 0) // the direction map is only needed by the vcheck pass below
 725 |         {
 726 |           Eedi3Sse::copy_result_dmap(
 727 |             dmapa,
 728 |             dpitch,
 729 |             reinterpret_cast <int16_t *> (dma_ptr),
 730 |             plane_w,
 731 |             plane_w,
 732 |             plane_hi,
 733 |             off
 734 |           );
 735 |         }
 736 |       }
 737 | 
 738 |       srcp += field_n * spitch; // srcp now matches the C++ branch: first interpolated line inside the padded source
 739 |     }
 740 | 
 741 |     // C++ only
 742 |     else
 743 |     {
 744 |       srcp += (MARGIN_V + field_n)*spitch; // first line to interpolate in the padded source
 745 |       dstp += field_n * dpitch;
 746 | 
 747 |       // ~99% of the processing time is spent in this loop
 748 | #pragma omp parallel for
 749 |       for (int y = MARGIN_V + field_n; y < height - MARGIN_V; y += 2) // one interpolated line per iteration
 750 |       {
 751 |         const int tidx = omp_get_thread_num(); // selects this thread's private workspace
 752 |         const int off = (y - MARGIN_V - field_n) >> 1; // index of the interpolated line within the field
 753 |         uint8_t* maskp = 0;
 754 |         if (maskp_base != 0)
 755 |         {
 756 |           maskp = maskp_base + mpitch * off; // mask buffer holds one row per interpolated line
 757 |         }
 758 |         if (hp)
 759 |           interpLineHP(srcp + MARGIN_H + off * 2 * spitch, width - MARGIN_H * 2, spitch, alpha, beta,
 760 |             gamma, nrad, mdis, (float*)(workspace[tidx]), dstp + off * 2 * dpitch,
 761 |             dmapa + off * dpitch, ucubic, cost3, maskp);
 762 |         else
 763 |           interpLineFP(srcp + MARGIN_H + off * 2 * spitch, width - MARGIN_H * 2, spitch, alpha, beta,
 764 |             gamma, nrad, mdis, (float*)(workspace[tidx]), dstp + off * 2 * dpitch,
 765 |             dmapa + off * dpitch, ucubic, cost3, maskp);
 766 |       }
 767 |     }
 768 |     if (vcheck > 0) // reliability pass: blends interpolated pixels toward cint where the chosen direction looks inconsistent
 769 |     {
 770 |       int16_t *dstpd = dmapa; // direction map row of the current interpolated line (row stride dpitch)
 771 |       const uint8_t *scpp = NULL;
 772 |       int scpitch; // only assigned and used when sclip is set
 773 |       if (sclip)
 774 |       {
 775 |         scpitch = scpPF->GetPitch(b);
 776 |         scpp = scpPF->GetPtr(b) + field_n * scpitch;
 777 |       }
 778 |       for (int y = MARGIN_V + field_n; y < height - MARGIN_V; y += 2)
 779 |       {
 780 |         if (y >= 6 && y < height - 6) // lines closer than 6 rows to the top/bottom of the padded plane are left untouched
 781 |         {
 782 |           const uint8_t *dst3p = srcp - 3 * spitch + MARGIN_H; // the +-3 rows are read from the padded source, the +-1 / +-2 rows from dst
 783 |           const uint8_t *dst2p = dstp - 2 * dpitch;
 784 |           const uint8_t *dst1p = dstp - 1 * dpitch;
 785 |           const uint8_t *dst1n = dstp + 1 * dpitch;
 786 |           const uint8_t *dst2n = dstp + 2 * dpitch;
 787 |           const uint8_t *dst3n = srcp + 3 * spitch + MARGIN_H;
 788 |           uint8_t *tline = workspace[0]; // results are staged here so reads of dstp in this line see unmodified pixels
 789 |           for (int x = 0; x < width - MARGIN_H * 2; ++x)
 790 |           {
 791 |             const int dirc = dstpd[x];
 792 |             const int cint = scpp ? scpp[x] : // fallback value: sclip pixel if given, else vertical cubic interpolation
 793 |               min(max((9 * (dst1p[x] + dst1n[x]) - (dst3p[x] + dst3n[x]) + 8) >> 4, 0), 255);
 794 |             if (dirc == 0)
 795 |             {
 796 |               tline[x] = cint;
 797 |               continue;
 798 |             }
 799 |             const int dirt = dstpd[x - dpitch]; // directions of the interpolated lines above and below
 800 |             const int dirb = dstpd[x + dpitch];
 801 |             if (max(dirc*dirt, dirc*dirb) < 0 || (dirt == dirb && dirt == 0)) // both neighbors point the opposite way, or both are 0
 802 |             {
 803 |               tline[x] = cint;
 804 |               continue;
 805 |             }
 806 |             int it, ib, vt, vb, vc;
 807 |             vc = abs(dstp[x] - dst1p[x]) + abs(dstp[x] - dst1n[x]);
 808 |             if (hp) // directions are in half-pel units
 809 |             {
 810 |               if (!(dirc & 1))
 811 |               {
 812 |                 const int d2 = dirc >> 1;
 813 |                 it = (dst2p[x + d2] + dstp[x - d2] + 1) >> 1;
 814 |                 vt = abs(dst2p[x + d2] - dst1p[x + d2]) + abs(dstp[x + d2] - dst1p[x + d2]);
 815 |                 ib = (dstp[x + d2] + dst2n[x - d2] + 1) >> 1;
 816 |                 vb = abs(dst2n[x - d2] - dst1n[x - d2]) + abs(dstp[x - d2] - dst1n[x - d2]);
 817 |               }
 818 |               else // odd direction: work with sums of the two pixels around the half-pel position
 819 |               {
 820 |                 const int d20 = dirc >> 1;
 821 |                 const int d21 = (dirc + 1) >> 1;
 822 |                 const int pa2p = dst2p[x + d20] + dst2p[x + d21] + 1;
 823 |                 const int pa1p = dst1p[x + d20] + dst1p[x + d21] + 1;
 824 |                 const int ps0 = dstp[x - d20] + dstp[x - d21] + 1;
 825 |                 const int pa0 = dstp[x + d20] + dstp[x + d21] + 1;
 826 |                 const int ps1n = dst1n[x - d20] + dst1n[x - d21] + 1;
 827 |                 const int ps2n = dst2n[x - d20] + dst2n[x - d21] + 1;
 828 |                 it = (pa2p + ps0) >> 2;
 829 |                 vt = (abs(pa2p - pa1p) + abs(pa0 - pa1p)) >> 1;
 830 |                 ib = (pa0 + ps2n) >> 2;
 831 |                 vb = (abs(ps2n - ps1n) + abs(ps0 - ps1n)) >> 1;
 832 |               }
 833 |             }
 834 |             else
 835 |             {
 836 |               it = (dst2p[x + dirc] + dstp[x - dirc] + 1) >> 1;
 837 |               vt = abs(dst2p[x + dirc] - dst1p[x + dirc]) + abs(dstp[x + dirc] - dst1p[x + dirc]);
 838 |               ib = (dstp[x + dirc] + dst2n[x - dirc] + 1) >> 1;
 839 |               vb = abs(dst2n[x - dirc] - dst1n[x - dirc]) + abs(dstp[x - dirc] - dst1n[x - dirc]);
 840 |             }
 841 |             const int d0 = abs(it - dst1p[x]); // d0/d1: error of re-predicting the existing lines above/below along dirc
 842 |             const int d1 = abs(ib - dst1n[x]);
 843 |             const int d2 = abs(vt - vc); // d2/d3: difference in vertical variation versus the current pixel
 844 |             const int d3 = abs(vb - vc);
 845 |             const int mdiff0 = vcheck == 1 ? min(d0, d1) : vcheck == 2 ? ((d0 + d1 + 1) >> 1) : max(d0, d1); // vcheck 1/2/other: min / rounded mean / max
 846 |             const int mdiff1 = vcheck == 1 ? min(d2, d3) : vcheck == 2 ? ((d2 + d3 + 1) >> 1) : max(d2, d3);
 847 |             const float a0 = mdiff0 / vthresh0;
 848 |             const float a1 = mdiff1 / vthresh1;
 849 |             const int dircv = hp ? (abs(dirc) >> 1) : abs(dirc); // direction magnitude in whole pixels
 850 |             const float a2 = max((vthresh2 - dircv) / vthresh2, 0.0f);
 851 |             const float a = min(max(max(a0, a1), a2), 1.0f); // blend weight in [0, 1]; 1 selects cint entirely
 852 |             tline[x] = (int)((1.0 - a)*dstp[x] + a * cint);
 853 |           }
 854 |           memcpy(dstp, tline, width - MARGIN_H * 2);
 855 |         }
 856 |         srcp += 2 * spitch; // advance to the next interpolated line
 857 |         dstp += 2 * dpitch;
 858 |         if (scpp)
 859 |           scpp += 2 * scpitch;
 860 |         dstpd += dpitch;
 861 |       }
 862 |     }
 863 |   }
 864 |   PVideoFrame dst = env->NewVideoFrame(vi);
 865 |   dstPF->copyTo(dst, vi); // export the working buffer into the new output frame
 866 |   return dst;
 867 | }
 868 | 
 869 | void eedi3::copyPad(int n, int fn, IScriptEnvironment *env) // copies the kept lines of child frame n into the padded buffer srcPF and mirrors them into the margins
 870 | {
 871 |   const int off = 1 - fn; // parity of the rows that are filled in srcPF (the rows not interpolated for field fn)
 872 |   PVideoFrame src = child->GetFrame(n, env);
 873 |   int planecount = 3; // rgb24 and YUY2 is converted to 3 planes too
 874 |   if (!dh) // same-height mode: take every second row of the source, starting at row off
 875 |   {
 876 |     if (vi.IsY8() || vi.IsYV12() || vi.IsYV16() || vi.IsYV24())
 877 |     {
 878 |       const int plane[3] = { PLANAR_Y, PLANAR_U, PLANAR_V };
 879 |       planecount = vi.NumComponents(); // override for Y8
 880 |       for (int b = 0; b < planecount; ++b)
 881 |         env->BitBlt(srcPF->GetPtr(b) + srcPF->GetPitch(b)*(MARGIN_V + off) + MARGIN_H, // destination skips the top and left margins
 882 |           srcPF->GetPitch(b) * 2,
 883 |           src->GetReadPtr(plane[b]) + src->GetPitch(plane[b])*off,
 884 |           src->GetPitch(plane[b]) * 2, src->GetRowSize(plane[b]),
 885 |           src->GetHeight(plane[b]) >> 1);
 886 |     }
 887 |     else if (vi.IsYUY2())
 888 |     {
 889 |       srcPF->convYUY2to422(src->GetReadPtr() + src->GetPitch()*off,
 890 |         srcPF->GetPtr(0) + srcPF->GetPitch(0)*(MARGIN_V + off) + MARGIN_H,
 891 |         srcPF->GetPtr(1) + srcPF->GetPitch(1)*(MARGIN_V + off) + MARGIN_H,
 892 |         srcPF->GetPtr(2) + srcPF->GetPitch(2)*(MARGIN_V + off) + MARGIN_H,
 893 |         src->GetPitch() * 2, srcPF->GetPitch(0) * 2, srcPF->GetPitch(1) * 2,
 894 |         vi.width, vi.height >> 1);
 895 |     }
 896 |     else
 897 |     {
 898 |       srcPF->convRGB24to444(src->GetReadPtr() + (vi.height - 1 - off)*src->GetPitch(), // start at the row counted from the end and step backwards (negative pitch)
 899 |         srcPF->GetPtr(0) + srcPF->GetPitch(0)*(MARGIN_V + off) + MARGIN_H,
 900 |         srcPF->GetPtr(1) + srcPF->GetPitch(1)*(MARGIN_V + off) + MARGIN_H,
 901 |         srcPF->GetPtr(2) + srcPF->GetPitch(2)*(MARGIN_V + off) + MARGIN_H,
 902 |         -src->GetPitch() * 2, srcPF->GetPitch(0) * 2, srcPF->GetPitch(1) * 2,
 903 |         vi.width, vi.height >> 1);
 904 |     }
 905 |   }
 906 |   else // dh mode: every source row is used and written to every second row of srcPF
 907 |   {
 908 |     if (vi.IsY8() || vi.IsYV12() || vi.IsYV16() || vi.IsYV24())
 909 |     {
 910 |       const int plane[3] = { PLANAR_Y, PLANAR_U, PLANAR_V };
 911 |       planecount = vi.NumComponents(); // override for Y8
 912 |       for (int b = 0; b < planecount; ++b)
 913 |         env->BitBlt(srcPF->GetPtr(b) + srcPF->GetPitch(b)*(MARGIN_V + off) + MARGIN_H,
 914 |           srcPF->GetPitch(b) * 2, src->GetReadPtr(plane[b]),
 915 |           src->GetPitch(plane[b]), src->GetRowSize(plane[b]),
 916 |           src->GetHeight(plane[b]));
 917 |     }
 918 |     else if (vi.IsYUY2())
 919 |     {
 920 |       srcPF->convYUY2to422(src->GetReadPtr(),
 921 |         srcPF->GetPtr(0) + srcPF->GetPitch(0)*(MARGIN_V + off) + MARGIN_H,
 922 |         srcPF->GetPtr(1) + srcPF->GetPitch(1)*(MARGIN_V + off) + MARGIN_H,
 923 |         srcPF->GetPtr(2) + srcPF->GetPitch(2)*(MARGIN_V + off) + MARGIN_H,
 924 |         src->GetPitch(), srcPF->GetPitch(0) * 2, srcPF->GetPitch(1) * 2,
 925 |         vi.width, vi.height >> 1);
 926 |     }
 927 |     else
 928 |     {
 929 |       srcPF->convRGB24to444(src->GetReadPtr() + ((vi.height >> 1) - 1)*src->GetPitch(), // row (vi.height / 2) - 1 of the source, then backwards one row at a time
 930 |         srcPF->GetPtr(0) + srcPF->GetPitch(0)*(MARGIN_V + off) + MARGIN_H,
 931 |         srcPF->GetPtr(1) + srcPF->GetPitch(1)*(MARGIN_V + off) + MARGIN_H,
 932 |         srcPF->GetPtr(2) + srcPF->GetPitch(2)*(MARGIN_V + off) + MARGIN_H,
 933 |         -src->GetPitch(), srcPF->GetPitch(0) * 2, srcPF->GetPitch(1) * 2,
 934 |         vi.width, vi.height >> 1);
 935 |     }
 936 |   }
 937 |   for (int b = 0; b < planecount; ++b) // fill the margins of every plane by mirroring
 938 |   {
 939 |     uint8_t *dstp = srcPF->GetPtr(b);
 940 |     const int dst_pitch = srcPF->GetPitch(b);
 941 |     const int height = srcPF->GetHeight(b);
 942 |     const int width = srcPF->GetWidth(b);
 943 |     dstp += (MARGIN_V + off)*dst_pitch;
 944 |     for (int y = MARGIN_V + off; y < height - MARGIN_V; y += 2) // only the rows that were filled above
 945 |     {
 946 |       for (int x = 0; x < MARGIN_H; ++x)
 947 |         dstp[x] = dstp[MARGIN_H * 2 - x]; // left margin: mirror about column MARGIN_H
 948 |       int c = 2;
 949 |       for (int x = width - MARGIN_H; x < width; ++x, c += 2)
 950 |         dstp[x] = dstp[x - c]; // right margin: mirror about column width - MARGIN_H - 1
 951 |       dstp += dst_pitch * 2;
 952 |     }
 953 |     dstp = srcPF->GetPtr(b);
 954 |     for (int y = off; y < MARGIN_V; y += 2)
 955 |       env->BitBlt(dstp + y * dst_pitch, dst_pitch, // top margin: row y is a copy of row MARGIN_V * 2 - y
 956 |         dstp + (MARGIN_V * 2 - y)*dst_pitch, dst_pitch, width, 1);
 957 |     int c = 2 + 2 * off;
 958 |     for (int y = height - MARGIN_V + off; y < height; y += 2, c += 4) // bottom margin: source row moves up 2 as y moves down 2
 959 |       env->BitBlt(dstp + y * dst_pitch, dst_pitch,
 960 |         dstp + (y - c)*dst_pitch, dst_pitch, width, 1);
 961 |   }
 962 | }
 963 | 
 964 | void eedi3::copyMask(int n, int fn, IScriptEnvironment *env)
 965 | {
 966 |   // Fetches frame n of mclip and stores it in the planar mask buffer mcpPF.
 967 |   // With dh every mask row is read; otherwise every second row, starting at row fn.
 968 |   int firstRow = fn;
 969 |   int rowStep = 2;
 970 |   if (dh)
 971 |   {
 972 |     firstRow = 0;
 973 |     rowStep = 1;
 974 |   }
 975 |   PVideoFrame mask = mclip->GetFrame(n, env);
 976 |   const bool planarInput = vi.IsY8() || vi.IsYV12() || vi.IsYV16() || vi.IsYV24();
 977 |   if (planarInput)
 978 |   {
 979 |     // straight per-plane copy of the selected rows
 980 |     const int planeIds[3] = { PLANAR_Y, PLANAR_U, PLANAR_V };
 981 |     const int numPlanes = vi.NumComponents(); // override for Y8
 982 |     for (int p = 0; p < numPlanes; ++p)
 983 |     {
 984 |       const int id = planeIds[p];
 985 |       const int maskPitch = mask->GetPitch(id);
 986 |       const uint8_t *from = mask->GetReadPtr(id) + maskPitch * firstRow;
 987 |       env->BitBlt(mcpPF->GetPtr(p), mcpPF->GetPitch(p), from, maskPitch * rowStep,
 988 |         mask->GetRowSize(id), mcpPF->GetHeight(p));
 989 |     }
 990 |     return;
 991 |   }
 992 |   if (vi.IsYUY2())
 993 |   {
 994 |     // packed YUY2 is converted to three planes while copying
 995 |     const int maskPitch = mask->GetPitch();
 996 |     mcpPF->convYUY2to422(mask->GetReadPtr() + maskPitch * firstRow,
 997 |       mcpPF->GetPtr(0), mcpPF->GetPtr(1), mcpPF->GetPtr(2),
 998 |       maskPitch * rowStep, mcpPF->GetPitch(0), mcpPF->GetPitch(1),
 999 |       vi.width, mcpPF->GetHeight(0));
1000 |     return;
1001 |   }
1002 |   // any other format goes through the RGB24 conversion; rows are walked
1003 |   // backwards from the end of the frame by using a negative pitch
1004 |   const int maskPitch = mask->GetPitch();
1005 |   const int startRow = mask->GetHeight() - 1 - firstRow;
1006 |   mcpPF->convRGB24to444(mask->GetReadPtr() + startRow * maskPitch,
1007 |     mcpPF->GetPtr(0), mcpPF->GetPtr(1), mcpPF->GetPtr(2),
1008 |     -maskPitch * rowStep,
1009 |     mcpPF->GetPitch(0), mcpPF->GetPitch(1),
1010 |     vi.width, mcpPF->GetHeight(0));
1011 | }
1012 | 
1013 | AVSValue __cdecl Create_eedi3(AVSValue args, void* user_data, IScriptEnvironment* env)
1014 | { // Factory for the eedi3 script function: validates the input clip, then forwards args[1..21] with their defaults.
1015 |   if (!args[0].IsClip())
1016 |     env->ThrowError("eedi3:  arg 0 must be a clip!");
1017 |   PClip source = args[0].AsClip();
1018 |   const VideoInfo vi = source->GetVideoInfo();
1019 |   const bool supported = vi.IsY8() || vi.IsYV12() || vi.IsYV16() || vi.IsYV24() || vi.IsYUY2() || vi.IsRGB24();
1020 |   if (!supported)
1021 |     env->ThrowError("eedi3:  only Y8, YV12, YV16, YV24, YUY2, and RGB24 input are supported!");
1022 |   const bool dh = args[2].AsBool(false);
1023 |   if (!dh && (vi.height & 1)) // odd heights are accepted only when dh is true
1024 |     env->ThrowError("eedi3:  height must be mod 2 when dh=false (%d)!", vi.height);
1025 |   PClip optClip18 = args[18].IsClip() ? args[18].AsClip() : NULL; // optional clip arguments: NULL when not supplied
1026 |   PClip optClip20 = args[20].IsClip() ? args[20].AsClip() : NULL;
1027 |   return new eedi3(source, args[1].AsInt(-1), dh, args[3].AsBool(true), args[4].AsBool(true), args[5].AsBool(true),
1028 |     float(args[6].AsFloat(0.2f)), float(args[7].AsFloat(0.25f)), float(args[8].AsFloat(20.0f)), args[9].AsInt(2),
1029 |     args[10].AsInt(20), args[11].AsBool(false), args[12].AsBool(true), args[13].AsBool(true), args[14].AsInt(2),
1030 |     float(args[15].AsFloat(32.0f)), float(args[16].AsFloat(64.0f)), float(args[17].AsFloat(4.0f)), optClip18, args[19].AsInt(0), optClip20, args[21].AsInt(0), env);
1031 | }
1032 | 
1033 | AVSValue __cdecl Create_eedi3_rpow2(AVSValue args, void* user_data, IScriptEnvironment *env)
1034 | {
1035 |   if (!args[0].IsClip())
1036 |     env->ThrowError("eedi3_rpow2:  arg 0 must be a clip!");
1037 |   VideoInfo vi = args[0].AsClip()->GetVideoInfo();
1038 |   if (!vi.IsY8() && !vi.IsYV12() && !vi.IsYV16() && !vi.IsYV24() && !vi.IsYUY2() && !vi.IsRGB24())
1039 |     env->ThrowError("eedi3_rpow2:  only Y8, YV12, YV16, YV24, YUY2, and RGB24 input are supported!");
1040 |   if (vi.IsYUY2() && (vi.width & 3))
1041 |     env->ThrowError("eedi3_rpow2:  for yuy2 input width must be mod 4 (%d)!", vi.width);
1042 |   const int rfactor = args[1].AsInt(-1);
1043 |   const float alpha = float(args[2].AsFloat(0.2f));
1044 |   const float beta = float(args[3].AsFloat(0.25f));
1045 |   const float gamma = float(args[4].AsFloat(20.0f));
1046 |   const int nrad = args[5].AsInt(2);
1047 |   const int mdis = args[6].AsInt(20);
1048 |   const bool hp = args[7].AsBool(false);
1049 |   const bool ucubic = args[8].AsBool(true);
1050 |   const bool cost3 = args[9].AsBool(true);
1051 |   const int vcheck = args[10].AsInt(2);
1052 |   const float vthresh0 = float(args[11].AsFloat(32.0f));
1053 |   const float vthresh1 = float(args[12].AsFloat(64.0f));
1054 |   const float vthresh2 = float(args[13].AsFloat(4.0f));
1055 |   PClip sclip = NULL;
1056 |   const char *cshift = args[14].AsString("");
1057 |   const int fwidth = args[15].IsInt() ? args[15].AsInt() : rfactor * vi.width;
1058 |   const int fheight = args[16].IsInt() ? args[16].AsInt() : rfactor * vi.height;
1059 |   const float ep0 = args[17].IsFloat() ? float(args[17].AsFloat()) : -FLT_MAX;
1060 |   const float ep1 = args[18].IsFloat() ? float(args[18].AsFloat()) : -FLT_MAX;
1061 |   const int threads = args[19].AsInt(0);
1062 |   const int opt = args[20].AsInt(0);
1063 |   if (rfactor < 2 || rfactor > 1024)
1064 |     env->ThrowError("eedi3_rpow2:  2 <= rfactor <= 1024, and rfactor be a power of 2!\n");
1065 |   int rf = 1, ct = 0;
1066 |   while (rf < rfactor)
1067 |   {
1068 |     rf *= 2;
1069 |     ++ct;
1070 |   }
1071 |   if (rf != rfactor)
1072 |     env->ThrowError("eedi3_rpow2:  2 <= rfactor <= 1024, and rfactor be a power of 2!\n");
1073 |   if (alpha < 0.0f || alpha > 1.0f)
1074 |     env->ThrowError("eedi3_rpow2:  0 <= alpha <= 1!\n");
1075 |   if (beta < 0.0f || beta > 1.0f)
1076 |     env->ThrowError("eedi3_rpow2:  0 <= beta <= 1!\n");
1077 |   if (alpha + beta > 1.0f)
1078 |     env->ThrowError("eedi3_rpow2:  0 <= alpha+beta <= 1!\n");
1079 |   if (gamma < 0.0f)
1080 |     env->ThrowError("eedi3_rpow2:  0 <= gamma!\n");
1081 |   if (nrad < 0 || nrad > 3)
1082 |     env->ThrowError("eedi3_rpow2:  0 <= nrad <= 3!\n");
1083 |   if (mdis < 1)
1084 |     env->ThrowError("eedi3_rpow2:  1 <= mdis!\n");
1085 |   if (vcheck > 0 && (vthresh0 <= 0.0f || vthresh1 <= 0.0f || vthresh2 <= 0.0f))
1086 |     env->ThrowError("eedi3_rpow2:  0 < vthresh0 , 0 < vthresh1 , 0 < vthresh2!\n");
1087 |   AVSValue v = args[0].AsClip();
1088 |   try
1089 |   {
1090 |     double hshift = 0.0, vshift = 0.0;
1091 |     if (vi.IsRGB24())
1092 |     {
1093 |       for (int i = 0; i < ct; ++i)
1094 |       {
1095 |         v = new eedi3(v.AsClip(), i == 0 ? 1 : 0, true, true, true, true, alpha,
1096 |           beta, gamma, nrad, mdis, hp, ucubic, cost3, vcheck, vthresh0,
1097 |           vthresh1, vthresh2, sclip, threads, 0, opt, env);
1098 |         v = env->Invoke("TurnRight", v).AsClip();
1099 |         v = new eedi3(v.AsClip(), i == 0 ? 1 : 0, true, true, true, true, alpha,
1100 |           beta, gamma, nrad, mdis, hp, ucubic, cost3, vcheck, vthresh0,
1101 |           vthresh1, vthresh2, sclip, threads, 0, opt, env);
1102 |         v = env->Invoke("TurnLeft", v).AsClip();
1103 |       }
1104 |       hshift = vshift = -0.5;
1105 |     }
1106 |     else if (vi.IsPlanar()) // Y8, Y12, Y16, Y24
1107 |     {
1108 |       for (int i = 0; i < ct; ++i)
1109 |       {
1110 |         v = new eedi3(v.AsClip(), i == 0 ? 1 : 0, true, true, true, true, alpha, beta,
1111 |           gamma, nrad, mdis, hp, ucubic, cost3, vcheck, vthresh0, vthresh1,
1112 |           vthresh2, sclip, threads, 0, opt, env);
1113 |         v = env->Invoke("TurnRight", v).AsClip();
1114 |         // always use field=1 to keep chroma/luma horizontal alignment
1115 |         v = new eedi3(v.AsClip(), 1, true, true, true, true, alpha, beta, gamma,
1116 |           nrad, mdis, hp, ucubic, cost3, vcheck, vthresh0, vthresh1, vthresh2,
1117 |           sclip, threads, 0, opt, env);
1118 |         v = env->Invoke("TurnLeft", v).AsClip();
1119 |       }
1120 |       // Correct chroma shift (it's always 1/2 pixel upwards).
1121 |       // Need a cache here because v/vc will both request from this point.
1122 |       v = env->Invoke("InternalCache", v).AsClip();
1123 |       v.AsClip()->SetCacheHints(CACHE_GET_RANGE, 2);
1124 |       AVSValue sargs[7] = { v, vi.width*rfactor, vi.height*rfactor, 0.0, -0.5,
1125 |         vi.width*rfactor, vi.height*rfactor };
1126 |       const char *nargs[7] = { 0, 0, 0, "src_left", "src_top",
1127 |         "src_width", "src_height" };
1128 |       AVSValue vc = env->Invoke("Spline36Resize", AVSValue(sargs, 7), nargs).AsClip();
1129 |       AVSValue margs[2] = { v, vc };
1130 |       v = env->Invoke("MergeChroma", AVSValue(margs, 2)).AsClip();
1131 |       for (int i = 0; i < ct; ++i)
1132 |         hshift = hshift * 2.0 - 0.5;
1133 |       vshift = -0.5;
1134 |     }
1135 |     else
1136 |     {
1137 |       // Unfortunately, turnleft()/turnright() can't preserve YUY2 chroma, so we convert
1138 |       // U/V planes to Y planes in separate clips and process them that way.
1139 |       AVSValue vu = env->Invoke("UtoY", v).AsClip();
1140 |       AVSValue vv = env->Invoke("VtoY", v).AsClip();
1141 |       for (int i = 0; i < ct; ++i)
1142 |       {
1143 |         v = new eedi3(v.AsClip(), i == 0 ? 1 : 0, true, true, false, false, alpha, beta, gamma,
1144 |           nrad, mdis, hp, ucubic, cost3, vcheck, vthresh0, vthresh1, vthresh2, sclip, threads, 0, opt, env);
1145 |         v = env->Invoke("TurnRight", v).AsClip();
1146 |         // always use field=1 to keep chroma/luma horizontal alignment
1147 |         v = new eedi3(v.AsClip(), 1, true, true, false, false, alpha, beta, gamma, nrad,
1148 |           mdis, hp, ucubic, cost3, vcheck, vthresh0, vthresh1, vthresh2, sclip, threads, 0, opt, env);
1149 |         v = env->Invoke("TurnLeft", v).AsClip();
1150 |       }
1151 |       for (int i = 0; i < ct; ++i)
1152 |       {
1153 |         vu = new eedi3(vu.AsClip(), i == 0 ? 1 : 0, true, true, false, false, alpha, beta,
1154 |           gamma, nrad, mdis, hp, ucubic, cost3, vcheck, vthresh0, vthresh1,
1155 |           vthresh2, sclip, threads, 0, opt, env);
1156 |         vu = env->Invoke("TurnRight", vu).AsClip();
1157 |         // always use field=1 to keep chroma/luma horizontal alignment
1158 |         vu = new eedi3(vu.AsClip(), 1, true, true, false, false, alpha, beta, gamma,
1159 |           nrad, mdis, hp, ucubic, cost3, vcheck, vthresh0, vthresh1, vthresh2,
1160 |           sclip, threads, 0, opt, env);
1161 |         vu = env->Invoke("TurnLeft", vu).AsClip();
1162 |       }
1163 |       for (int i = 0; i < ct; ++i)
1164 |       {
1165 |         vv = new eedi3(vv.AsClip(), i == 0 ? 1 : 0, true, true, false, false, alpha, beta,
1166 |           gamma, nrad, mdis, hp, ucubic, cost3, vcheck, vthresh0, vthresh1, vthresh2,
1167 |           sclip, threads, 0, opt, env);
1168 |         vv = env->Invoke("TurnRight", vv).AsClip();
1169 |         // always use field=1 to keep chroma/luma horizontal alignment
1170 |         vv = new eedi3(vv.AsClip(), 1, true, true, false, false, alpha, beta, gamma,
1171 |           nrad, mdis, hp, ucubic, cost3, vcheck, vthresh0, vthresh1, vthresh2,
1172 |           sclip, threads, 0, opt, env);
1173 |         vv = env->Invoke("TurnLeft", vv).AsClip();
1174 |       }
1175 |       AVSValue ytouvargs[3] = { vu, vv, v };
1176 |       v = env->Invoke("YtoUV", AVSValue(ytouvargs, 3)).AsClip();
1177 |       for (int i = 0; i < ct; ++i)
1178 |         hshift = hshift * 2.0 - 0.5;
1179 |       vshift = -0.5;
1180 |     }
1181 |     if (cshift[0])
1182 |     {
1183 |       int type = 0;
1184 |       if (_strnicmp(cshift, "blackmanresize", 14) == 0 ||
1185 |         _strnicmp(cshift, "lanczosresize", 13) == 0 ||
1186 |         _strnicmp(cshift, "sincresize", 10) == 0)
1187 |         type = 1;
1188 |       else if (_strnicmp(cshift, "gaussresize", 11) == 0)
1189 |         type = 2;
1190 |       else if (_strnicmp(cshift, "bicubicresize", 13) == 0)
1191 |         type = 3;
1192 |       if (!type || (type != 3 && ep0 == -FLT_MAX) ||
1193 |         (type == 3 && ep0 == -FLT_MAX && ep1 == -FLT_MAX))
1194 |       {
1195 |         AVSValue sargs[7] = { v, fwidth, fheight, hshift, vshift,
1196 |           vi.width*rfactor, vi.height*rfactor };
1197 |         const char *nargs[7] = { 0, 0, 0, "src_left", "src_top",
1198 |           "src_width", "src_height" };
1199 |         v = env->Invoke(cshift, AVSValue(sargs, 7), nargs).AsClip();
1200 |       }
1201 |       else if (type != 3 || min(ep0, ep1) == -FLT_MAX)
1202 |       {
1203 |         AVSValue sargs[8] = { v, fwidth, fheight, hshift, vshift,
1204 |           vi.width*rfactor, vi.height*rfactor, type == 1 ? AVSValue((int)(ep0 + 0.5f)) :
1205 |           (type == 2 ? ep0 : max(ep0, ep1)) };
1206 |         const char *nargs[8] = { 0, 0, 0, "src_left", "src_top",
1207 |           "src_width", "src_height", type == 1 ? "taps" : (type == 2 ? "p" : (max(ep0, ep1) == ep0 ? "b" : "c")) };
1208 |         v = env->Invoke(cshift, AVSValue(sargs, 8), nargs).AsClip();
1209 |       }
1210 |       else
1211 |       {
1212 |         AVSValue sargs[9] = { v, fwidth, fheight, hshift, vshift,
1213 |           vi.width*rfactor, vi.height*rfactor, ep0, ep1 };
1214 |         const char *nargs[9] = { 0, 0, 0, "src_left", "src_top",
1215 |           "src_width", "src_height", "b", "c" };
1216 |         v = env->Invoke(cshift, AVSValue(sargs, 9), nargs).AsClip();
1217 |       }
1218 |     }
1219 |   }
1220 |   catch (IScriptEnvironment::NotFound)
1221 |   {
1222 |     env->ThrowError("eedi3_rpow2:  error using env->invoke (function not found)!\n");
1223 |   }
1224 |   return v;
1225 | }
1226 | 
1227 | const AVS_Linkage *AVS_linkage = nullptr;
1228 | 
1229 | extern "C" __declspec(dllexport) const char* __stdcall AvisynthPluginInit3(IScriptEnvironment* env, const AVS_Linkage* const vectors)
1230 | {
1231 |   AVS_linkage = vectors;
1232 | 
1233 |   env->AddFunction("eedi3", "c[field]i[dh]b[Y]b[U]b[V]b[alpha]f[beta]f[gamma]f[nrad]i[mdis]i" \
1234 |     "[hp]b[ucubic]b[cost3]b[vcheck]i[vthresh0]f[vthresh1]f[vthresh2]f[sclip]c[threads]i[mclip]c[opt]i",
1235 |     Create_eedi3, 0);
1236 |   env->AddFunction("eedi3_rpow2", "c[rfactor]i[alpha]f[beta]f[gamma]f[nrad]i[mdis]i[hp]b" \
1237 |     "[ucubic]b[cost3]b[vcheck]i[vthresh0]f[vthresh1]f[vthresh2]f[cshift]s[fwidth]i" \
1238 |     "[fheight]i[ep0]f[ep1]f[threads]i[opt]i",
1239 |     Create_eedi3_rpow2, 0);
1240 |   return "eedi3 plugin";
1241 | }
1242 | 


--------------------------------------------------------------------------------
/EEDI3/eedi3.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | **   eedi3 (enhanced edge directed interpolation 3). Works by finding the
 3 | **   best non-decreasing (non-crossing) warping between two lines according to
 4 | **   a cost functional. Doesn't really have anything to do with eedi2 aside
 5 | **   from doing edge-directed interpolation (they use different techniques).
 6 | **
 7 | **   Copyright (C) 2015 Shane Panke
 8 | **
 9 | **   Copyright (C) 2010 Kevin Stone
10 | **
11 | **   This program is free software; you can redistribute it and/or modify
12 | **   it under the terms of the GNU General Public License as published by
13 | **   the Free Software Foundation; either version 2 of the License, or
14 | **   (at your option) any later version.
15 | **
16 | **   This program is distributed in the hope that it will be useful,
17 | **   but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | **   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 | **   GNU General Public License for more details.
20 | **
21 | **   You should have received a copy of the GNU General Public License
22 | **   along with this program; if not, write to the Free Software
23 | **   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24 | */
25 | 
26 | 
27 | #include <cassert>
28 | #include <float.h>
29 | #include <math.h>
30 | #include <omp.h>
31 | #include <stdint.h>
32 | #include <stdio.h>
33 | #include <windows.h>
34 | 
35 | #include "Eedi3Sse.h"
36 | #include "PlanarFrame.h"
37 | 
38 | class eedi3 : public GenericVideoFilter
39 | {
40 | private:
41 | 	enum {         MARGIN_H = 12 };  // Left and right margins for the virtual source frame
42 | 	enum {         MARGIN_V =  4 };  // Top and bottom margins
43 | 
44 | 	bool dh, Y, U, V, hp, ucubic, cost3;
45 | 	float alpha, beta, gamma,  vthresh0, vthresh1, vthresh2;
46 | 	int field, nrad, mdis, vcheck;
47 | 	int16_t *dmapa;
48 | 	bool _sse2_flag;
49 | 
50 | 	// Array of pointers (one per thread) to the temporary workzone.
51 | 	// Each zone is made of:
52 | 	// CPP:
53 | 	// - connection cost:     w * (mdis*2*pel+1) float
54 | 	// - path cost      :     w * (mdis*2*pel+1) float
55 | 	// - backtrack index:     w * (mdis*2*pel+1) int32
56 | 	// - final path     :     w                  int32
57 | 	// - mask           :     w                  bool
58 | 	// SSE/SSE2:
59 | 	// - Temporary src  : 8 * (w+2*MARGIN_H) * 4 uint16
60 | 	// - Temporary dst  : 8 * w                  uint16
61 | 	// - Temporary dmap : 8 * w                  int16
62 | 	// - connection cost: 8 * w * (mdis*2*pel+1) float
63 | 	// - path cost      : 4 * w * (mdis*2*pel+1) float
64 | 	// - backtrack index: 4 * w * (mdis*2*pel+1) int32
65 | 	// - final path     : 4 * w                  int32
66 | 	// - mask           :     w                  bool
67 | 	uint8_t **workspace;
68 | 	PlanarFrame *srcPF, *dstPF, *scpPF;
69 | 	PlanarFrame *mcpPF;	// Fields from the mask clip
70 | 	PClip sclip;
71 | 	PClip mclip;
72 | 	void copyPad(int n, int fn, IScriptEnvironment *env);
73 | 	void copyMask(int n, int fn, IScriptEnvironment *env);
74 | 
75 | public:
76 | 	eedi3::eedi3(PClip _child, int _field, bool _dh, bool _Y, bool _U, bool _V, 
77 | 		float _alpha, float _beta, float _gamma, int _nrad, int _mdis, bool _hp, 
78 | 		bool _ucubic, bool _cost3, int _vcheck, float _vthresh0, float _vthresh1, 
79 | 		float _vthresh2, PClip _sclip, int _threads, PClip _mclip, int opt,
80 | 		IScriptEnvironment *env);
81 | 	eedi3::~eedi3();
82 | 	PVideoFrame __stdcall eedi3::GetFrame(int n, IScriptEnvironment *env);
83 | };


--------------------------------------------------------------------------------
/EEDI3/eedi3.rc:
--------------------------------------------------------------------------------
 1 | // Microsoft Visual C++ generated resource script.
 2 | //
 3 | #include "resource.h"
 4 | 
 5 | #define APSTUDIO_READONLY_SYMBOLS
 6 | /////////////////////////////////////////////////////////////////////////////
 7 | //
 8 | // Generated from the TEXTINCLUDE 2 resource.
 9 | //
10 | #include "afxres.h"
11 | 
12 | /////////////////////////////////////////////////////////////////////////////
13 | #undef APSTUDIO_READONLY_SYMBOLS
14 | 
15 | /////////////////////////////////////////////////////////////////////////////
16 | // English (United States) resources
17 | 
18 | #if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
19 | LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
20 | #pragma code_page(1252)
21 | 
22 | #ifdef APSTUDIO_INVOKED
23 | /////////////////////////////////////////////////////////////////////////////
24 | //
25 | // TEXTINCLUDE
26 | //
27 | 
28 | 1 TEXTINCLUDE 
29 | BEGIN
30 |     "resource.h\0"
31 | END
32 | 
33 | 2 TEXTINCLUDE 
34 | BEGIN
35 |     "#include ""afxres.h""\r\n"
36 |     "\0"
37 | END
38 | 
39 | 3 TEXTINCLUDE 
40 | BEGIN
41 |     "\r\n"
42 |     "\0"
43 | END
44 | 
45 | #endif    // APSTUDIO_INVOKED
46 | 
47 | 
48 | /////////////////////////////////////////////////////////////////////////////
49 | //
50 | // Version
51 | //
52 | 
53 | VS_VERSION_INFO VERSIONINFO
54 |  FILEVERSION 0,9,2,3
55 |  PRODUCTVERSION 0,9,2,3
56 |  FILEFLAGSMASK 0x17L
57 | #ifdef _DEBUG
58 |  FILEFLAGS 0x1L
59 | #else
60 |  FILEFLAGS 0x0L
61 | #endif
62 |  FILEOS 0x4L
63 |  FILETYPE 0x2L
64 |  FILESUBTYPE 0x0L
65 | BEGIN
66 |     BLOCK "StringFileInfo"
67 |     BEGIN
68 |         BLOCK "040904b0"
69 |         BEGIN
70 |             VALUE "FileDescription", "eedi3 v0.9.2 for Avisynth 2.6.x"
71 |             VALUE "FileVersion", "0.9.2.3"
72 |             VALUE "LegalCopyright", "Copyright (C) 2010  Kevin Stone"
73 |             VALUE "ProductVersion", "0.9.2.3"
74 |         END
75 |     END
76 |     BLOCK "VarFileInfo"
77 |     BEGIN
78 |         VALUE "Translation", 0x409, 1200
79 |     END
80 | END
81 | 
82 | #endif    // English (United States) resources
83 | /////////////////////////////////////////////////////////////////////////////
84 | 
85 | 
86 | 
87 | #ifndef APSTUDIO_INVOKED
88 | /////////////////////////////////////////////////////////////////////////////
89 | //
90 | // Generated from the TEXTINCLUDE 3 resource.
91 | //
92 | 
93 | 
94 | /////////////////////////////////////////////////////////////////////////////
95 | #endif    // not APSTUDIO_INVOKED
96 | 
97 | 


--------------------------------------------------------------------------------
/EEDI3/eedi3.sln:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio 2013
 4 | VisualStudioVersion = 12.0.40629.0
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "eedi3", "eedi3.vcxproj", "{93A7D7C9-3365-46A9-8725-FC641E655945}"
 7 | EndProject
 8 | Global
 9 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | 		Debug|Win32 = Debug|Win32
11 | 		Debug|x64 = Debug|x64
12 | 		Release|Win32 = Release|Win32
13 | 		Release|x64 = Release|x64
14 | 	EndGlobalSection
15 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | 		{93A7D7C9-3365-46A9-8725-FC641E655945}.Debug|Win32.ActiveCfg = Debug|Win32
17 | 		{93A7D7C9-3365-46A9-8725-FC641E655945}.Debug|Win32.Build.0 = Debug|Win32
18 | 		{93A7D7C9-3365-46A9-8725-FC641E655945}.Debug|x64.ActiveCfg = Debug|x64
19 | 		{93A7D7C9-3365-46A9-8725-FC641E655945}.Debug|x64.Build.0 = Debug|x64
20 | 		{93A7D7C9-3365-46A9-8725-FC641E655945}.Release|Win32.ActiveCfg = Release|Win32
21 | 		{93A7D7C9-3365-46A9-8725-FC641E655945}.Release|Win32.Build.0 = Release|Win32
22 | 		{93A7D7C9-3365-46A9-8725-FC641E655945}.Release|x64.ActiveCfg = Release|x64
23 | 		{93A7D7C9-3365-46A9-8725-FC641E655945}.Release|x64.Build.0 = Release|x64
24 | 	EndGlobalSection
25 | 	GlobalSection(SolutionProperties) = preSolution
26 | 		HideSolutionNode = FALSE
27 | 	EndGlobalSection
28 | EndGlobal
29 | 


--------------------------------------------------------------------------------
/EEDI3/eedi3.vcxproj:
--------------------------------------------------------------------------------
  1 | ﻿<?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Debug|x64">
  9 |       <Configuration>Debug</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Release|Win32">
 13 |       <Configuration>Release</Configuration>
 14 |       <Platform>Win32</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <PropertyGroup Label="Globals">
 22 |     <ProjectGuid>{93A7D7C9-3365-46A9-8725-FC641E655945}</ProjectGuid>
 23 |     <RootNamespace>eedi3</RootNamespace>
 24 |     <Keyword>Win32Proj</Keyword>
 25 |     <WindowsTargetPlatformVersion>7.0</WindowsTargetPlatformVersion>
 26 |   </PropertyGroup>
 27 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 28 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 29 |     <ConfigurationType>DynamicLibrary</ConfigurationType>
 30 |     <CharacterSet>Unicode</CharacterSet>
 31 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 32 |     <PlatformToolset>v141_xp</PlatformToolset>
 33 |   </PropertyGroup>
 34 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 35 |     <ConfigurationType>DynamicLibrary</ConfigurationType>
 36 |     <CharacterSet>Unicode</CharacterSet>
 37 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 38 |     <PlatformToolset>v141_xp</PlatformToolset>
 39 |   </PropertyGroup>
 40 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 41 |     <ConfigurationType>DynamicLibrary</ConfigurationType>
 42 |     <PlatformToolset>v141_xp</PlatformToolset>
 43 |     <CharacterSet>Unicode</CharacterSet>
 44 |   </PropertyGroup>
 45 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 46 |     <ConfigurationType>DynamicLibrary</ConfigurationType>
 47 |     <PlatformToolset>v141_xp</PlatformToolset>
 48 |     <CharacterSet>Unicode</CharacterSet>
 49 |   </PropertyGroup>
 50 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 51 |   <ImportGroup Label="ExtensionSettings">
 52 |     <Import Project="$(VCTargetsPath)\BuildCustomizations\masm.props" />
 53 |   </ImportGroup>
 54 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
 55 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 56 |   </ImportGroup>
 57 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
 58 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 59 |   </ImportGroup>
 60 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
 61 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 62 |   </ImportGroup>
 63 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
 64 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 65 |   </ImportGroup>
 66 |   <PropertyGroup Label="UserMacros" />
 67 |   <PropertyGroup>
 68 |     <_ProjectFileVersion>11.0.60610.1</_ProjectFileVersion>
 69 |   </PropertyGroup>
 70 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 71 |     <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
 72 |     <IntDir>$(Configuration)\</IntDir>
 73 |     <LinkIncremental>true</LinkIncremental>
 74 |   </PropertyGroup>
 75 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 76 |     <LinkIncremental>true</LinkIncremental>
 77 |   </PropertyGroup>
 78 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 79 |     <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
 80 |     <IntDir>$(Configuration)\</IntDir>
 81 |     <LinkIncremental>false</LinkIncremental>
 82 |   </PropertyGroup>
 83 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 84 |     <LinkIncremental>false</LinkIncremental>
 85 |   </PropertyGroup>
 86 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 87 |     <ClCompile>
 88 |       <Optimization>Disabled</Optimization>
 89 |       <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;EEDI3_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 90 |       <MinimalRebuild>true</MinimalRebuild>
 91 |       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
 92 |       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
 93 |       <PrecompiledHeader />
 94 |       <WarningLevel>Level3</WarningLevel>
 95 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
 96 |       <AdditionalOptions>/Zc:threadSafeInit- %(AdditionalOptions)</AdditionalOptions>
 97 |     </ClCompile>
 98 |     <Link>
 99 |       <GenerateDebugInformation>true</GenerateDebugInformation>
100 |       <SubSystem>Windows</SubSystem>
101 |       <TargetMachine>MachineX86</TargetMachine>
102 |     </Link>
103 |   </ItemDefinitionGroup>
104 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
105 |     <ClCompile>
106 |       <Optimization>Disabled</Optimization>
107 |       <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;EEDI3_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
108 |       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
109 |       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
110 |       <PrecompiledHeader>
111 |       </PrecompiledHeader>
112 |       <WarningLevel>Level3</WarningLevel>
113 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
114 |       <AdditionalOptions>/Zc:threadSafeInit- %(AdditionalOptions)</AdditionalOptions>
115 |     </ClCompile>
116 |     <Link>
117 |       <GenerateDebugInformation>true</GenerateDebugInformation>
118 |       <SubSystem>Windows</SubSystem>
119 |     </Link>
120 |   </ItemDefinitionGroup>
121 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
122 |     <ClCompile>
123 |       <Optimization>MaxSpeed</Optimization>
124 |       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
125 |       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
126 |       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;EEDI3_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
127 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
128 |       <BufferSecurityCheck>false</BufferSecurityCheck>
129 |       <OpenMPSupport>true</OpenMPSupport>
130 |       <PrecompiledHeader />
131 |       <WarningLevel>Level3</WarningLevel>
132 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
133 |       <AdditionalOptions>/Zc:threadSafeInit- %(AdditionalOptions)</AdditionalOptions>
134 |       <IntrinsicFunctions>true</IntrinsicFunctions>
135 |     </ClCompile>
136 |     <Link>
137 |       <GenerateDebugInformation>true</GenerateDebugInformation>
138 |       <SubSystem>Windows</SubSystem>
139 |       <OptimizeReferences>true</OptimizeReferences>
140 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
141 |       <TargetMachine>MachineX86</TargetMachine>
142 |       <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
143 |     </Link>
144 |   </ItemDefinitionGroup>
145 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
146 |     <ClCompile>
147 |       <Optimization>MaxSpeed</Optimization>
148 |       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
149 |       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
150 |       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;EEDI3_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
151 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
152 |       <BufferSecurityCheck>false</BufferSecurityCheck>
153 |       <OpenMPSupport>true</OpenMPSupport>
154 |       <PrecompiledHeader>
155 |       </PrecompiledHeader>
156 |       <WarningLevel>Level3</WarningLevel>
157 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
158 |       <AdditionalOptions>/Zc:threadSafeInit- %(AdditionalOptions)</AdditionalOptions>
159 |       <IntrinsicFunctions>true</IntrinsicFunctions>
160 |     </ClCompile>
161 |     <Link>
162 |       <GenerateDebugInformation>true</GenerateDebugInformation>
163 |       <SubSystem>Windows</SubSystem>
164 |       <OptimizeReferences>true</OptimizeReferences>
165 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
166 |     </Link>
167 |   </ItemDefinitionGroup>
168 |   <ItemGroup>
169 |     <ClCompile Include="eedi3.cpp" />
170 |     <ClCompile Include="Eedi3Sse.cpp">
171 |       <AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AssemblyAndSourceCode</AssemblerOutput>
172 |       <AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AssemblyAndSourceCode</AssemblerOutput>
173 |     </ClCompile>
174 |     <ClCompile Include="PlanarFrame.cpp" />
175 |   </ItemGroup>
176 |   <ItemGroup>
177 |     <ClInclude Include="avisynth.h" />
178 |     <ClInclude Include="avs\alignment.h" />
179 |     <ClInclude Include="avs\capi.h" />
180 |     <ClInclude Include="avs\config.h" />
181 |     <ClInclude Include="avs\cpuid.h" />
182 |     <ClInclude Include="avs\minmax.h" />
183 |     <ClInclude Include="avs\types.h" />
184 |     <ClInclude Include="avs\win.h" />
185 |     <ClInclude Include="eedi3.h" />
186 |     <ClInclude Include="Eedi3Sse.h" />
187 |     <ClInclude Include="PlanarFrame.h" />
188 |     <ClInclude Include="resource.h" />
189 |   </ItemGroup>
190 |   <ItemGroup>
191 |     <ResourceCompile Include="eedi3.rc" />
192 |   </ItemGroup>
193 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
194 |   <ImportGroup Label="ExtensionTargets">
195 |     <Import Project="$(VCTargetsPath)\BuildCustomizations\masm.targets" />
196 |   </ImportGroup>
197 | </Project>


--------------------------------------------------------------------------------
/EEDI3/eedi3.vcxproj.filters:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ItemGroup>
 4 |     <ClCompile Include="eedi3.cpp">
 5 |       <Filter>Source Files</Filter>
 6 |     </ClCompile>
 7 |     <ClCompile Include="Eedi3Sse.cpp">
 8 |       <Filter>Source Files</Filter>
 9 |     </ClCompile>
10 |     <ClCompile Include="PlanarFrame.cpp">
11 |       <Filter>Source Files</Filter>
12 |     </ClCompile>
13 |   </ItemGroup>
14 |   <ItemGroup>
15 |     <ClInclude Include="avs\alignment.h">
16 |       <Filter>Header Files</Filter>
17 |     </ClInclude>
18 |     <ClInclude Include="avisynth.h">
19 |       <Filter>Header Files</Filter>
20 |     </ClInclude>
21 |     <ClInclude Include="avs\capi.h">
22 |       <Filter>Header Files</Filter>
23 |     </ClInclude>
24 |     <ClInclude Include="avs\config.h">
25 |       <Filter>Header Files</Filter>
26 |     </ClInclude>
27 |     <ClInclude Include="avs\cpuid.h">
28 |       <Filter>Header Files</Filter>
29 |     </ClInclude>
30 |     <ClInclude Include="eedi3.h">
31 |       <Filter>Header Files</Filter>
32 |     </ClInclude>
33 |     <ClInclude Include="Eedi3Sse.h">
34 |       <Filter>Header Files</Filter>
35 |     </ClInclude>
36 |     <ClInclude Include="avs\minmax.h">
37 |       <Filter>Header Files</Filter>
38 |     </ClInclude>
39 |     <ClInclude Include="resource.h">
40 |       <Filter>Header Files</Filter>
41 |     </ClInclude>
42 |     <ClInclude Include="avs\types.h">
43 |       <Filter>Header Files</Filter>
44 |     </ClInclude>
45 |     <ClInclude Include="avs\win.h">
46 |       <Filter>Header Files</Filter>
47 |     </ClInclude>
48 |     <ClInclude Include="PlanarFrame.h">
49 |       <Filter>Header Files</Filter>
50 |     </ClInclude>
51 |   </ItemGroup>
52 |   <ItemGroup>
53 |     <Filter Include="Header Files">
54 |       <UniqueIdentifier>{41b43ad7-df64-4f10-b6b7-7c212e8f370b}</UniqueIdentifier>
55 |       <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
56 |     </Filter>
57 |     <Filter Include="Source Files">
58 |       <UniqueIdentifier>{0fe661e3-7d17-4dc2-92b6-bf22867976c8}</UniqueIdentifier>
59 |       <Extensions>cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
60 |     </Filter>
61 |     <Filter Include="Resource Files">
62 |       <UniqueIdentifier>{4909926f-98dd-4fa4-81b9-421dec23bb5b}</UniqueIdentifier>
63 |       <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx</Extensions>
64 |     </Filter>
65 |   </ItemGroup>
66 |   <ItemGroup>
67 |     <ResourceCompile Include="eedi3.rc">
68 |       <Filter>Resource Files</Filter>
69 |     </ResourceCompile>
70 |   </ItemGroup>
71 | </Project>


--------------------------------------------------------------------------------
/EEDI3/resource.h:
--------------------------------------------------------------------------------
 1 | //{{NO_DEPENDENCIES}}
 2 | // Microsoft Visual C++ generated include file.
 3 | // Used by eedi3.rc
 4 | 
 5 | // Next default values for new objects
 6 | // 
 7 | #ifdef APSTUDIO_INVOKED
 8 | #ifndef APSTUDIO_READONLY_SYMBOLS
 9 | #define _APS_NEXT_RESOURCE_VALUE        101
10 | #define _APS_NEXT_COMMAND_VALUE         40001
11 | #define _APS_NEXT_CONTROL_VALUE         1001
12 | #define _APS_NEXT_SYMED_VALUE           101
13 | #endif
14 | #endif
15 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     {description}
294 |     Copyright (C) {year}  {fullname}
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   {signature of Ty Coon}, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 
341 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # EEDI3
 2 | 
 3 | This is a fork of Elegant's port of firesledge's EEDI3 mod, updated so that it works on both x86 and x64.
 4 | 
 5 | ### Requirements
 6 | 
 7 | This filter requires AviSynth 2.6.0 or AviSynth+ as well as the Visual C++ Redistributable Package for Visual Studio 2017.
 8 | 
 9 | ### Syntax and Parameters
10 | 
11 | The syntax and parameters are identical to those of the original EEDI3. For the full list, see the [EEDI3 page on the AviSynth wiki](http://avisynth.nl/index.php/Eedi3).
12 | 
13 | ### Changes
14 | 
15 | * Switched to AVS 2.6 API
16 | * Added x64 support
17 | * Updated some variable types for clarity
18 | * Y8, YV16 and YV24 colorspaces enabled
19 | 
20 | 
21 | ### Programmer Notes
22 | 
23 | This program was compiled using Visual Studio 2017 and is released under the GNU General Public License (see the `LICENSE` file).
24 | 
25 | I would like to thank cretindesalpes/firesledge and jpsdr for their work on EEDI3 and NNEDI3, respectively, as this project was made possible by their efforts.
26 | Special thanks to Elegant for updating the project in 2015.


--------------------------------------------------------------------------------