├── .gitignore ├── README.md ├── eric_zhou_proj8_site.zip ├── filter_tools.py ├── phaseAmplify.ipynb ├── pyramid_tools.py └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.avi 2 | *.gif 3 | *.jpg 4 | .ipynb_checkpoints 5 | __pycache__ 6 | Release* -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Video Motion Magnification 2 | 3 | This project aims to reimplement phase base video motion magnification in python. 4 | 5 | The main algorithm is in phaseAmplify.ipynb. This file assumes the presence of filter_tools.py, pyramid_tools.py, and util.py. 6 | 7 | * filter_tools is a python file full of filtering algorithms. 8 | * pyramid_tools contains a bunch of functions dedicated to working with the steerable pyramid. 9 | * util.py just has misc IO functions. 10 | 11 | See the site zip for more information about this project and this algorithm. 12 | 13 | Enjoy! -------------------------------------------------------------------------------- /eric_zhou_proj8_site.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheryu/video_motion_magnification/d3b193a47eaf921694fd2dce49f0d1b7e44be439/eric_zhou_proj8_site.zip -------------------------------------------------------------------------------- /filter_tools.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility file for temporal filters 3 | """ 4 | 5 | from skimage.filters import gaussian 6 | from scipy.signal import firwin 7 | from pyfftw.interfaces.scipy_fftpack import fft, ifft, ifftshift 8 | from numpy import tile, real, min, zeros 9 | 10 | def amplitude_weighted_blur(x, weight, sigma): 11 | if sigma != 0: 12 | return gaussian(x*weight, sigma, mode="wrap") / gaussian(weight, sigma, mode="wrap") 13 | return x 14 | 15 | def difference_of_iir(delta, rl, rh): 16 | """ 17 | difference of infinite impulse responses 18 | """ 19 | lowpass_1 = delta[0].copy() 20 | lowpass_2 = lowpass_1.copy() 21 | out = zeros(delta.shape, dtype=delta.dtype) 22 | for i in range(1, delta.shape[0]): 23 | lowpass_1 = (1-rh)*lowpass_1 + rh*delta[i] 24 | lowpass_2 = (1-rl)*lowpass_2 + rl*delta[i] 25 | out[i] = lowpass_1 - lowpass_2 26 | return out 27 | 28 | def fir_window_bp(delta, fl, fh): 29 | """ 30 | Finite impulse response, bandpass. 31 | This filter doesn't work exactly like the matlab version due to some fourier transform imprecisions. 32 | Consider replacing the transform calls to the FFTW versions. 33 | """ 34 | b = firwin(delta.shape[0]+1, (fl*2, fh*2), pass_zero=False)[:-1] 35 | m = delta.shape[1] 36 | batches = 20 37 | batch_size = int(m / batches) + 1 38 | temp = fft(ifftshift(b)) 39 | out = zeros(delta.shape, dtype=delta.dtype) 40 | for i in range(batches): 41 | indexes = (batch_size*i, min((batch_size*(i+1), m))) 42 | freq = fft(delta[:,indexes[0]:indexes[1]], axis=0)*tile(temp, (delta.shape[2],indexes[1]-indexes[0], 1)).swapaxes(0,2) 43 | out[:, indexes[0]:indexes[1]] = real(ifft(freq, axis=0)) 44 | return out -------------------------------------------------------------------------------- /phaseAmplify.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Populating the interactive namespace from numpy and matplotlib\n" 15 | ] 16 | }, 17 | { 18 | "name": "stderr", 19 | "output_type": "stream", 20 | "text": [ 21 | "C:\\Users\\Zheryu\\Anaconda3\\envs\\image-processing\\lib\\site-packages\\IPython\\core\\magics\\pylab.py:161: UserWarning: pylab import has clobbered these variables: ['ifft']\n", 22 | "`%matplotlib` prevents importing * from pylab and numpy\n", 23 | " \"\\n`%matplotlib` prevents importing * from pylab and numpy\"\n" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "%pylab inline" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "source": [ 37 | "#### Import" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 5, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "import numpy as np\n", 49 | "import pyfftw.interfaces.scipy_fftpack as spfft\n", 50 | "import scipy.signal as spsignal\n", 51 | "import skimage.transform as sktransform\n", 52 | "import matplotlib.pyplot as plt" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 6, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "from filter_tools import *\n", 64 | "from util import *\n", 65 | "from pyramid_tools import *" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "### Load the resources" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "collapsed": false 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "video_file = \"baby.avi\"\n", 84 | "magnification_factor = 20\n", 85 | "fl = .04\n", 86 | "fh = .4\n", 87 | "fs = 1\n", 88 | "attenuate_other_frequencies=False\n", 89 | "pyr_type = \"octave\"\n", 90 | "sigma = 5\n", 91 | "temporal_filter = difference_of_iir\n", 92 | "scale_video = .8" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "collapsed": false 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "video = load_video(video_file)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "#### Resize our video" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "collapsed": false 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "video2 = np.zeros((len(video), 200, 200, 3))\n", 122 | "for i in range(len(video)):\n", 123 | " video2[i] = sktransform.resize(video[i], (200,200))" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": true 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "video = video2" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": true 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "phase_amplify(video2, magnification_factor, fl, fh, fs, attenuate_other_frequencies=attenuate_other_frequencies, pyramid_type=pyr_type, sigma=sigma, temporal_filter=temporal_filter)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "collapsed": false 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "def phase_amplify(video, magnification_factor, fl, fh, fs, attenuate_other_frequencies=False, pyramid_type=\"octave\", sigma=0, temporal_filter=fir_window_bp):\n", 157 | " num_frames, h, w, num_channels = video.shape\n", 158 | " pyr_height = max_scf_pyr_height((h, w))\n", 159 | "\n", 160 | " if pyr_type is \"octave\":\n", 161 | " print(\"Using vanilla octave pyramid\")\n", 162 | " filters = get_filters((h, w), 2**np.array(list(range(0,-pyr_height-1,-1)), dtype=float), 4)\n", 163 | " elif pyr_type is \"halfOctave\":\n", 164 | " print(\"Using half octave pyramid\")\n", 165 | " filters = get_filters((h, w), 2**np.array(list(range(0,-pyr_height-1,-1)), dtype=float), 8, t_width=0.75)\n", 166 | " elif pyr_type is \"smoothHalfOctave\":\n", 167 | " print(\"Using smooth half octave pyramid.\")\n", 168 | " filters = get_filters_smooth_window((h, w), 8, filters_per_octave=2)\n", 169 | " elif pyr_type is \"quarterOctave\":\n", 170 | " print(\"Using quarter octave pyramid.\")\n", 171 | " filters = get_filters_smooth_window((h, w), 8, filters_per_octave=4)\n", 172 | " else:\n", 173 | " print(\"Invalid filter type. Specify ocatave, halfOcatave, smoothHalfOctave, or quarterOctave\")\n", 174 | " return None\n", 175 | "\n", 176 | " yiq_video = np.zeros((num_frames, h, w, num_channels))\n", 177 | " fft_video = np.zeros((num_frames, h, w), dtype=complex64)\n", 178 | "\n", 179 | " for i in range(num_frames):\n", 180 | " yiq_video[i] = rgb2yiq(video[i])\n", 181 | " fft_video[i] = spfft.fftshift(spfft.fft2(yiq_video[i][:,:,0]))\n", 182 | "\n", 183 | " magnified_y_channel = np.zeros((num_frames, h, w), dtype=complex64)\n", 184 | " dc_frame_index = 0\n", 185 | " for i in range(1,len(filters)-1):\n", 186 | " print(\"processing level \"+str(i))\n", 187 | "\n", 188 | " dc_frame = spfft.ifft2(spfft.ifftshift(filters[i]*fft_video[dc_frame_index])) \n", 189 | " dc_frame_no_mag = dc_frame / np.abs(dc_frame) \n", 190 | " dc_frame_phase = np.angle(dc_frame)\n", 191 | "\n", 192 | " total = np.zeros(fft_video.shape, dtype=float)\n", 193 | " filtered = np.zeros(fft_video.shape, dtype=complex64)\n", 194 | "\n", 195 | " for j in range(num_frames):\n", 196 | " filtered[j] = spfft.ifft2(spfft.ifftshift(filters[i]*fft_video[j]))\n", 197 | " total[j] = simplify_phase(np.angle(filtered[j]) - dc_frame_phase)\n", 198 | "\n", 199 | " print(\"bandpassing...\")\n", 200 | " total = temporal_filter(total, fl/fs, fh/fs).astype(float)\n", 201 | "\n", 202 | " for j in range(num_frames):\n", 203 | " phase_of_frame = total[j]\n", 204 | " if sigma != 0:\n", 205 | " phase_of_frame = amplitude_weighted_blur(phase_of_frame, np.abs(filtered[j]), sigma)\n", 206 | "\n", 207 | " phase_of_frame *= magnification_factor\n", 208 | "\n", 209 | " if attenuate_other_frequencies:\n", 210 | " temp_orig = np.abs(filtered[j])*dc_frame_no_mag\n", 211 | " else:\n", 212 | " temp_orig = filtered[j]\n", 213 | " magnified_component = 2*filters[i]*spfft.fftshift(spfft.fft2(temp_orig*np.exp(1j*phase_of_frame)))\n", 214 | "\n", 215 | " magnified_y_channel[j] = magnified_y_channel[j] + magnified_component\n", 216 | "\n", 217 | " for i in range(num_frames):\n", 218 | " magnified_y_channel[i] = magnified_y_channel[i] + (fft_video[i]*(filters[-1]**2))\n", 219 | "\n", 220 | " out = np.zeros(yiq_video.shape)\n", 221 | "\n", 222 | " for i in range(num_frames):\n", 223 | " out_frame = np.dstack((np.real(spfft.ifft2(spfft.ifftshift(magnified_y_channel[i]))), yiq_video[i,:,:,1:3]))\n", 224 | " out[i] = yiq2rgb(out_frame)\n", 225 | "\n", 226 | " return out.clip(min=0, max=1)" 227 | ] 228 | } 229 | ], 230 | "metadata": { 231 | "kernelspec": { 232 | "display_name": "Python (image-processing)", 233 | "language": "python", 234 | "name": "image-processing" 235 | }, 236 | "language_info": { 237 | "codemirror_mode": { 238 | "name": "ipython", 239 | "version": 3 240 | }, 241 | "file_extension": ".py", 242 | "mimetype": "text/x-python", 243 | "name": "python", 244 | "nbconvert_exporter": "python", 245 | "pygments_lexer": "ipython3", 246 | "version": "3.5.2" 247 | } 248 | }, 249 | "nbformat": 4, 250 | "nbformat_minor": 0 251 | } 252 | -------------------------------------------------------------------------------- /pyramid_tools.py: -------------------------------------------------------------------------------- 1 | """ 2 | Complex steerable filter utility functions 3 | 4 | Python version of pyrToolsExt. 5 | 6 | This is a direct reimplementation of utility functions used in the original Phase-based Motion Magnification 7 | project authored by Wadhwa et al. 8 | """ 9 | import math 10 | from numpy import * 11 | 12 | def simplify_phase(x): 13 | """ 14 | Moves x into the [-pi, pi] range. 15 | """ 16 | return ((x + pi) % (2*pi)) - pi 17 | 18 | def max_scf_pyr_height(dims): 19 | """ 20 | Gets the maximum possible steerable pyramid height 21 | dims: (h, w), the height and width of your desired filters in a tuple 22 | """ 23 | return int(log2(min(dims[:2]))) - 2 24 | 25 | def get_polar_grid(dims): 26 | center = ceil((array(dims))/2).astype(int) 27 | xramp, yramp = meshgrid(linspace(-1, 1, dims[1]+1)[:-1], linspace(-1, 1, dims[0]+1)[:-1]) 28 | 29 | theta = arctan2(yramp, xramp) 30 | r = sqrt(xramp**2 + yramp**2) 31 | 32 | # eliminate the zero at the center 33 | r[center[0], center[1]] = min((r[center[0], center[1]-1], r[center[0]-1, center[1]]))/2 34 | return theta, r 35 | 36 | def get_angle_mask_smooth(index, num_bands, angle, is_complex): 37 | order = num_bands-1 38 | const = sqrt((2**(2*order))*(math.factorial(order)**2)/(num_bands*math.factorial(2*order))) 39 | angle = simplify_phase(angle+(pi*index/num_bands)) 40 | 41 | if is_complex: 42 | return const*(cos(angle)**order)*(abs(angle) < pi/2) 43 | else: 44 | return abs(sqrt(const)*(cos(angle)**order)) 45 | 46 | def get_filters_smooth_window(dims, orientations, cos_order=6, filters_per_octave=6, is_complex=True, pyr_height=-1): 47 | """ 48 | A complex steerable filter generator with a smoother window. Better for quarter octave or half octave decompositions. 49 | """ 50 | max_pyr_height = max_scf_pyr_height(dims) 51 | if pyr_height == -1 or pyr_height > max_pyr_height: 52 | pyr_height = max_pyr_height 53 | total_filter_count = filters_per_octave * pyr_height 54 | 55 | theta, r = get_polar_grid(dims) 56 | r = (log2(r) + pyr_height)*pi*(0.5 + (total_filter_count / 7)) / pyr_height 57 | 58 | window_function = lambda x, c: (abs(x - c) < pi/2).astype(int) 59 | compute_shift = lambda k: pi*(k/(cos_order+1)+2/7) 60 | 61 | rad_filters = [] 62 | 63 | total = zeros(dims) 64 | a_constant = sqrt((2**(2*cos_order))*(math.factorial(cos_order)**2)/((cos_order+1)*math.factorial(2*cos_order))) 65 | for k in range(total_filter_count): 66 | shift = compute_shift(k+1) 67 | rad_filters += [a_constant*(cos(r-shift)**cos_order)*window_function(r,shift)] 68 | total += rad_filters[k]**2 69 | rad_filters = rad_filters[::-1] 70 | 71 | center = ceil(array(dims)/2).astype(int) 72 | low_dims = ceil(array(center+1.5)/4).astype(int) 73 | total_cropped = total[center[0]-low_dims[0]:center[0]+low_dims[0]+1, center[1]-low_dims[1]:center[1]+low_dims[1]+1] 74 | 75 | low_pass = zeros(dims) 76 | low_pass[center[0]-low_dims[0]:center[0]+low_dims[0]+1, center[1]-low_dims[1]:center[1]+low_dims[1]+1] = abs(sqrt(1+0j-total_cropped)) 77 | total += low_pass**2 78 | high_pass = abs(sqrt(1+0j-total)) 79 | 80 | anglemasks = [] 81 | for i in range(orientations): 82 | anglemasks += [get_angle_mask_smooth(i, orientations, theta, is_complex)] 83 | 84 | out = [high_pass] 85 | for i in range(len(rad_filters)): 86 | for j in range(len(anglemasks)): 87 | out += [anglemasks[j]*rad_filters[i]] 88 | out += [low_pass] 89 | return out 90 | 91 | def get_radial_mask_pair(r, rad, t_width): 92 | log_rad = log2(rad)-log2(r) 93 | hi_mask = abs(cos(log_rad.clip(min=-t_width, max=0)*pi/(2*t_width))) 94 | lo_mask = sqrt(1-(hi_mask**2)) 95 | return (hi_mask, lo_mask) 96 | 97 | def get_angle_mask(b, orientations, angle): 98 | order = orientations - 1 99 | a_constant = sqrt((2**(2*order))*(math.factorial(order)**2)/(orientations*math.factorial(2*order))) 100 | angle2 = simplify_phase(angle - (pi*b/orientations)) 101 | return 2*a_constant*(cos(angle2)**order)*(abs(angle2) < pi/2) 102 | 103 | def get_filters(dims, r_vals=None, orientations=4, t_width=1): 104 | """ 105 | Gets a steerbale filter bank in the form of a list of ndarrays 106 | dims: (h, w). Dimensions of the output filters. Should be the same size as the image you're using these to filter 107 | r_vals: The boundary between adjacent filters. Should be an array. 108 | e.g.: 2**np.array(list(range(0,-7,-1))) 109 | orientations: The number of filters per level 110 | t-width: The falloff of each filter. Smaller t_widths correspond to thicker filters with less falloff 111 | """ 112 | if r_vals is None: 113 | r_vals = 2**np.array(list(range(0,-max_scf_pyr_height(dims)-1,-1))) 114 | angle, r = get_polar_grid(dims) 115 | hi_mask, lo_mask_prev = get_radial_mask_pair(r_vals[0], r, t_width) 116 | filters = [hi_mask] 117 | for i in range(1, len(r_vals)): 118 | hi_mask, lo_mask = get_radial_mask_pair(r_vals[i], r, t_width) 119 | rad_mask = hi_mask * lo_mask_prev 120 | for j in range(orientations): 121 | angle_mask = get_angle_mask(j, orientations, angle) 122 | filters += [rad_mask*angle_mask/2] 123 | lo_mask_prev = lo_mask 124 | filters += [lo_mask_prev] 125 | return filters -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | """ 2 | other utilities 3 | """ 4 | from skimage import img_as_float 5 | from imageio import get_reader, get_writer 6 | from numpy import asarray, array 7 | from numpy.linalg import inv 8 | 9 | yiq_from_rgb = array([[0.299 , 0.587 , 0.114 ], 10 | [0.59590059, -0.27455667, -0.32134392], 11 | [0.21153661, -0.52273617, 0.31119955]]) 12 | rgb_from_yiq = inv(yiq_from_rgb) 13 | 14 | def rgb2yiq(img): 15 | return img_as_float(img).dot(yiq_from_rgb.T) 16 | 17 | def yiq2rgb(img): 18 | return img_as_float(img).dot(rgb_from_yiq.T) 19 | 20 | def load_video(filename): 21 | reader = get_reader(filename) 22 | orig_vid = [] 23 | for i, im in enumerate(reader): 24 | orig_vid.append(im) 25 | return asarray(orig_vid) 26 | 27 | def write_video(video, fps, name): 28 | writer = get_writer(name, fps=fps) 29 | for i in range(video.shape[0]): 30 | writer.append_data(video[i]) 31 | writer.close() 32 | 33 | --------------------------------------------------------------------------------