├── .gitignore ├── .ipynb_checkpoints └── Watermarking -checkpoint.ipynb ├── Matting-Levin-Lischinski-Weiss-PAMI.pdf ├── README.md ├── Watermarking .ipynb ├── array.npz ├── coco_dataset ├── alpha.png ├── auth.png ├── copyright.png └── watermark.png ├── final ├── 137840668.jpg ├── 168667147.jpg ├── 168667186.jpg ├── 168667261.jpg ├── 168667468.jpg ├── 168667490.jpg ├── 168668046.jpg ├── 168668148.jpg ├── 168668150.jpg ├── 168668190.jpg ├── 75353029.jpg ├── fotolia_137840668.jpg ├── fotolia_168667147.jpg ├── fotolia_168667186.jpg ├── fotolia_168667261.jpg ├── fotolia_168667468.jpg ├── fotolia_168667490.jpg ├── fotolia_168668046.jpg ├── fotolia_168668148.jpg ├── fotolia_168668150.jpg ├── fotolia_168668190.jpg └── fotolia_75353029.jpg ├── main.py ├── main_cocoset.py ├── references.txt ├── src ├── __init__.py ├── closed_form_matting.py ├── estimate_watermark.py ├── image_crawler.py ├── preprocess.py ├── tensorflow_experiments.py └── watermark_reconstruct.py └── watermark.png /.gitignore: -------------------------------------------------------------------------------- 1 | images/ 2 | Dekel_* 3 | *.pyc 4 | *.pdf 5 | *.bmp -------------------------------------------------------------------------------- /Matting-Levin-Lischinski-Weiss-PAMI.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/Matting-Levin-Lischinski-Weiss-PAMI.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Automatic watermark detection and removal 2 | This was a project that was built as part of project for CS663 (Digital Image Processing). 3 | This is a crude Python implementation of the paper "On The Effectiveness Of Visible Watermarks", Tali Dekel, Michael Rubinstein, Ce Liu and William T. 
Freeman, 4 | Conference on Computer Vision and Pattern Recognition (CVPR), 2017. 5 | 6 | ### Rough sketch of the algorithm 7 | A watermarked image `J` is obtained by imposing a watermark `W` over an unwatermarked image `I` with a blend factor $\alpha$. Specifically, we have the following equation: 8 | 9 |
10 | $$J(p) = \alpha(p)\,W(p) + \bigl(1 - \alpha(p)\bigr)\,I(p)$$ 11 |
12 | 13 | Where `p = (x, y)` is the pixel location. For a set of `K` images, we have: 14 | $$J_k(p) = \alpha(p)\,W(p) + \bigl(1 - \alpha(p)\bigr)\,I_k(p), \quad k = 1, \dots, K$$
15 | 16 | Although we have a lot of unknown quantities ($I_k$, $W$, $\alpha$), we can make use of the structural properties of the images to determine the watermark's location and estimate its structure. The coherency of $\alpha$ and $W$ over all the images can be exploited to solve the above problem with good accuracy. The steps followed to determine these values are: 17 | - Initial watermark estimation and detection 18 | - Estimating the matted watermark 19 | - Compute the median of the watermarked image gradients, independently in the `x` and `y` directions, at every pixel location `p`. 20 | 21 | $$\nabla \hat{W}_m(p) = \operatorname{median}_k \bigl(\nabla J_k(p)\bigr)$$
22 | 23 | - Crop `W_m` to remove boundary regions by computing its gradient magnitude and taking the bounding box of the edge map. The initial watermark estimate is obtained using Poisson reconstruction. Here is an estimated watermark using a dataset of 450+ Fotolia images. 24 | watermark_est 25 | 26 | - Watermark detection: Obtain a verbose edge map (using Canny edge detector) and compute 27 | its Euclidean distance transform, which is then correlated with the estimated watermark gradient $\nabla \hat{W}_m$ 28 | to get the Chamfer distance from each pixel to the closest edge. 29 | Lastly, the watermark position is taken to be the pixel with minimum 30 | distance in the map. 31 | 32 | #### Multi-image matting and reconstruction 33 | - Image-watermark decomposition - Estimate $W_k$ and $I_k$ keeping $\alpha$ and $W$ fixed. 34 | - Watermark update - Update the value of $W$ keeping the rest fixed. 35 | - Matte update - Update the value of $\alpha$ keeping the rest fixed. 36 | 37 | Please refer to the paper and supplementary material for a more in-depth description and derivation of the algorithm. 38 | 39 | Results 40 | -------- 41 | Here are some of the results for watermarked and watermark-removed images: 42 | 43 |
44 | 45 |
46 | 47 | 48 |
49 | 50 | 51 |
52 |
53 | 54 | However, this is a rough implementation and the removal of watermark leaves some "traces" in form of texture distortion or artifacts. I believe this can be corrected by appropriate parameter tuning. 55 | 56 | More information 57 | ------- 58 | For more information, refer to the original paper [here](http://openaccess.thecvf.com/content_cvpr_2017/papers/Dekel_On_the_Effectiveness_CVPR_2017_paper.pdf) 59 | 60 | Disclaimer 61 | -------- 62 | I do not encourage or endorse piracy by making this project public. The code is free for academic/research purpose. Please feel free to send pull requests for bug fixes/optimizations, etc. 63 | 64 | 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /array.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/array.npz -------------------------------------------------------------------------------- /coco_dataset/alpha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/coco_dataset/alpha.png -------------------------------------------------------------------------------- /coco_dataset/auth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/coco_dataset/auth.png -------------------------------------------------------------------------------- /coco_dataset/copyright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/coco_dataset/copyright.png 
-------------------------------------------------------------------------------- /coco_dataset/watermark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/coco_dataset/watermark.png -------------------------------------------------------------------------------- /final/137840668.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/137840668.jpg -------------------------------------------------------------------------------- /final/168667147.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/168667147.jpg -------------------------------------------------------------------------------- /final/168667186.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/168667186.jpg -------------------------------------------------------------------------------- /final/168667261.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/168667261.jpg -------------------------------------------------------------------------------- /final/168667468.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/168667468.jpg 
-------------------------------------------------------------------------------- /final/168667490.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/168667490.jpg -------------------------------------------------------------------------------- /final/168668046.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/168668046.jpg -------------------------------------------------------------------------------- /final/168668148.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/168668148.jpg -------------------------------------------------------------------------------- /final/168668150.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/168668150.jpg -------------------------------------------------------------------------------- /final/168668190.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/168668190.jpg -------------------------------------------------------------------------------- /final/75353029.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/75353029.jpg -------------------------------------------------------------------------------- 
/final/fotolia_137840668.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/fotolia_137840668.jpg -------------------------------------------------------------------------------- /final/fotolia_168667147.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/fotolia_168667147.jpg -------------------------------------------------------------------------------- /final/fotolia_168667186.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/fotolia_168667186.jpg -------------------------------------------------------------------------------- /final/fotolia_168667261.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/fotolia_168667261.jpg -------------------------------------------------------------------------------- /final/fotolia_168667468.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/fotolia_168667468.jpg -------------------------------------------------------------------------------- /final/fotolia_168667490.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/fotolia_168667490.jpg 
-------------------------------------------------------------------------------- /final/fotolia_168668046.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/fotolia_168668046.jpg -------------------------------------------------------------------------------- /final/fotolia_168668148.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/fotolia_168668148.jpg -------------------------------------------------------------------------------- /final/fotolia_168668150.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/fotolia_168668150.jpg -------------------------------------------------------------------------------- /final/fotolia_168668190.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/fotolia_168668190.jpg -------------------------------------------------------------------------------- /final/fotolia_75353029.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohitrango/automatic-watermark-detection/a11d4e01dbb02bcb2595703d45d9243281e38d37/final/fotolia_75353029.jpg -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from src import * 2 | 3 | gx, gy, gxlist, gylist = estimate_watermark('images/fotolia_processed') 4 | 5 | # est = poisson_reconstruct(gx, gy, 
np.zeros(gx.shape)[:,:,0]) 6 | cropped_gx, cropped_gy = crop_watermark(gx, gy) 7 | W_m = poisson_reconstruct(cropped_gx, cropped_gy) 8 | 9 | # random photo 10 | img = cv2.imread('images/fotolia_processed/fotolia_137840645.jpg') 11 | im, start, end = watermark_detector(img, cropped_gx, cropped_gy) 12 | 13 | # plt.imshow(im) 14 | # plt.show() 15 | # We are done with watermark estimation 16 | # W_m is the cropped watermark 17 | num_images = len(gxlist) 18 | 19 | J, img_paths = get_cropped_images('images/fotolia_processed', num_images, start, end, cropped_gx.shape) 20 | # get a random subset of J 21 | idx = [389, 144, 147, 468, 423, 92, 3, 354, 196, 53, 470, 445, 314, 349, 105, 366, 56, 168, 351, 15, 465, 368, 90, 96, 202, 54, 295, 137, 17, 79, 214, 413, 454, 305, 187, 4, 458, 330, 290, 73, 220, 118, 125, 180, 247, 243, 257, 194, 117, 320, 104, 252, 87, 95, 228, 324, 271, 398, 334, 148, 425, 190, 78, 151, 34, 310, 122, 376, 102, 260] 22 | idx = idx[:25] 23 | # Wm = (255*PlotImage(W_m)) 24 | Wm = W_m - W_m.min() 25 | 26 | # get threshold of W_m for alpha matte estimate 27 | alph_est = estimate_normalized_alpha(J, Wm) 28 | alph = np.stack([alph_est, alph_est, alph_est], axis=2) 29 | C, est_Ik = estimate_blend_factor(J, Wm, alph) 30 | 31 | alpha = alph.copy() 32 | for i in xrange(3): 33 | alpha[:,:,i] = C[i]*alpha[:,:,i] 34 | 35 | Wm = Wm + alpha*est_Ik 36 | 37 | W = Wm.copy() 38 | for i in xrange(3): 39 | W[:,:,i]/=C[i] 40 | 41 | Jt = J[:25] 42 | # now we have the values of alpha, Wm, J 43 | # Solve for all images 44 | Wk, Ik, W, alpha1 = solve_images(Jt, W_m, alpha, W) 45 | # W_m_threshold = (255*PlotImage(np.average(W_m, axis=2))).astype(np.uint8) 46 | # ret, thr = cv2.threshold(W_m_threshold, 127, 255, cv2.THRESH_BINARY) 47 | 48 | -------------------------------------------------------------------------------- /main_cocoset.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This main file is for the Microsoft Coco dataset 
3 | ''' 4 | from src import * 5 | 6 | IMAGE_FOLDER = "/media/rohitrango/2EC8DBB2C8DB7715/" 7 | IMG_LOC = "coco_dataset" 8 | IMG_PROCESSED_LOC = "coco_dataset_processed" 9 | 10 | def get_alpha_matte(watermark, threshold=128): 11 | w = np.average(watermark, axis=2) 12 | _, w = cv2.threshold(w, threshold, 255, cv2.THRESH_BINARY_INV) 13 | return PlotImage(w) 14 | 15 | def P(img,e=None): 16 | if e is None: 17 | plt.imshow(PlotImage(img)); plt.show() 18 | else: 19 | plt.imshow(PlotImage(img),'gray'); plt.show() 20 | 21 | def bgr2rgb(img): 22 | return img[:,:,[2, 1, 0]] 23 | ''' 24 | Ground Truth values 25 | alpha -> coco_dataset/alpha.png 26 | copyright -> coco_dataset/copyright.png 27 | c = .45 28 | 29 | Experiments: Threshold for estimating initial alpha -> 153, and then subtract 1 from alpha 30 | ''' 31 | if __name__ == "__main__": 32 | # watermark = cv2.imread('coco_dataset/watermark.png') 33 | # alpha = get_alpha_matte(watermark) 34 | foldername = os.path.join(IMAGE_FOLDER, IMG_PROCESSED_LOC) 35 | gx, gy, gxlist, gylist = estimate_watermark(foldername) 36 | 37 | # est = poisson_reconstruct(gx, gy, np.zeros(gx.shape)[:,:,0]) 38 | cropped_gx, cropped_gy = crop_watermark(gx, gy) 39 | W_m = poisson_reconstruct(cropped_gx, cropped_gy, num_iters=5000) 40 | 41 | # random photo 42 | img = cv2.imread(os.path.join(foldername, '000000051008.jpg')) 43 | im, start, end = watermark_detector(img, cropped_gx, cropped_gy) 44 | num_images = len(gxlist) 45 | 46 | J, img_paths = get_cropped_images(foldername, num_images, start, end, cropped_gx.shape) 47 | # get a random subset of J 48 | idx = [389, 144, 147, 468, 423, 92, 3, 354, 196, 53, 470, 445, 314, 349, 105, 366, 56, 168, 351, 15, 465, 368, 90, 96, 202, 54, 295, 137, 17, 79, 214, 413, 454, 305, 187, 4, 458, 330, 290, 73, 220, 118, 125, 180, 247, 243, 257, 194, 117, 320, 104, 252, 87, 95, 228, 324, 271, 398, 334, 148, 425, 190, 78, 151, 34, 310, 122, 376, 102, 260] 49 | idx = idx[:25] 50 | # Wm = (255*PlotImage(W_m)) 51 | Wm = W_m 
- W_m.min() 52 | 53 | # get threshold of W_m for alpha matte estimate 54 | alph_est = estimate_normalized_alpha(J, Wm, num_images=15, threshold=125, invert=False, adaptive=False) 55 | alph = np.stack([alph_est, alph_est, alph_est], axis=2) 56 | C, est_Ik = estimate_blend_factor(J, Wm, alph) 57 | 58 | alpha = alph.copy() 59 | for i in xrange(3): 60 | alpha[:,:,i] = C[i]*alpha[:,:,i] 61 | 62 | # Wm = Wm + alpha*est_Ik 63 | 64 | W = Wm.copy() 65 | for i in xrange(3): 66 | W[:,:,i]/=C[i] 67 | 68 | Jt = J[idx] 69 | # now we have the values of alpha, Wm, J 70 | # Solve for all images 71 | Wk, Ik, W, alpha1 = solve_images(Jt, W_m, alpha, W) 72 | # W_m_threshold = (255*PlotImage(np.average(W_m, axis=2))).astype(np.uint8) 73 | # ret, thr = cv2.threshold(W_m_threshold, 127, 255, cv2.THRESH_BINARY) 74 | 75 | 76 | -------------------------------------------------------------------------------- /references.txt: -------------------------------------------------------------------------------- 1 | trimap: http://support.digitalfilmtools.com/support/index.php?/Knowledgebase/Article/View/68/0/what-is-a-trimap 2 | 3 | 4 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | from estimate_watermark import * 2 | from preprocess import * 3 | from image_crawler import * 4 | from watermark_reconstruct import * 5 | -------------------------------------------------------------------------------- /src/closed_form_matting.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import numpy as np 4 | import scipy.sparse 5 | import scipy 6 | from scipy.sparse import * 7 | from numpy.lib.stride_tricks import as_strided 8 | 9 | 10 | def rolling_block(A, block=(3, 3)): 11 | shape = (A.shape[0] - block[0] + 1, A.shape[1] - block[1] + 1) + block 12 | strides = (A.strides[0], A.strides[1]) + 
A.strides 13 | return as_strided(A, shape=shape, strides=strides) 14 | 15 | 16 | # Returns sparse matting laplacian 17 | def computeLaplacian(img, eps=10**(-7), win_rad=1): 18 | win_size = (win_rad*2+1)**2 19 | h, w, d = img.shape 20 | # Number of window centre indices in h, w axes 21 | c_h, c_w = h - 2*win_rad, w - 2*win_rad 22 | win_diam = win_rad*2+1 23 | 24 | indsM = np.arange(h*w).reshape((h, w)) 25 | ravelImg = img.reshape(h*w, d) 26 | win_inds = rolling_block(indsM, block=(win_diam, win_diam)) 27 | 28 | win_inds = win_inds.reshape(c_h, c_w, win_size) 29 | winI = ravelImg[win_inds] 30 | 31 | win_mu = np.mean(winI, axis=2, keepdims=True) 32 | win_var = np.einsum('...ji,...jk ->...ik', winI, winI)/win_size - np.einsum('...ji,...jk ->...ik', win_mu, win_mu) 33 | 34 | inv = np.linalg.inv(win_var + (eps/win_size)*np.eye(3)) 35 | 36 | X = np.einsum('...ij,...jk->...ik', winI - win_mu, inv) 37 | vals = np.eye(win_size) - (1/win_size)*(1 + np.einsum('...ij,...kj->...ik', X, winI - win_mu)) 38 | 39 | nz_indsCol = np.tile(win_inds, win_size).ravel() 40 | nz_indsRow = np.repeat(win_inds, win_size).ravel() 41 | nz_indsVal = vals.ravel() 42 | L = scipy.sparse.coo_matrix((nz_indsVal, (nz_indsRow, nz_indsCol)), shape=(h*w, h*w)) 43 | return L 44 | 45 | 46 | def closed_form_matte(img, scribbled_img, mylambda=100): 47 | h, w,c = img.shape 48 | consts_map = (np.sum(abs(img - scribbled_img), axis=-1)>0.001).astype(np.float64) 49 | #scribbled_img = rgb2gray(scribbled_img) 50 | 51 | consts_vals = scribbled_img[:,:,0]*consts_map 52 | D_s = consts_map.ravel() 53 | b_s = consts_vals.ravel() 54 | # print("Computing Matting Laplacian") 55 | L = computeLaplacian(img) 56 | sD_s = scipy.sparse.diags(D_s) 57 | # print("Solving for alpha") 58 | x = scipy.sparse.linalg.spsolve(L + mylambda*sD_s, mylambda*b_s) 59 | alpha = np.minimum(np.maximum(x.reshape(h, w), 0), 1) 60 | return alpha 61 | -------------------------------------------------------------------------------- 
/src/estimate_watermark.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | import cv2 3 | import numpy as np 4 | import warnings 5 | from matplotlib import pyplot as plt 6 | import math 7 | import numpy 8 | import scipy, scipy.fftpack 9 | 10 | # Variables 11 | KERNEL_SIZE = 3 12 | 13 | def estimate_watermark(foldername): 14 | """ 15 | Given a folder, estimate the watermark (grad(W) = median(grad(J))) 16 | Also, give the list of gradients, so that further processing can be done on it 17 | """ 18 | if not os.path.exists(foldername): 19 | warnings.warn("Folder does not exist.", UserWarning) 20 | return None 21 | 22 | images = [] 23 | for r, dirs, files in os.walk(foldername): 24 | # Get all the images 25 | for file in files: 26 | img = cv2.imread(os.sep.join([r, file])) 27 | if img is not None: 28 | images.append(img) 29 | else: 30 | print("%s not found."%(file)) 31 | 32 | # Compute gradients 33 | print("Computing gradients.") 34 | gradx = map(lambda x: cv2.Sobel(x, cv2.CV_64F, 1, 0, ksize=KERNEL_SIZE), images) 35 | grady = map(lambda x: cv2.Sobel(x, cv2.CV_64F, 0, 1, ksize=KERNEL_SIZE), images) 36 | 37 | # Compute median of grads 38 | print("Computing median gradients.") 39 | Wm_x = np.median(np.array(gradx), axis=0) 40 | Wm_y = np.median(np.array(grady), axis=0) 41 | 42 | return (Wm_x, Wm_y, gradx, grady) 43 | 44 | 45 | def PlotImage(image): 46 | """ 47 | PlotImage: Give a normalized image matrix which can be used with implot, etc. 48 | Maps to [0, 1] 49 | """ 50 | im = image.astype(float) 51 | return (im - np.min(im))/(np.max(im) - np.min(im)) 52 | 53 | 54 | def poisson_reconstruct2(gradx, grady, boundarysrc): 55 | # Thanks to Dr. Ramesh Raskar for providing the original matlab code from which this is derived 56 | # Dr. 
Raskar's version is available here: http://web.media.mit.edu/~raskar/photo/code.pdf 57 | 58 | # Laplacian 59 | gyy = grady[1:,:-1] - grady[:-1,:-1] 60 | gxx = gradx[:-1,1:] - gradx[:-1,:-1] 61 | f = numpy.zeros(boundarysrc.shape) 62 | f[:-1,1:] += gxx 63 | f[1:,:-1] += gyy 64 | 65 | # Boundary image 66 | boundary = boundarysrc.copy() 67 | boundary[1:-1,1:-1] = 0; 68 | 69 | # Subtract boundary contribution 70 | f_bp = -4*boundary[1:-1,1:-1] + boundary[1:-1,2:] + boundary[1:-1,0:-2] + boundary[2:,1:-1] + boundary[0:-2,1:-1] 71 | f = f[1:-1,1:-1] - f_bp 72 | 73 | # Discrete Sine Transform 74 | tt = scipy.fftpack.dst(f, norm='ortho') 75 | fsin = scipy.fftpack.dst(tt.T, norm='ortho').T 76 | 77 | # Eigenvalues 78 | (x,y) = numpy.meshgrid(range(1,f.shape[1]+1), range(1,f.shape[0]+1), copy=True) 79 | denom = (2*numpy.cos(math.pi*x/(f.shape[1]+2))-2) + (2*numpy.cos(math.pi*y/(f.shape[0]+2)) - 2) 80 | 81 | f = fsin/denom 82 | 83 | # Inverse Discrete Sine Transform 84 | tt = scipy.fftpack.idst(f, norm='ortho') 85 | img_tt = scipy.fftpack.idst(tt.T, norm='ortho').T 86 | 87 | # New center + old boundary 88 | result = boundary 89 | result[1:-1,1:-1] = img_tt 90 | 91 | return result 92 | 93 | 94 | def poisson_reconstruct(gradx, grady, kernel_size=KERNEL_SIZE, num_iters=100, h=0.1, 95 | boundary_image=None, boundary_zero=True): 96 | """ 97 | Iterative algorithm for Poisson reconstruction. 98 | Given the gradx and grady values, find laplacian, and solve for image 99 | Also return the squared difference of every step. 
100 | h = convergence rate 101 | """ 102 | fxx = cv2.Sobel(gradx, cv2.CV_64F, 1, 0, ksize=kernel_size) 103 | fyy = cv2.Sobel(grady, cv2.CV_64F, 0, 1, ksize=kernel_size) 104 | laplacian = fxx + fyy 105 | m,n,p = laplacian.shape 106 | 107 | if boundary_zero == True: 108 | est = np.zeros(laplacian.shape) 109 | else: 110 | assert(boundary_image is not None) 111 | assert(boundary_image.shape == laplacian.shape) 112 | est = boundary_image.copy() 113 | 114 | est[1:-1, 1:-1, :] = np.random.random((m-2, n-2, p)) 115 | loss = [] 116 | 117 | for i in xrange(num_iters): 118 | old_est = est.copy() 119 | est[1:-1, 1:-1, :] = 0.25*(est[0:-2, 1:-1, :] + est[1:-1, 0:-2, :] + est[2:, 1:-1, :] + est[1:-1, 2:, :] - h*h*laplacian[1:-1, 1:-1, :]) 120 | error = np.sum(np.square(est-old_est)) 121 | loss.append(error) 122 | 123 | return (est) 124 | 125 | 126 | def image_threshold(image, threshold=0.5): 127 | ''' 128 | Threshold the image to make all its elements greater than threshold*MAX = 1 129 | ''' 130 | m, M = np.min(image), np.max(image) 131 | im = PlotImage(image) 132 | im[im >= threshold] = 1 133 | im[im < 1] = 0 134 | return im 135 | 136 | 137 | def crop_watermark(gradx, grady, threshold=0.4, boundary_size=2): 138 | """ 139 | Crops the watermark by taking the edge map of magnitude of grad(W) 140 | Assumes the gradx and grady to be in 3 channels 141 | @param: threshold - gives the threshold param 142 | @param: boundary_size - boundary around cropped image 143 | """ 144 | W_mod = np.sqrt(np.square(gradx) + np.square(grady)) 145 | W_mod = PlotImage(W_mod) 146 | W_gray = image_threshold(np.average(W_mod, axis=2), threshold=threshold) 147 | x, y = np.where(W_gray == 1) 148 | 149 | xm, xM = np.min(x) - boundary_size - 1, np.max(x) + boundary_size + 1 150 | ym, yM = np.min(y) - boundary_size - 1, np.max(y) + boundary_size + 1 151 | 152 | return gradx[xm:xM, ym:yM, :] , grady[xm:xM, ym:yM, :] 153 | 154 | 155 | def normalized(img): 156 | """ 157 | Return the image between -1 to 1 so that 
its easier to find out things like 158 | correlation between images, convolutionss, etc. 159 | Currently required for Chamfer distance for template matching. 160 | """ 161 | return (2*PlotImage(img)-1) 162 | 163 | def watermark_detector(img, gx, gy, thresh_low=200, thresh_high=220, printval=False): 164 | """ 165 | Compute a verbose edge map using Canny edge detector, take its magnitude. 166 | Assuming cropped values of gradients are given. 167 | Returns image, start and end coordinates 168 | """ 169 | Wm = (np.average(np.sqrt(np.square(gx) + np.square(gy)), axis=2)) 170 | 171 | img_edgemap = (cv2.Canny(img, thresh_low, thresh_high)) 172 | chamfer_dist = cv2.filter2D(img_edgemap.astype(float), -1, Wm) 173 | 174 | rect = Wm.shape 175 | index = np.unravel_index(np.argmax(chamfer_dist), img.shape[:-1]) 176 | if printval: 177 | print(index) 178 | 179 | x,y = (index[0]-rect[0]/2), (index[1]-rect[1]/2) 180 | im = img.copy() 181 | cv2.rectangle(im, (y, x), (y+rect[1], x+rect[0]), (255, 0, 0)) 182 | return (im, (x, y), (rect[0], rect[1])) 183 | -------------------------------------------------------------------------------- /src/image_crawler.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import os 3 | import argparse 4 | import sys 5 | from time import sleep 6 | 7 | import requests 8 | from bs4 import BeautifulSoup as bs 9 | from threading import Thread 10 | 11 | ## variables 12 | fotolia_download_button = 'comp-download-buttons row-large' 13 | istock_base_download_button = 'asset-link draggable' 14 | 15 | ## get the url of the image 16 | def _get_image_url_fotolia(base_url, minVal, directory, index=0, num_retries = 5): 17 | img_url = "" 18 | retries = 0 19 | while retries < num_retries: 20 | # try 21 | r = requests.get(base_url + str(minVal + index)) 22 | if r.status_code == 200: 23 | soup = bs(r.content, 'lxml') 24 | row = soup.find_all(attrs={'class': fotolia_download_button}) 25 | # check row 26 | if 
len(row) > 0: 27 | link = row[0].findChildren()[0] 28 | if(link.attrs.has_key('href')): 29 | img_url = link.attrs['href'] 30 | __download_and_save_image(img_url, directory) 31 | else: 32 | print("Error, check: ") 33 | print(link) 34 | else: 35 | print("There is no image download button.") 36 | 37 | break 38 | else: 39 | retries += 1 40 | 41 | return img_url 42 | 43 | # get the link 44 | def _get_istock_page_and_download(link, directory): 45 | _media_url = "media.istockphoto.com" 46 | r = requests.get(link) 47 | if r.status_code == 200: 48 | soup = bs(r.content, 'lxml') 49 | img = [] 50 | img = filter(lambda x: _media_url in x.attrs['src'], filter(lambda x: x.attrs.has_key('src'), soup.find_all('img'))) 51 | if img == []: 52 | print("Cannot find image.") 53 | else: 54 | img_link = img[0].attrs['src'] 55 | __download_and_save_image(img_link, directory, src='istock') 56 | else: 57 | print("Cannot connect to : " + link) 58 | 59 | # download and save a given image 60 | def __download_and_save_image(link, directory, src='fotolia'): 61 | print("Attempting to download: " + link) 62 | r = requests.get(link) 63 | if r.status_code == 200: 64 | 65 | # depends on source 66 | if src == 'fotolia': 67 | try: 68 | filename = r.headers['Content-Disposition'].split('filename="')[1][:-2] 69 | except: 70 | print("No Content-Disposition header present.") 71 | return 72 | elif src == 'istock': 73 | try: 74 | filename = r.headers['Content-Disposition'].split('filename=')[1] 75 | except: 76 | print("No Content-Disposition header present.") 77 | return 78 | 79 | filename = os.sep.join([directory, filename]) 80 | print("Saving to filename: %s "%(filename)) 81 | with open(filename, 'wb') as f: 82 | f.write(r.content) 83 | else: 84 | print("Couldn't download from link: " + link) 85 | 86 | 87 | # function to scrape from fotolia 88 | def fotolia_scrape(directory, minVal=137840645, n_images=100): 89 | # make the dir first 90 | if not os.path.isdir(directory): 91 | os.mkdir(directory) 92 | 93 | 
base_url = "https://www.fotolia.com/Content/Comp/" 94 | image_url_list = [] 95 | index = 0 96 | 97 | # check thread list 98 | thread_list = [] 99 | 100 | # start threads 101 | for index in xrange(n_images): 102 | th = Thread(target=_get_image_url_fotolia, args=(base_url, minVal, directory, index)) 103 | thread_list.append(th) 104 | th.start() 105 | 106 | # join 107 | for th in thread_list: 108 | th.join() 109 | 110 | 111 | # function to scrape from istock 112 | def istock_scrape(directory, topic="abstract", n_images=100): 113 | 114 | ## iStock blocks you, be careful 115 | # raise NotImplementedError("iStockPhotos blocks you, be careful.") 116 | 117 | webpage = "https://www.istockphoto.com" 118 | base_search_url = "http://www.istockphoto.com/in/photos/%s"%topic 119 | 120 | r = requests.get(base_search_url) 121 | links_list = [] 122 | if r.status_code == 200: 123 | soup = bs(r.content, 'lxml') 124 | links = map(lambda x: webpage + x.attrs['href'], soup.find_all(attrs={'class': istock_base_download_button})) 125 | links_list += links 126 | 127 | nextPageLink = soup.find_all(attrs={'id':'next-gallery-page'}) 128 | print("Moving to next page.") 129 | sleep(0.5) 130 | 131 | while(nextPageLink != [] and len(links_list) < n_images): 132 | href = webpage + nextPageLink[0].attrs['href'] 133 | r = requests.get(href) 134 | if r.status_code == 200: 135 | soup = bs(r.content, 'lxml') 136 | links = map(lambda x: webpage + x.attrs['href'], soup.find_all(attrs={'class': istock_base_download_button})) 137 | links_list += links 138 | nextPageLink = soup.find_all(attrs={'id':'next-gallery-page'}) 139 | print("Moving to next page.") 140 | else: 141 | nextPageLink = [] 142 | print("No next page found.") 143 | 144 | thread_list = [] 145 | ## we have the list of link, go to each link and download it 146 | for link in links_list: 147 | th = Thread(target=_get_istock_page_and_download, args=(link, directory)) 148 | thread_list.append(th) 149 | th.start() 150 | th.join() 151 | sleep(1) 152 | 
# ---- src/image_crawler.py : script entry point ----
'''
Main function here
'''
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Scrape from stock images')
    parser.add_argument('-f', dest='folder', help='Specify the folder where to place the images.')
    parser.add_argument('-u', dest='url', help='Specify the place from where to scrape.')
    args = parser.parse_args()

    # Without a source there is nothing to scrape: show usage and stop.
    if args.url is None:
        parser.print_help()
        sys.exit(0)

    # define the folder (current directory when -f is not given)
    directory = "." if args.folder is None else args.folder

    # check for the param
    if "fotolia" in args.url:
        fotolia_scrape(directory, n_images=100)
    elif "istock" in args.url:
        istock_scrape(directory, n_images=150, topic='mountains')

    print("Done.")


# ---- src/preprocess.py ----
def preprocess(foldername, size=500, suffix="_processed"):
    """Zero-pad every readable image under ``foldername`` to ``size`` x ``size``.

    The padded copies are written, under their original file names, into a
    new folder named ``foldername + suffix``. Each image is centred in the
    padded canvas; when the padding is odd, the extra row/column goes to the
    bottom/right.

    Args:
        foldername: folder that is walked recursively for images.
        size: side length, in pixels, of the padded output images.
        suffix: appended to ``foldername`` to form the destination folder.

    Returns:
        None. If the destination folder already exists nothing is processed.
    """
    dest_folder = foldername + suffix
    processed = os.path.abspath(dest_folder)

    if os.path.exists(processed):
        print ("Directory %s already exists."%(processed))
        return None

    os.mkdir(dest_folder)

    for root, dirs, files in os.walk(foldername):
        for file in files:
            path = os.sep.join([os.path.abspath(root), file])
            img = cv2.imread(path)
            if img is None:
                # not an image OpenCV can decode
                continue

            m, n, p = img.shape
            if m > size or n > size:
                # a negative pad width would make np.pad raise mid-run
                print("Skipping %s: larger than %dx%d"%(file, size, size))
                continue

            # Floor division keeps the pad widths integral on Python 3 as
            # well; np.pad rejects float widths.
            m_t, n_t = (size - m) // 2, (size - n) // 2
            final_img = np.pad(
                img,
                ((m_t, size - m - m_t), (n_t, size - n - n_t), (0, 0)),
                mode='constant',
            )
            cv2.imwrite(os.sep.join([dest_folder, file]), final_img)
            print("Saved to : %s"%(file))
            print(final_img.shape)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        # usage message now names the required argument
        print("Format : %s <foldername>"%(sys.argv[0]))
    else:
        preprocess(sys.argv[1])
# helpers that are going to be useful here
_SOBEL_X_KERNEL = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype=np.float32)


def _per_channel_filter(kernel, chan=3):
    """Return a conv2d filter applying ``kernel`` to each channel separately.

    ``tf.nn.conv2d`` expects filters laid out as
    ``[height, width, in_channels, out_channels]``. Stacking the 3x3 kernel
    twice with ``tf.stack`` puts the kernel axes in the channel slots and
    leaves an all-ones spatial kernel, which is not a Sobel operator.
    """
    filt = np.zeros(kernel.shape + (chan, chan), dtype=np.float32)
    for c in range(chan):
        # output channel c only sees input channel c
        filt[:, :, c, c] = kernel
    return tf.constant(filt)


sobel_x = tf.constant(_SOBEL_X_KERNEL, tf.float32)
sobel_y = tf.transpose(sobel_x)

# NOTE: filters are built for 3-channel inputs, like the originals.
sobel_x_filter = _per_channel_filter(_SOBEL_X_KERNEL)
sobel_y_filter = _per_channel_filter(_SOBEL_X_KERNEL.T)


def phi_func(mtensor, epsilon=0.001):
    """Return ``sqrt(mtensor + epsilon**2)``, a smooth stand-in for ``sqrt``."""
    return tf.sqrt(mtensor + epsilon**2)


# E_data
def E_data(I, W, J, alpha):
    """Mean robust error of the blend ``alpha*W + (1-alpha)*I`` against ``J``."""
    est_error = tf.multiply(alpha, W) + tf.multiply(1-alpha, I) - J
    est_error = phi_func(tf.square(est_error))
    est_error = tf.reduce_mean(est_error)
    return est_error


# regularizer term for I, W
def E_reg(I, alpha):
    """Regulariser weighting squared Sobel gradients of ``I`` by ``|grad alpha|``.

    ``alpha`` receives a leading axis before the convolution; ``I`` is
    convolved as given, so it must already be 4-D.
    """
    alpha_ = tf.expand_dims(alpha, 0)
    ax = tf.nn.conv2d(alpha_, sobel_x_filter, strides=[1, 1, 1, 1], padding="SAME")
    ay = tf.nn.conv2d(alpha_, sobel_y_filter, strides=[1, 1, 1, 1], padding="SAME")
    Ix2 = tf.square(tf.nn.conv2d(I, sobel_x_filter, strides=[1, 1, 1, 1], padding="SAME"))
    Iy2 = tf.square(tf.nn.conv2d(I, sobel_y_filter, strides=[1, 1, 1, 1], padding="SAME"))
    est_error = tf.multiply(tf.abs(ax), Ix2) + tf.multiply(tf.abs(ay), Iy2)
    est_error = tf.reduce_mean(phi_func(est_error))
    return est_error


# regularization term for alpha
def E_reg_alpha(alpha):
    """Mean robust magnitude of the Sobel gradient of ``alpha``."""
    alpha_ = tf.expand_dims(alpha, 0)
    ax2 = tf.square(tf.nn.conv2d(alpha_, sobel_x_filter, strides=[1, 1, 1, 1], padding="SAME"))
    ay2 = tf.square(tf.nn.conv2d(alpha_, sobel_y_filter, strides=[1, 1, 1, 1], padding="SAME"))
    est_error = tf.reduce_mean(phi_func(ax2 + ay2))
    return est_error
# fidelity term
# W = all watermarks, or W_median
def E_f(alpha, W, W_m):
    """Mean robust difference between the edge maps of ``alpha*W`` and ``W_m``.

    ``alpha*W`` gets a leading axis when it is 3-D; ``W_m`` always gets one.
    Edge magnitudes use ``phi_func`` instead of a bare ``tf.sqrt``: the
    derivative of ``sqrt`` is infinite at zero, so flat regions (both Sobel
    responses zero) would otherwise yield NaN gradients.
    """
    aW = tf.multiply(alpha, W)
    shape = aW.shape.as_list()
    if len(shape) == 3:
        aW = tf.expand_dims(aW, 0)
    # find edge map of alpha*W
    aWx = tf.nn.conv2d(aW, sobel_x_filter, strides=[1, 1, 1, 1], padding="SAME")
    aWy = tf.nn.conv2d(aW, sobel_y_filter, strides=[1, 1, 1, 1], padding="SAME")
    aW_ = phi_func(tf.square(aWx) + tf.square(aWy))

    # find edge map of W_m
    W_m__ = tf.expand_dims(W_m, 0)
    W_mx = tf.nn.conv2d(W_m__, sobel_x_filter, strides=[1, 1, 1, 1], padding="SAME")
    W_my = tf.nn.conv2d(W_m__, sobel_y_filter, strides=[1, 1, 1, 1], padding="SAME")
    W_m_ = phi_func(tf.square(W_mx) + tf.square(W_my))

    return tf.reduce_mean(phi_func(tf.square(aW_ - W_m_)))


# auxiliary term
def E_aux(W, W_k):
    """Mean absolute difference between ``W`` and ``W_k``."""
    return tf.reduce_mean(tf.abs(W - W_k))


# We try to use Tensorflow to perform the 3 steps
def image_watermark_decompose_model(num_images, m, n, chan=3, l_i=1, l_w=1, l_alpha=1, beta=1, gamma=1, lr=0.07):
    """Build the graph that estimates per-image ``I`` and ``W`` for a fixed alpha.

    Placeholders (inputs):
        J        (num_images, m, n, chan) -> all the watermarked images
        alpha    (m, n, chan)             -> estimate of alpha matte
        W_m      (m, n, chan)             -> estimate of the watermark obtained before
        W_median (m, n, chan)             -> new estimate of W

    Variables (estimated, random-normal initialised):
        I (num_images, m, n, chan) -> the watermark-free images
        W (num_images, m, n, chan) -> all watermarks

    Args:
        num_images, m, n, chan: dimensions of the image stack.
        l_i, l_w: weights of the regularisers on ``I`` and ``W``.
        l_alpha: accepted for interface compatibility; not used in this loss.
        beta: weight of the fidelity term.
        gamma: weight of the auxiliary term.
        lr: learning rate of the gradient-descent optimiser.

    Returns:
        dict with the placeholders and variables under their names, the
        scalar ``'loss'`` and the training op ``'step'``.
    """
    # All placeholders
    J = tf.placeholder(tf.float32, shape=(num_images, m, n, chan), name='J')
    alpha = tf.placeholder(tf.float32, shape=(m, n, chan), name='alpha')
    W_m = tf.placeholder(tf.float32, shape=(m, n, chan), name='W_m')
    W_median = tf.placeholder(tf.float32, shape=(m, n, chan), name='W_median')

    # All variables
    I = tf.Variable(np.random.randn(num_images, m, n, chan), name='I', dtype=tf.float32)
    W = tf.Variable(np.random.randn(num_images, m, n, chan), name='W', dtype=tf.float32)

    # compute loss
    loss = E_data(I, W, J, alpha) + l_i*E_reg(I, alpha) + l_w*E_reg(W, alpha) \
        + beta*E_f(alpha, W, W_m) + gamma*E_aux(W_median, W)

    optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss)
    return {
        'J': J,
        'alpha': alpha,
        'W_m': W_m,
        'W_median': W_median,
        'I': I,
        'W': W,
        'loss': loss,
        'step': optimizer,
    }
# matte update
def matte_update_model(num_images, m, n, chan=3, l_alpha=1, beta=1, lr=0.07):
    """Build the graph that estimates the alpha matte with everything else fixed.

    ``J``, ``W_m``, ``W_median``, ``I`` and ``W`` are placeholders; ``alpha``
    is the only variable and starts from random-normal values. The loss is
    the data term plus ``l_alpha`` times the alpha regulariser plus ``beta``
    times the fidelity term between ``alpha * W_median`` and ``W_m``.

    Returns:
        dict with the placeholders and ``alpha`` under their names, the
        scalar ``'loss'`` and the gradient-descent training op ``'step'``.
    """
    stack_shape = (num_images, m, n, chan)   # one entry per image
    plane_shape = (m, n, chan)               # shared by all images

    # Fixed inputs
    J = tf.placeholder(tf.float32, shape=stack_shape, name='J')
    W_m = tf.placeholder(tf.float32, shape=plane_shape, name='W_m')
    W_median = tf.placeholder(tf.float32, shape=plane_shape, name='W_median')
    I = tf.placeholder(tf.float32, shape=stack_shape, name='I')
    W = tf.placeholder(tf.float32, shape=stack_shape, name='W')

    # The single quantity being optimised
    alpha = tf.Variable(np.random.randn(m, n, chan), dtype=tf.float32)

    loss = (
        E_data(I, W, J, alpha)
        + l_alpha * E_reg_alpha(alpha)
        + beta * E_f(alpha, W_median, W_m)
    )
    train_step = tf.train.GradientDescentOptimizer(lr).minimize(loss)

    model = {}
    model['J'] = J
    model['alpha'] = alpha
    model['W_m'] = W_m
    model['W_median'] = W_median
    model['I'] = I
    model['W'] = W
    model['loss'] = loss
    model['step'] = train_step
    return model
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | import scipy 5 | from scipy.sparse import * 6 | from scipy.sparse import linalg 7 | from estimate_watermark import * 8 | from closed_form_matting import * 9 | from numpy import nan, isnan 10 | 11 | def get_cropped_images(foldername, num_images, start, end, shape): 12 | ''' 13 | This is the part where we get all the images, extract their parts, and then add it to our matrix 14 | ''' 15 | images_cropped = np.zeros((num_images,) + shape) 16 | # get images 17 | # Store all the watermarked images 18 | # start, and end are already stored 19 | # just crop and store image 20 | image_paths = [] 21 | _s, _e = start, end 22 | index = 0 23 | 24 | # Iterate over all images 25 | for r, dirs, files in os.walk(foldername): 26 | 27 | for file in files: 28 | _img = cv2.imread(os.sep.join([r, file])) 29 | if _img is not None: 30 | # estimate the watermark part 31 | image_paths.append(os.sep.join([r, file])) 32 | _img = _img[_s[0]:(_s[0]+_e[0]), _s[1]:(_s[1]+_e[1]), :] 33 | # add to list images 34 | images_cropped[index, :, :, :] = _img 35 | index+=1 36 | else: 37 | print("%s not found."%(file)) 38 | 39 | return (images_cropped, image_paths) 40 | 41 | 42 | # get sobel coordinates for y 43 | def _get_ysobel_coord(coord, shape): 44 | i, j, k = coord 45 | m, n, p = shape 46 | return [ 47 | (i-1, j, k, -2), (i-1, j-1, k, -1), (i-1, j+1, k, -1), 48 | (i+1, j, k, 2), (i+1, j-1, k, 1), (i+1, j+1, k, 1) 49 | ] 50 | 51 | # get sobel coordinates for x 52 | def _get_xsobel_coord(coord, shape): 53 | i, j, k = coord 54 | m, n, p = shape 55 | return [ 56 | (i, j-1, k, -2), (i-1, j-1, k, -1), (i-1, j+1, k, -1), 57 | (i, j+1, k, 2), (i+1, j-1, k, 1), (i+1, j+1, k, 1) 58 | ] 59 | 60 | # filter 61 | def _filter_list_item(coord, shape): 62 | i, j, k, v = coord 63 | m, n, p = shape 64 | if i>=0 and i=0 and j