├── result.png ├── util.py ├── README.md └── watermark.py /result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LJSthu/Python-Remove-Watermark/HEAD/result.png -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | for i in range(1,49): 2 | s = '![image](./jiangyi3/img'+str(i)+'.jpg)' 3 | print(s) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python-Remove-Watermark 2 | A simple program to remove the watermark from a PDF file. 3 | 4 | 5 | ### How? 6 | 7 | 1. convert the PDF file into images using `pdf2image` 8 | 2. convert the images to numpy arrays 9 | 3. find the pixels whose RGB values match the watermark and change them to (255,255,255) 10 | 4. save the modified images 11 | 12 | 13 | ### Environment 14 | 15 | First you need to install the dependencies: 16 | ``` 17 | $ pip install pdf2image 18 | ``` 19 | ``` 20 | $ pip install scikit-image 21 | ``` 22 | 23 | Inside the repository create a directory that will receive the modified images: 24 | ``` 25 | $ mkdir jiangyi3 26 | ``` 27 | To execute: 28 | ``` 29 | $ python watermark.py 30 | ``` 31 | Don't forget to set the path of the PDF you want to convert. 
# README.md (tail, carried over from the collapsed dump):
#   ### Results
#   ![image](./result.png)
#
# /watermark.py:
"""Remove a light-grey watermark from every page of a PDF.

Pipeline: render the PDF pages to images with ``pdf2image``, convert each
page to a numpy array, whiten every pixel whose colour falls in the
watermark's grey range, and save the result as a JPEG.
"""
import os

import numpy as np

# Exact grey levels accepted by ``select_pixel`` (R == G == B == one of these).
_WATERMARK_GREYS = frozenset({196, 206, 208})


def judge(x, y):
    """Return True when (x, y) lies strictly inside a diagonal band.

    The band is bounded by two parallel lines of slope ``-600/1575`` with
    intercepts 1350 and 1500; both bounds are exclusive.

    ``handle`` does not call this function.  An earlier experiment invoked
    it as ``judge(column, row)`` to skip pixels outside the band.
    """
    offset = -(600.0 / 1575.0) * x
    return bool(1350 + offset < y < 1500 + offset)


def select_pixel(r, g, b):
    """Return True for the exact greys (196, 206 or 208 on all channels)."""
    return bool(r == g == b and int(r) in _WATERMARK_GREYS)


def select_pixel2(r, g, b):
    """Return True when every channel is strictly between 175 and 250."""
    return all(175 < channel < 250 for channel in (r, g, b))


def handle(imgs):
    """Whiten watermark-coloured pixels of an image array, in place.

    A pixel is treated as watermark when its first three channels are all
    strictly between 175 and 250 (the same rule as ``select_pixel2``); those
    three channels are then set to 255.  Any further channel (e.g. alpha)
    is left untouched.

    Args:
        imgs: array of shape (height, width, channels) with channels >= 3.

    Returns:
        The same array object that was passed in, after modification.

    Raises:
        ValueError: if ``imgs`` is not 3-dimensional with at least three
            channels.
    """
    if imgs.ndim != 3 or imgs.shape[2] < 3:
        raise ValueError(
            "expected an array of shape (height, width, >=3 channels), "
            "got shape %r" % (imgs.shape,)
        )
    # Basic slicing yields a view, so assigning through it edits ``imgs``.
    rgb = imgs[..., :3]
    # One vectorised mask replaces the per-pixel Python loop.
    watermark = ((rgb > 175) & (rgb < 250)).all(axis=-1)
    rgb[watermark] = 255
    return imgs


def main(pdf_path='./jiangyi3.pdf', out_dir='./jiangyi3'):
    """Convert ``pdf_path`` page by page and write cleaned JPEGs to ``out_dir``.

    Pages are saved as ``img1.jpg``, ``img2.jpg``, ... in page order.  The
    shape of each page and, at the end, the number of pages are printed.
    """
    # Imported here so the pure helpers above can be used (and tested)
    # without pdf2image / scikit-image being installed.
    from pdf2image import convert_from_path
    from skimage import io

    # Saving fails if the target directory is missing, so create it up front.
    os.makedirs(out_dir, exist_ok=True)

    index = 0  # stays 0 when the PDF has no pages
    for index, page in enumerate(convert_from_path(pdf_path), start=1):
        img = np.array(page)
        print(img.shape)
        img = handle(img)
        io.imsave(os.path.join(out_dir, 'img' + str(index) + '.jpg'), img)
    print(index)


if __name__ == "__main__":
    main()