├── result.png
├── util.py
├── README.md
└── watermark.py


/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LJSthu/Python-Remove-Watermark/HEAD/result.png


--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
# Print one Markdown image link for each page image, img1.jpg through
# img48.jpg, so the lines can be pasted into a Markdown document.
for page_number in range(1, 49):
    print('![image](./jiangyi3/img' + str(page_number) + '.jpg)')


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Python-Remove-Watermark
 2 | A simple program to remove the watermark from a PDF file. 
 3 | 
 4 | 
 5 | ### How?
 6 | 
 7 | 1. convert the PDF file into images using `pdf2image`
 8 | 2. convert the images to numpy array
 9 | 3. find the watermark pixels by their RGB values and set them to white (255,255,255)
10 | 4. save the modified images
11 | 
12 | 
13 | ### Environment
14 | 
15 | First you need to install the dependencies:
16 | ```
17 | $ pip install pdf2image
18 | ```
19 | ```
20 | $ pip install scikit-image
21 | ```
22 | 
23 | Inside the repository create a directory that will receive the modified images:
24 | ```
25 | $ mkdir jiangyi3
26 | ```
27 | To execute:
28 | ```
29 | $ python watermark.py
30 | ```
31 | Don't forget to set the path of the PDF you want to convert (it is hard-coded in `watermark.py`).
32 | 
33 | 
34 | ### Results
35 | ![image](./result.png)


--------------------------------------------------------------------------------
/watermark.py:
--------------------------------------------------------------------------------
 1 | from skimage import io
 2 | from pdf2image import convert_from_path
 3 | import numpy as np
 4 | # imgs = io.imread('./test.png')
 5 | # io.imsave('./hh.png',imgs)
 6 | # imgs = np.array(imgs)
 7 | # print(imgs.shape)
 8 | # r = []
 9 | # g = []
10 | # b = []
11 | # alpha = []
12 | 
def judge(x, y):
    """Return True when the point (x, y) lies strictly inside a slanted band.

    The band is bounded by two parallel lines with slope -600/1575:
    ``y = 1350 + slope * x`` (exclusive) and ``y = 1500 + slope * x``
    (exclusive).
    """
    offset = -(600.0/1575.0) * x
    lower_edge = 1350 + offset
    upper_edge = 1500 + offset
    return bool(y > lower_edge and y < upper_edge)
19 | 
20 | # for  i in range(imgs.shape[0]):
21 | #     for j in range(imgs.shape[1]):
22 | #         if not judge(j,i):
23 | #             continue
24 | #         if imgs[i][j][1] > 100 and imgs[i][j][1] < 250 and imgs[i][j][2] > 100 and imgs[i][j][2] < 250:
25 | #             imgs[i][j][0] =  imgs[i][j][1] = imgs[i][j][2] = 255
26 | #         if imgs[i][j][1] < 10 and imgs[i][j][2] < 100:
27 | #             imgs[i][j][0] =  imgs[i][j][1] = imgs[i][j][2] = 0 
28 | 
29 | # io.imsave('./hh.png',imgs)
30 | # print(r)
31 | # print(g)
32 | # print(b)
33 | # print(alpha)
34 | 
def select_pixel(r, g, b):
    """Return True when the pixel is exactly one of three gray levels.

    A pixel matches only if all three channels equal the same value and
    that value is 208, 196 or 206.
    """
    gray_levels = (208, 196, 206)
    return any(r == level and g == level and b == level
               for level in gray_levels)
def select_pixel2(r, g, b):
    """Return True when every channel lies strictly between 175 and 250."""
    return all(175 < channel < 250 for channel in (r, g, b))
def handle(imgs):
    """Whiten light-gray pixels of an image array in place.

    A pixel is whitened when each of its first three channels is strictly
    between 175 and 250; those three channels are then set to 255. Any
    further channel (for example an alpha channel) is left untouched.

    Args:
        imgs: NumPy array indexed as ``[row, column, channel]`` with at
            least three channels. It is modified in place.

    Returns:
        The same array object that was passed in.

    Raises:
        ValueError: if a non-empty array is not 3-dimensional or has fewer
            than three channels.
    """
    # Nothing to visit in an empty array; return it unchanged.
    if imgs.size == 0:
        return imgs
    if imgs.ndim != 3 or imgs.shape[2] < 3:
        raise ValueError(
            'expected an array of shape (rows, cols, >=3 channels), got '
            + str(imgs.shape))

    # Basic slicing yields a view, so writing through `rgb` updates `imgs`.
    rgb = imgs[..., :3]
    # One boolean per pixel: True where all three channels are in range.
    # Doing this on the whole array avoids a slow per-pixel Python loop.
    watermark_mask = np.all((rgb > 175) & (rgb < 250), axis=-1)
    rgb[watermark_mask] = 255
    return imgs
57 | 
# Convert each page of the PDF to an image, whiten the watermark pixels and
# save every page as a numbered JPEG (img1.jpg, img2.jpg, ...).
import os  # imported here because only this script section needs it

output_dir = './jiangyi3'
# The README asks the user to create this directory by hand; create it here
# so that saving the pages does not depend on that manual step.
os.makedirs(output_dir, exist_ok=True)

images = convert_from_path('./jiangyi3.pdf')
for index, page in enumerate(images, start=1):
    img = np.array(page)
    print(img.shape)
    img = handle(img)
    io.imsave(output_dir + '/img' + str(index) + '.jpg', img)
    # Progress indicator: number of the page just written.
    print(index)


--------------------------------------------------------------------------------