├── Rapport Impact Ludovic DARMET.pdf
├── README.md
└── Src
    ├── gaussian_noise_creation.py
    ├── lmbd_creation.py
    └── net.py

/Rapport Impact Ludovic DARMET.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ludovicdmt/GitImpact/HEAD/Rapport Impact Ludovic DARMET.pdf
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
Image Manipulation Detection
=========

Code for universal image manipulation detection, adapted from [A Deep Learning Approach to Universal Image Manipulation Detection Using a New Convolutional Layer](http://dl.acm.org/citation.cfm?id=2930786), using Caffe.

This work was done during a final-year project at Ecole Centrale de Lille (engineering school) under the supervision of Patrick Bas (CNRS Lille, CRISTAL) and John Klein (CNRS Lille, CRISTAL).

Add Gaussian noise
-------------------------

Use `Src/gaussian_noise_creation.py` to add AWGN to your own images.

LMDB database
------------

Use `Src/lmbd_creation.py` to build an LMDB database for the train and validation sets.


Network
-----

Run `Src/net.py` to train the network on your images.


Rapport Impact
--------------

Report about this work (in French).
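Example
-------

A minimal sketch of the degradation that `Src/gaussian_noise_creation.py` applies to each image (the file names below are only placeholders):

```python
import numpy as np
from PIL import Image

# Load a grayscale PGM image, add zero-mean Gaussian noise (sigma = 2),
# then round and clip back to the valid 8-bit range, as the batch script does.
img = np.asarray(Image.open('my_image.pgm')).astype(float)
noisy = np.clip(np.round(img + np.random.randn(*img.shape) * 2), 0, 255)
Image.fromarray(noisy.astype(np.uint8)).save('my_image_noisy.pgm')
```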
--------------------------------------------------------------------------------

/Src/gaussian_noise_creation.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 5 09:24:49 2016

@author: bas
"""

from PIL import Image
import multiprocessing
from multiprocessing import Pool
import numpy as np
import glob
import fnmatch

import os
import shutil

source_root = '/Volumes/DISKIMAGES/Im_256/'
process_root = '/Volumes/DISKIMAGES/Proc_256_05/'
#source_dir_list = ['450D/','600D/','D600/',\
#'D90/','SigmaDPMerrill/','galaxy/',\
#'5DMK2/','6D/' ,'D800/','M9/'\
# ,'alphaA7/','iphone/']
#source_dir_list = ['5DMK2/']
crop_size = 256, 256


def generate_process(im_name):
    try:
        print im_name
        pil_image = Image.open(im_name)

        # the PGM images are already grayscale (luminance)
        im_size = pil_image.size
        sigma = 2
        im_array = np.asarray(pil_image).astype(float)
        # PIL gives size as (width, height) while the numpy array is (height, width)
        noise = np.random.randn(im_size[1], im_size[0]) * sigma
        im_array = im_array + noise
        im_array = np.round(im_array)
        im_array[im_array < 0] = 0
        im_array[im_array >= 255] = 255

        # create the appropriate directories
        dir_model = os.path.split(os.path.split(im_name)[0])[0]
        dir_model = dir_model.replace(source_root, process_root)
        #print dir_model
        if not os.path.exists(dir_model):
            os.makedirs(dir_model)
        dir_block = os.path.split(im_name)[0]
        dir_block = dir_block.replace(source_root, process_root)
        #print dir_block
        if not os.path.exists(dir_block):
            os.makedirs(dir_block)
        im_out_name = im_name.replace(source_root, process_root)
        im_array = im_array.astype(dtype=np.uint8)
        im_noise_pgm = Image.fromarray(im_array)
        im_noise_pgm.save(im_out_name)

    except Exception:
        # skip images that cannot be read or written
        pass


if __name__ == "__main__":
    list_im = []
    for root, dirnames, filenames in os.walk(source_root):
        for filename in fnmatch.filter(filenames, '*.pgm'):
            list_im.append(os.path.join(root, filename))

    #print list_im[:10]
    if not os.path.exists(process_root):
        os.makedirs(process_root)
    nbCores = multiprocessing.cpu_count()
    pool = Pool(nbCores)
    pool.map(generate_process, list_im)
    pool.close()
    pool.join()
    # generate_process(list_im[0])
--------------------------------------------------------------------------------

/Src/lmbd_creation.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-


import os
import glob
import random
import numpy as np

from PIL import Image

from caffe.proto import caffe_pb2
import lmdb

# Size of images
IMAGE_WIDTH = 256
IMAGE_HEIGHT = 256

#def transform_img(img, img_width=IMAGE_WIDTH, img_height=IMAGE_HEIGHT):
#
#    #Histogram Equalization
#    #img[:, :, 0] = cv2.equalizeHist(img[:, :, 0])
#    #img[:, :, 1] = cv2.equalizeHist(img[:, :, 1])
#    #img[:, :, 2] = cv2.equalizeHist(img[:, :, 2])
#
#    #Image Resizing
#    img = cv2.resize(img, (img_width, img_height), interpolation = cv2.INTER_CUBIC)
#
#    return img

def make_datum(img, label):
    # img is a numpy.ndarray; the images are single-channel (grayscale)
    return caffe_pb2.Datum(
        channels=1,  # images are in black and white
        width=IMAGE_WIDTH,
        height=IMAGE_HEIGHT,
        label=label,
        data=img.tostring())

train_lmdb = '/path/to/lmdb_train'
validation_lmdb = '/path/to/lmdb_validation'

os.system('rm -rf ' + train_lmdb)
os.system('rm -rf ' + validation_lmdb)

############## Read images #############################

cover = [img for img in glob.glob("/path/to/images/Im_256/*/*/*.pgm")]
gaussian = [img for img in glob.glob("/path/to/images/Proc_256/*/*/*.pgm")]  # Altered images
print 'Number of cover images:', str(len(cover))
print 'Number of Gaussian (altered) images:', str(len(gaussian))

############## Train test split ########################

all_images = cover
all_images.extend(gaussian)

print 'Total number of images:', str(len(all_images))
random.shuffle(all_images)

train_test_split = 0.8

split_index = int(len(all_images)*train_test_split)

train_data = all_images[0:split_index]

print 'Number of images in train', str(len(train_data))

test_data = all_images[split_index::]

print 'Number of images in test', str(len(test_data))

# Shuffle
random.shuffle(train_data)
random.shuffle(test_data)

############## LMDB creation ###########################

print 'Creating train_lmdb'

in_db = lmdb.open(train_lmdb, map_size=int(1e12))
with in_db.begin(write=True) as in_txn:
    for in_idx, img_path in enumerate(train_data):
        if in_idx % 6 == 0:
            continue
        try:
            img = Image.open(img_path)  # read in black and white

        except IOError:
            print str(img_path), 'cannot be read.'
        else:
            img = np.asarray(img)

            if 'Im_256' in img_path:
                label = 0
            else:
                label = 1
            datum = make_datum(img, label)
            in_txn.put('{:0>5d}'.format(in_idx), datum.SerializeToString())
            # print '{:0>5d}'.format(in_idx) + ':' + img_path
in_db.close()


print '\nCreating validation_lmdb'

in_db = lmdb.open(validation_lmdb, map_size=int(1e12))
with in_db.begin(write=True) as in_txn:
    for in_idx, img_path in enumerate(test_data):
        if in_idx % 6 != 0:
            continue
        try:
            img = Image.open(img_path)

        except IOError:
            print str(img_path), 'cannot be read.'

        else:
            img = np.asarray(img)

            if 'Im_256' in img_path:
                label = 0
            else:
                label = 1
            datum = make_datum(img, label)
            in_txn.put('{:0>5d}'.format(in_idx), datum.SerializeToString())
            # print '{:0>5d}'.format(in_idx) + ':' + img_path
in_db.close()

print '\nFinished processing all images'
--------------------------------------------------------------------------------
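As a quick sanity check before training, one might read a few entries back from the freshly created database. This is a minimal sketch (not part of the repository); the LMDB path below is the same placeholder used in `lmbd_creation.py`:

```python
import lmdb
import numpy as np
from caffe.proto import caffe_pb2

# Open the database read-only and decode the first few serialized Datum entries.
env = lmdb.open('/path/to/lmdb_train', readonly=True)
with env.begin() as txn:
    cursor = txn.cursor()
    for i, (key, value) in enumerate(cursor):
        if i >= 5:
            break
        datum = caffe_pb2.Datum()
        datum.ParseFromString(value)
        # The raw bytes were written with img.tostring(), so reshape to (channels, height, width).
        img = np.frombuffer(datum.data, dtype=np.uint8).reshape(datum.channels, datum.height, datum.width)
        print key, img.shape, 'label =', datum.label
env.close()
```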

/Src/net.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-


import caffe
import numpy as np
from pylab import *
import csv
import scipy as sp

# Comment or uncomment the following lines to set GPU mode

#caffe.set_mode_cpu()

caffe.set_device(0)  # 0 is the identification number of the GPU to use
caffe.set_mode_gpu()  # TODO: try to use both GPUs

# Prototxt paths
train_net_path = 'net_auto_train.prototxt'
test_net_path = 'net_auto_test.prototxt'
solver_config_path = 'net_auto_solver.prototxt'


'''
Net definition
'''
from caffe import layers as L, params as P

# Helper block (convolution + batch norm + scale + ReLU); not used below but can be called directly
def Conv(bottom, ks, nout, stride=1, pad=0):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad, bias_term=False, weight_filler=dict(type='xavier'))
    batch_norm = L.BatchNorm(conv, in_place=True, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True)
    relu = L.ReLU(scale, in_place=True)
    return relu


def lenet(lmdb, batch_size):

    n = caffe.NetSpec()
    # Input layer
    n.data, n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdb,
                             transform_param=dict(scale=1./255), ntop=2)

    # Residual convolution
    n.convres = L.Convolution(n.data, kernel_size=5, num_output=12, stride=1, weight_filler=dict(type='xavier'))
    # No activation for this first layer

    # Two layers of convolution
    n.conv1 = L.Convolution(n.convres, kernel_size=7, num_output=64, stride=2, weight_filler=dict(type='xavier'))
    n.batch_norm1 = L.BatchNorm(n.conv1, in_place=True, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    n.scale1 = L.Scale(n.batch_norm1, bias_term=True, in_place=True)
    n.relu2 = L.TanH(n.scale1, in_place=True)
    #n.relu2 = L.ReLU(n.scale1, in_place=True)
    n.pool1 = L.Pooling(n.relu2, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=48, stride=1, weight_filler=dict(type='xavier'))
    n.batch_norm2 = L.BatchNorm(n.conv2, in_place=True, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    n.scale2 = L.Scale(n.batch_norm2, bias_term=True, in_place=True)
    n.relu3 = L.TanH(n.scale2, in_place=True)
    #n.relu3 = L.ReLU(n.scale2, in_place=True)
    n.pool2 = L.Pooling(n.relu3, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    # Dense classifier
    n.fc1 = L.InnerProduct(n.pool2, num_output=4096, weight_filler=dict(type='xavier'))
    n.relu4 = L.ReLU(n.fc1, in_place=True)
    n.drop1 = L.Dropout(n.relu4, in_place=True)

    n.fc2 = L.InnerProduct(n.drop1, num_output=4096, weight_filler=dict(type='xavier'))
    n.relu5 = L.ReLU(n.fc2, in_place=True)
    n.drop2 = L.Dropout(n.relu5, in_place=True)

    # Outputs
    n.score = L.InnerProduct(n.drop2, num_output=2, weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.score, n.label)

    return n.to_proto()


# The nets are written to disk in a verbose but human-readable
# serialization format using Google's protobuf library.

def make_net():
    print 'Make train net'
    with open(train_net_path, 'w') as f:
        f.write(str(lenet('../Input/train_lmdb', 16)))
    print 'Make test net'
    with open(test_net_path, 'w') as f:
        f.write(str(lenet('../Input/validation_lmdb', 16)))

make_net()

'''
Define the solver
'''

from caffe.proto import caffe_pb2
s = caffe_pb2.SolverParameter()

# Set a seed for reproducible experiments:
# this controls for randomization in training.
s.random_seed = 0xCAFFE

# Specify locations of the train and (maybe) test networks.
s.train_net = train_net_path
s.test_net.append(test_net_path)
s.test_interval = 2000  # Test after every 2000 training iterations.
s.test_iter.append(100)  # Test on 100 batches each time we test.

s.max_iter = 10000  # no. of times to update the net (training iterations)

# EDIT HERE to try different solvers
# solver types include "SGD", "Adam", and "Nesterov" among others.
s.type = "SGD"

# Set the initial learning rate for SGD.
s.base_lr = 0.0001  # EDIT HERE to try different learning rates
# Set momentum to accelerate learning by
# taking a weighted average of the current and previous updates.
s.momentum = 0.9
# Set weight decay to regularize and prevent overfitting
s.weight_decay = 5e-4

# Set `lr_policy` to define how the learning rate changes during training.
# This is the same policy as our default LeNet.
s.lr_policy = 'inv'
s.gamma = 0.0001
s.power = 0.75
# EDIT HERE to try the fixed rate (and compare with adaptive solvers)
# `fixed` is the simplest policy that keeps the learning rate constant.
#s.lr_policy = 'fixed'

# Display the current training loss and accuracy every 1000 iterations.
#s.display = 1000

# Snapshots are files used to store networks we've trained.
# We'll snapshot every 5K iterations -- twice during training.
#s.snapshot = 5000
#s.snapshot_prefix = 'mnist/custom_net'

# Train on the GPU
s.solver_mode = caffe_pb2.SolverParameter.GPU

print 'Write solver'
# Write the solver parameters to disk.
with open(solver_config_path, 'w') as f:
    f.write(str(s))

### load the solver and create train and test nets
solver = None  # ignore this workaround for lmdb data (can't instantiate two solvers on the same data)
print 'Get solver'
solver = caffe.get_solver(solver_config_path)
print 'Solver init ok'

'''
Training loop
'''


def logloss(act, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1-epsilon, pred)
    ll = sum(act*sp.log(pred) + sp.subtract(1, act)*sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0/len(act)
    return ll


def normalize(nparray, alpha=-1):

    # Normalisation of the first (residual) convolutional layer:
    # the off-centre weights are rescaled to sum to 1 and the centre weight is set to alpha.

    nparray = np.array(nparray)
    masked = np.ma.array(nparray, mask=False)
    masked.mask[2, 2] = True  # mask the centre so it does not enter the normalisation
    sumation = masked.sum()
    nparray = nparray / sumation
    nparray[2, 2] = alpha  # evaluate the influence of this alpha

    return nparray


def training_net(niter):
    test_interval = niter / 250

    # Losses will also be stored in the log
    train_loss = np.zeros(niter)
    test_acc = np.zeros(int(np.ceil(niter / test_interval)))

    # the main solver loop
    for it in range(niter):
        if it % 100 == 0:
            print 'Iteration number', it, 'of', niter

        ###### Constrain the first conv layer to residual (prediction-error) filters ######
        filters = solver.net.params['convres'][0].data[:, 0]
        for i in range(12):
            filters[i] = normalize(filters[i], alpha=-1)
        solver.net.params['convres'][0].data[:, 0] = filters

        ##### Solver step and in-train error evaluation
        solver.step(16)  # runs 16 solver iterations; the batch size of 16 is set in the net definition (GPU memory limitation)

        # store the train loss
        train_loss[it] = solver.net.blobs['loss'].data

        # run a full test every so often
        # (Caffe can also do this for us and write to a log, but we show here
        #  how to do it directly in Python, where more complicated things are easier.)
        if it % test_interval == 0:
            print 'Iteration', it, 'testing...'
            correct = 0
            ll = 0
            for test_it in range(100):
                solver.test_nets[0].forward()
                correct += sum(solver.test_nets[0].blobs['score'].data.argmax(1)
                               == np.int_(solver.test_nets[0].blobs['label'].data))  # Accuracy

            test_acc[it // test_interval] = correct / (100*16.)  # batch_size = 16 and 100 batches at each test

    return test_acc, train_loss


[test_acc, train_loss] = training_net(7000)
print 'Test accuracy', test_acc
print '---------------------------------------------------'
--------------------------------------------------------------------------------
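After training, the arrays returned by `training_net` in `Src/net.py` can be plotted for inspection. A minimal sketch, assuming matplotlib is installed and that it runs in the same session as `Src/net.py` (so `test_acc` and `train_loss` are still in scope); the output file name is only a placeholder:

```python
import numpy as np
import matplotlib.pyplot as plt

# Plot the stored training loss together with the periodic test accuracy.
# test_interval mirrors the value computed in training_net (niter / 250).
niter = 7000
test_interval = niter / 250

fig, ax1 = plt.subplots()
ax1.plot(np.arange(niter), train_loss, label='train loss')
ax1.set_xlabel('iteration')
ax1.set_ylabel('train loss')

ax2 = ax1.twinx()
ax2.plot(test_interval * np.arange(len(test_acc)), test_acc, 'r', label='test accuracy')
ax2.set_ylabel('test accuracy')

plt.savefig('training_curves.png')
```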