├── Rapport Impact Ludovic DARMET.pdf
├── README.md
└── Src
    ├── gaussian_noise_creation.py
    ├── lmbd_creation.py
    └── net.py

/Rapport Impact Ludovic DARMET.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ludovicdmt/GitImpact/HEAD/Rapport Impact Ludovic DARMET.pdf
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
Image Manipulation Detection
=========

Code for universal image manipulation detection, adapted from [A Deep Learning Approach to Universal Image Manipulation Detection Using a New Convolutional Layer](http://dl.acm.org/citation.cfm?id=2930786), using Caffe.

This work was done during a final-year project at Ecole Centrale de Lille (engineering school) under the supervision of Patrick Bas (CNRS Lille, CRISTAL) and John Klein (CNRS Lille, CRISTAL).

Add Gaussian noise
-------------------------

Use `Src/gaussian_noise_creation.py` to add AWGN to your own images.

LMDB database
------------

Use `Src/lmbd_creation.py` to build an LMDB database for the train and validation sets.


Network
-----

Run `Src/net.py` to train the network on your images.


Rapport Impact
--------------

Report about this work (in French).
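Example
-------

A minimal sketch of the degradation that `Src/gaussian_noise_creation.py` applies to each image (the file names below are only placeholders):

```python
import numpy as np
from PIL import Image

# Load a grayscale PGM image, add zero-mean Gaussian noise (sigma = 2),
# then round and clip back to the valid 8-bit range, as the batch script does.
img = np.asarray(Image.open('my_image.pgm')).astype(float)
noisy = np.clip(np.round(img + np.random.randn(*img.shape) * 2), 0, 255)
Image.fromarray(noisy.astype(np.uint8)).save('my_image_noisy.pgm')
```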
--------------------------------------------------------------------------------

/Src/gaussian_noise_creation.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 5 09:24:49 2016

@author: bas
"""

from PIL import Image
import multiprocessing
from multiprocessing import Pool
import numpy as np
import glob
import fnmatch

import os
import shutil

source_root = '/Volumes/DISKIMAGES/Im_256/'
process_root = '/Volumes/DISKIMAGES/Proc_256_05/'
#source_dir_list = ['450D/','600D/','D600/',\
#'D90/','SigmaDPMerrill/','galaxy/',\
#'5DMK2/','6D/' ,'D800/','M9/'\
# ,'alphaA7/','iphone/']
#source_dir_list = ['5DMK2/']
crop_size = 256, 256


def generate_process(im_name):
    try:
        print im_name
        pil_image = Image.open(im_name)

        # the PGM images are already grayscale (luminance)
        im_size = pil_image.size
        sigma = 2
        im_array = np.asarray(pil_image).astype(float)
        # PIL gives size as (width, height) while the numpy array is (height, width)
        noise = np.random.randn(im_size[1], im_size[0]) * sigma
        im_array = im_array + noise
        im_array = np.round(im_array)
        im_array[im_array < 0] = 0
        im_array[im_array >= 255] = 255

        # create the appropriate directories
        dir_model = os.path.split(os.path.split(im_name)[0])[0]
        dir_model = dir_model.replace(source_root, process_root)
        #print dir_model
        if not os.path.exists(dir_model):
            os.makedirs(dir_model)
        dir_block = os.path.split(im_name)[0]
        dir_block = dir_block.replace(source_root, process_root)
        #print dir_block
        if not os.path.exists(dir_block):
            os.makedirs(dir_block)
        im_out_name = im_name.replace(source_root, process_root)
        im_array = im_array.astype(dtype=np.uint8)
        im_noise_pgm = Image.fromarray(im_array)
        im_noise_pgm.save(im_out_name)

    except Exception:
        # skip images that cannot be read or written
        pass


if __name__ == "__main__":
    list_im = []
    for root, dirnames, filenames in os.walk(source_root):
        for filename in fnmatch.filter(filenames, '*.pgm'):
            list_im.append(os.path.join(root, filename))

    #print list_im[:10]
    if not os.path.exists(process_root):
        os.makedirs(process_root)
    nbCores = multiprocessing.cpu_count()
    pool = Pool(nbCores)
    pool.map(generate_process, list_im)
    pool.close()
    pool.join()
    # generate_process(list_im[0])
--------------------------------------------------------------------------------

/Src/lmbd_creation.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-


import os
import glob
import random
import numpy as np

from PIL import Image

from caffe.proto import caffe_pb2
import lmdb

# Size of images
IMAGE_WIDTH = 256
IMAGE_HEIGHT = 256

#def transform_img(img, img_width=IMAGE_WIDTH, img_height=IMAGE_HEIGHT):
#
#    #Histogram Equalization
#    #img[:, :, 0] = cv2.equalizeHist(img[:, :, 0])
#    #img[:, :, 1] = cv2.equalizeHist(img[:, :, 1])
#    #img[:, :, 2] = cv2.equalizeHist(img[:, :, 2])
#
#    #Image Resizing
#    img = cv2.resize(img, (img_width, img_height), interpolation = cv2.INTER_CUBIC)
#
#    return img

def make_datum(img, label):
    # img is a numpy.ndarray; the images are single-channel (grayscale)
    return caffe_pb2.Datum(
        channels=1,  # images are in black and white
        width=IMAGE_WIDTH,
        height=IMAGE_HEIGHT,
        label=label,
        data=img.tostring())

train_lmdb = '/path/to/lmdb_train'
validation_lmdb = '/path/to/lmdb_validation'

os.system('rm -rf ' + train_lmdb)
os.system('rm -rf ' + validation_lmdb)

############## Read images #############################

cover = [img for img in glob.glob("/path/to/images/Im_256/*/*/*.pgm")]
gaussian = [img for img in glob.glob("/path/to/images/Proc_256/*/*/*.pgm")]  # Altered images
print 'Number of cover images:', str(len(cover))
print 'Number of Gaussian (altered) images:', str(len(gaussian))

############## Train test split ########################

all_images = cover
all_images.extend(gaussian)

print 'Total number of images:', str(len(all_images))
random.shuffle(all_images)

train_test_split = 0.8

split_index = int(len(all_images)*train_test_split)

train_data = all_images[0:split_index]

print 'Number of images in train', str(len(train_data))

test_data = all_images[split_index::]

print 'Number of images in test', str(len(test_data))

# Shuffle
random.shuffle(train_data)
random.shuffle(test_data)

############## LMDB creation ###########################

print 'Creating train_lmdb'

in_db = lmdb.open(train_lmdb, map_size=int(1e12))
with in_db.begin(write=True) as in_txn:
    for in_idx, img_path in enumerate(train_data):
        if in_idx % 6 == 0:
            continue
        try:
            img = Image.open(img_path)  # read in black and white

        except IOError:
            print str(img_path), 'cannot be read.'
        else:
            img = np.asarray(img)

            if 'Im_256' in img_path:
                label = 0
            else:
                label = 1
            datum = make_datum(img, label)
            in_txn.put('{:0>5d}'.format(in_idx), datum.SerializeToString())
            # print '{:0>5d}'.format(in_idx) + ':' + img_path
in_db.close()


print '\nCreating validation_lmdb'

in_db = lmdb.open(validation_lmdb, map_size=int(1e12))
with in_db.begin(write=True) as in_txn:
    for in_idx, img_path in enumerate(test_data):
        if in_idx % 6 != 0:
            continue
        try:
            img = Image.open(img_path)

        except IOError:
            print str(img_path), 'cannot be read.'

        else:
            img = np.asarray(img)

            if 'Im_256' in img_path:
                label = 0
            else:
                label = 1
            datum = make_datum(img, label)
            in_txn.put('{:0>5d}'.format(in_idx), datum.SerializeToString())
            # print '{:0>5d}'.format(in_idx) + ':' + img_path
in_db.close()

print '\nFinished processing all images'
--------------------------------------------------------------------------------
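As a quick sanity check before training, one might read a few entries back from the freshly created database. This is a minimal sketch (not part of the repository); the LMDB path below is the same placeholder used in `lmbd_creation.py`:

```python
import lmdb
import numpy as np
from caffe.proto import caffe_pb2

# Open the database read-only and decode the first few serialized Datum entries.
env = lmdb.open('/path/to/lmdb_train', readonly=True)
with env.begin() as txn:
    cursor = txn.cursor()
    for i, (key, value) in enumerate(cursor):
        if i >= 5:
            break
        datum = caffe_pb2.Datum()
        datum.ParseFromString(value)
        # The raw bytes were written with img.tostring(), so reshape to (channels, height, width).
        img = np.frombuffer(datum.data, dtype=np.uint8).reshape(datum.channels, datum.height, datum.width)
        print key, img.shape, 'label =', datum.label
env.close()
```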

/Src/net.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-


import caffe
import numpy as np
from pylab import *
import csv
import scipy as sp

# Comment or uncomment the following lines to set GPU mode

#caffe.set_mode_cpu()

caffe.set_device(0)  # 0 is the identification number of the GPU to use
caffe.set_mode_gpu()  # TODO: try to use both GPUs

# Prototxt paths
train_net_path = 'net_auto_train.prototxt'
test_net_path = 'net_auto_test.prototxt'
solver_config_path = 'net_auto_solver.prototxt'


'''
Net definition
'''
from caffe import layers as L, params as P

# Helper block (convolution + batch norm + scale + ReLU); not used below but can be called directly
def Conv(bottom, ks, nout, stride=1, pad=0):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad, bias_term=False, weight_filler=dict(type='xavier'))
    batch_norm = L.BatchNorm(conv, in_place=True, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True)
    relu = L.ReLU(scale, in_place=True)
    return relu


def lenet(lmdb, batch_size):

    n = caffe.NetSpec()
    # Input layer
    n.data, n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdb,
                             transform_param=dict(scale=1./255), ntop=2)

    # Residual convolution
    n.convres = L.Convolution(n.data, kernel_size=5, num_output=12, stride=1, weight_filler=dict(type='xavier'))
    # No activation for this first layer

    # Two layers of convolution
    n.conv1 = L.Convolution(n.convres, kernel_size=7, num_output=64, stride=2, weight_filler=dict(type='xavier'))
    n.batch_norm1 = L.BatchNorm(n.conv1, in_place=True, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    n.scale1 = L.Scale(n.batch_norm1, bias_term=True, in_place=True)
    n.relu2 = L.TanH(n.scale1, in_place=True)
    #n.relu2 = L.ReLU(n.scale1, in_place=True)
    n.pool1 = L.Pooling(n.relu2, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=48, stride=1, weight_filler=dict(type='xavier'))
    n.batch_norm2 = L.BatchNorm(n.conv2, in_place=True, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    n.scale2 = L.Scale(n.batch_norm2, bias_term=True, in_place=True)
    n.relu3 = L.TanH(n.scale2, in_place=True)
    #n.relu3 = L.ReLU(n.scale2, in_place=True)
    n.pool2 = L.Pooling(n.relu3, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    # Dense classifier
    n.fc1 = L.InnerProduct(n.pool2, num_output=4096, weight_filler=dict(type='xavier'))
    n.relu4 = L.ReLU(n.fc1, in_place=True)
    n.drop1 = L.Dropout(n.relu4, in_place=True)

    n.fc2 = L.InnerProduct(n.drop1, num_output=4096, weight_filler=dict(type='xavier'))
    n.relu5 = L.ReLU(n.fc2, in_place=True)
    n.drop2 = L.Dropout(n.relu5, in_place=True)

    # Outputs
    n.score = L.InnerProduct(n.drop2, num_output=2, weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.score, n.label)

    return n.to_proto()


# The nets are written to disk in a verbose but human-readable
# serialization format using Google's protobuf library.

def make_net():
    print 'Make train net'
    with open(train_net_path, 'w') as f:
        f.write(str(lenet('../Input/train_lmdb', 16)))
    print 'Make test net'
    with open(test_net_path, 'w') as f:
        f.write(str(lenet('../Input/validation_lmdb', 16)))

make_net()

'''
Define the solver
'''

from caffe.proto import caffe_pb2
s = caffe_pb2.SolverParameter()

# Set a seed for reproducible experiments:
# this controls for randomization in training.
s.random_seed = 0xCAFFE

# Specify locations of the train and (maybe) test networks.
s.train_net = train_net_path
s.test_net.append(test_net_path)
s.test_interval = 2000  # Test after every 2000 training iterations.
s.test_iter.append(100)  # Test on 100 batches each time we test.

s.max_iter = 10000  # no. of times to update the net (training iterations)

# EDIT HERE to try different solvers
# solver types include "SGD", "Adam", and "Nesterov" among others.
s.type = "SGD"

# Set the initial learning rate for SGD.
s.base_lr = 0.0001  # EDIT HERE to try different learning rates
# Set momentum to accelerate learning by
# taking a weighted average of the current and previous updates.
s.momentum = 0.9
# Set weight decay to regularize and prevent overfitting
s.weight_decay = 5e-4

# Set `lr_policy` to define how the learning rate changes during training.
# This is the same policy as our default LeNet.
s.lr_policy = 'inv'
s.gamma = 0.0001
s.power = 0.75
# EDIT HERE to try the fixed rate (and compare with adaptive solvers)
# `fixed` is the simplest policy that keeps the learning rate constant.
#s.lr_policy = 'fixed'

# Display the current training loss and accuracy every 1000 iterations.
#s.display = 1000

# Snapshots are files used to store networks we've trained.
# We'll snapshot every 5K iterations -- twice during training.
#s.snapshot = 5000
#s.snapshot_prefix = 'mnist/custom_net'

# Train on the GPU
s.solver_mode = caffe_pb2.SolverParameter.GPU

print 'Write solver'
# Write the solver parameters to disk.
with open(solver_config_path, 'w') as f:
    f.write(str(s))

### load the solver and create train and test nets
solver = None  # ignore this workaround for lmdb data (can't instantiate two solvers on the same data)
print 'Get solver'
solver = caffe.get_solver(solver_config_path)
print 'Solver init ok'

'''
Training loop
'''


def logloss(act, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1-epsilon, pred)
    ll = sum(act*sp.log(pred) + sp.subtract(1, act)*sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0/len(act)
    return ll


def normalize(nparray, alpha=-1):

    # Normalisation of the first (residual) convolutional layer:
    # the off-centre weights are rescaled to sum to 1 and the centre weight is set to alpha.

    nparray = np.array(nparray)
    masked = np.ma.array(nparray, mask=False)
    masked.mask[2, 2] = True  # mask the centre so it does not enter the normalisation
    sumation = masked.sum()
    nparray = nparray / sumation
    nparray[2, 2] = alpha  # evaluate the influence of this alpha

    return nparray


def training_net(niter):
    test_interval = niter / 250

    # Losses will also be stored in the log
    train_loss = np.zeros(niter)
    test_acc = np.zeros(int(np.ceil(niter / test_interval)))

    # the main solver loop
    for it in range(niter):
        if it % 100 == 0:
            print 'Iteration number', it, 'of', niter

        ###### Constrain the first conv layer to residual (prediction-error) filters ######
        filters = solver.net.params['convres'][0].data[:, 0]
        for i in range(12):
            filters[i] = normalize(filters[i], alpha=-1)
        solver.net.params['convres'][0].data[:, 0] = filters

        ##### Solver step and in-train error evaluation
        solver.step(16)  # runs 16 solver iterations; the batch size of 16 is set in the net definition (GPU memory limitation)

        # store the train loss
        train_loss[it] = solver.net.blobs['loss'].data

        # run a full test every so often
        # (Caffe can also do this for us and write to a log, but we show here
        #  how to do it directly in Python, where more complicated things are easier.)
        if it % test_interval == 0:
            print 'Iteration', it, 'testing...'
            correct = 0
            ll = 0
            for test_it in range(100):
                solver.test_nets[0].forward()
                correct += sum(solver.test_nets[0].blobs['score'].data.argmax(1)
                               == np.int_(solver.test_nets[0].blobs['label'].data))  # Accuracy

            test_acc[it // test_interval] = correct / (100*16.)  # batch_size = 16 and 100 batches at each test

    return test_acc, train_loss


[test_acc, train_loss] = training_net(7000)
print 'Test accuracy', test_acc
print '---------------------------------------------------'
--------------------------------------------------------------------------------
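After training, the arrays returned by `training_net` in `Src/net.py` can be plotted for inspection. A minimal sketch, assuming matplotlib is installed and that it runs in the same session as `Src/net.py` (so `test_acc` and `train_loss` are still in scope); the output file name is only a placeholder:

```python
import numpy as np
import matplotlib.pyplot as plt

# Plot the stored training loss together with the periodic test accuracy.
# test_interval mirrors the value computed in training_net (niter / 250).
niter = 7000
test_interval = niter / 250

fig, ax1 = plt.subplots()
ax1.plot(np.arange(niter), train_loss, label='train loss')
ax1.set_xlabel('iteration')
ax1.set_ylabel('train loss')

ax2 = ax1.twinx()
ax2.plot(test_interval * np.arange(len(test_acc)), test_acc, 'r', label='test accuracy')
ax2.set_ylabel('test accuracy')

plt.savefig('training_curves.png')
```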