├── README.md ├── model.ipynb ├── model.py ├── save ├── MF_clf_model.ckpt.data-00000-of-00001 ├── MF_clf_model.ckpt.index ├── MF_clf_model.ckpt.meta └── checkpoint ├── tfrecords.ipynb └── tfrecords.py /README.md: -------------------------------------------------------------------------------- 1 | # image_manipulation_detector 2 | A tensorflow implementation of paper "A Deep Learning Approach To Universal Image Manipulation Detection Using A New Convolutional Layer" 3 | -------------------------------------------------------------------------------- /model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# A Deep Learning Approach To Universal Image Manipulation Detection Using A New Convolutional Layer" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "import tensorflow as tf\n", 19 | "import pandas as pd\n", 20 | "import math\n", 21 | "import sys\n", 22 | "from glob import glob\n", 23 | "import cv2" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "from tqdm import tqdm " 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "def images_square_grid(images):\n", 42 | " \"\"\"\n", 43 | " Save images as a square grid\n", 44 | " :param images: Images to be used for the grid\n", 45 | " :param mode: The mode to use for images\n", 46 | " :return: Image of images in a square grid\n", 47 | " \"\"\"\n", 48 | " # Get maximum size for square grid of images\n", 49 | " save_size = math.floor(np.sqrt(images.shape[0]))\n", 50 | "\n", 51 | " # Scale to 0-255\n", 52 | " images = (((images - images.min()) * 255) / 
(images.max() - images.min())).astype(np.uint8)\n", 53 | "\n", 54 | " # Put images in a square arrangement\n", 55 | " images_in_square = np.reshape(\n", 56 | " images[:save_size*save_size],\n", 57 | " (save_size, save_size, images.shape[1], images.shape[2]))\n", 58 | " h = images.shape[1]\n", 59 | " w = images.shape[2]\n", 60 | " # Combine images to grid image\n", 61 | " new_im = np.ones((save_size*h, save_size*w), dtype=np.uint8)\n", 62 | " for col_i, col_images in enumerate(images_in_square):\n", 63 | " for image_i, image in enumerate(col_images):\n", 64 | " new_im[col_i * h: col_i * h + h, \n", 65 | " image_i * h: image_i * h + h] = image\n", 66 | "\n", 67 | " return new_im" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "IMAGE_SIZE = 64\n", 84 | "IMAGE_CHANNEL = 1\n", 85 | "NUM_LABELS = 2\n", 86 | "\n", 87 | "CONV_RES_DEEP = 12\n", 88 | "CONV_RES_SIZE = 5\n", 89 | "\n", 90 | "CONV1_DEEP = 64\n", 91 | "CONV1_SIZE = 7\n", 92 | "\n", 93 | "CONV2_DEEP = 48\n", 94 | "CONV2_SIZE = 5\n", 95 | "\n", 96 | "FC_SIZE1 = 256\n", 97 | "FC_SIZE2 = 256" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "# generate data using a batch size 64\n", 107 | "# where input shape is (64, 227, 227, 1)\n", 108 | "def parser(record):\n", 109 | " features = tf.parse_single_example(\n", 110 | " record,\n", 111 | " features={\n", 112 | " \"image_raw\" : tf.FixedLenFeature([], tf.string),\n", 113 | " \"label\" : tf.FixedLenFeature([], tf.int64)\n", 114 | " }\n", 115 | " )\n", 116 | " image = tf.decode_raw(features['image_raw'], tf.uint8)\n", 117 | "# image.set_shape([256, 256])\n", 118 | " image = tf.reshape(image, [256, 256, 1])\n", 119 | " label = features['label']\n", 120 | 
def preprocessing(image, label):
    """Crop/pad one decoded record to the network input size and one-hot its label.

    :param image: image tensor as returned by ``parser`` (uint8, reshaped to
        [256, 256, 1] there).
    :param label: scalar integer class label as returned by ``parser``.
    :return: ``(img, one_hot)`` -- the float32 image, centrally cropped or
        padded to IMAGE_SIZE x IMAGE_SIZE, and a one-hot vector of depth
        NUM_LABELS.
    """
    img = tf.image.resize_image_with_crop_or_pad(image, IMAGE_SIZE, IMAGE_SIZE)
    # convert_image_dtype already rescales integer images to floats in [0, 1].
    # The former additional "img / 255" squeezed the training inputs into
    # [0, 1/255], whereas every inference cell in this file feeds "x / 255"
    # (i.e. [0, 1]); a model trained through this function now sees the same
    # value range at training and inference time.
    # NOTE(review): a checkpoint trained with the old scaling should be
    # retrained before being compared against this pipeline.
    img = tf.image.convert_image_dtype(img, tf.float32)

    one_hot = tf.one_hot(label, depth=NUM_LABELS)

    return img, one_hot


def dataset(file, batch_size=32,
            num_epochs=1, is_shuffle=False, shuffle_buffer=10000,
            preprocess=preprocessing):
    """Build a batched ``tf.data`` pipeline over one or more TFRecord files.

    :param file: file name or glob pattern, handed to
        ``tf.train.match_filenames_once``.
    :param batch_size: number of examples per emitted batch.
    :param num_epochs: how many times the batched data is repeated.
    :param is_shuffle: when true, shuffle examples (before batching) with a
        buffer of ``shuffle_buffer`` elements.
    :param shuffle_buffer: size of the shuffle buffer.
    :param preprocess: ``(image, label) -> (image, label)`` map applied after
        ``parser``.
    :return: the resulting ``tf.data`` dataset; callers create an iterator
        from it.
    """
    # NOTE(review): match_filenames_once presumably stores the matched names
    # in a local variable -- every caller in this file runs
    # tf.local_variables_initializer() before initialising the iterator.
    input_file = tf.train.match_filenames_once(file)

    records = tf.data.TFRecordDataset(input_file)
    records = records.map(parser).map(preprocess)
    if is_shuffle:
        records = records.shuffle(shuffle_buffer)

    # Batch first, then repeat: batches never mix examples of two epochs.
    return records.batch(batch_size).repeat(num_epochs)
def get_placeholder():
    """Create the three feed points of the graph.

    :return: ``(input_placeholder, labels_placeholder, dropout_placeholder)``:
        a float32 image batch of shape
        [None, IMAGE_SIZE, IMAGE_SIZE, IMAGE_CHANNEL], float32 one-hot labels
        of shape [None, NUM_LABELS], and a float32 scalar that ``build_nn``
        passes to ``tf.nn.dropout`` (fed 0.5 while training and 1.0 for
        evaluation in this file).
    """
    input_placeholder = tf.placeholder(
        dtype=tf.float32,
        # IMAGE_CHANNEL instead of a literal 1, so the placeholder always
        # agrees with the first-layer kernel built in build_nn.
        shape=[None, IMAGE_SIZE, IMAGE_SIZE, IMAGE_CHANNEL],
        name='input_placeholder')

    labels_placeholder = tf.placeholder(
        dtype=tf.float32, shape=[None, NUM_LABELS], name='label_placeholder')

    dropout_placeholder = tf.placeholder(
        dtype=tf.float32, shape=[], name='dropout_placeholder')

    return input_placeholder, labels_placeholder, dropout_placeholder


def constraint_weights(weights):
    """Project convolution kernels onto the constrained ("residual") form.

    For every (input channel, output filter) pair the centre tap is forced to
    -1 and the remaining taps are rescaled so that they sum to 1.

    :param weights: array of shape (rows, cols, in_channels, out_filters);
        the centre tap is taken at (rows // 2, cols // 2).
    :return: a new array of the same shape. The argument is left untouched
        (the previous implementation zeroed its centre tap in place).
    """
    kernel = np.array(weights, copy=True)
    # Derive the centre from the kernel itself rather than from the global
    # CONV_RES_SIZE, so the projection stays correct for any kernel size.
    mid_row, mid_col = kernel.shape[0] // 2, kernel.shape[1] // 2

    # Exclude the centre tap from the normalisation ...
    kernel[mid_row, mid_col, :, :] = 0
    # ... scale every kernel so its off-centre taps sum to 1 ...
    kernel = kernel / np.sum(kernel, axis=(0, 1))
    # ... and pin the centre tap.
    kernel[mid_row, mid_col, :, :] = -1

    return kernel


def get_weights(shape, name="weights"):
    """Return a variable of ``shape`` initialised from a truncated normal (stddev 0.1)."""
    initial = tf.truncated_normal(shape=shape, stddev=0.1)
    return tf.Variable(initial, name=name)


def get_bias(shape, name="bias"):
    """Return a float32 variable of ``shape`` initialised to the constant 0.01."""
    initial = tf.constant(0.01, shape=shape)
    return tf.Variable(initial, dtype=tf.float32, name=name)
230 | "def build_nn(input_tensor, labels, dropout):\n", 231 | " with tf.variable_scope('layer1_conv_res'):\n", 232 | " conv_res_weights = get_weights([CONV_RES_SIZE, CONV_RES_SIZE, IMAGE_CHANNEL, CONV_RES_DEEP])\n", 233 | " conv_res_bias = get_bias([CONV_RES_DEEP, ])\n", 234 | " \n", 235 | " conv = tf.nn.conv2d(input_tensor, conv_res_weights, strides=[1,1,1,1], padding='VALID')\n", 236 | " layer1 = tf.nn.bias_add(conv, conv_res_bias)\n", 237 | " \n", 238 | " print(\"conv1\", layer1.get_shape().as_list())\n", 239 | " # BATHCH_SIZE, 223, 223, 12\n", 240 | " with tf.variable_scope('layer2_conv1'):\n", 241 | " conv1_wights = get_weights([CONV1_SIZE, CONV1_SIZE, CONV_RES_DEEP, CONV1_DEEP])\n", 242 | " conv1_bias = get_bias([CONV1_DEEP, ])\n", 243 | " \n", 244 | " conv1 = tf.nn.conv2d(layer1, conv1_wights, strides=[1, 2, 2, 1], padding='SAME')\n", 245 | " conv1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_bias))\n", 246 | " conv1 = tf.nn.lrn(conv1, depth_radius=5, bias=2, alpha=1e-4, beta=.75)\n", 247 | " layer2 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')\n", 248 | " \n", 249 | " print(\"conv2\", layer2.get_shape().as_list())\n", 250 | " # batch_size, 56, 56, 64\n", 251 | " with tf.variable_scope('layer3_conv2'):\n", 252 | " conv2_wights = get_weights([CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP])\n", 253 | " conv2_bias = get_bias([CONV2_DEEP, ])\n", 254 | " \n", 255 | " conv2 = tf.nn.conv2d(layer2, conv2_wights, strides=[1, 1, 1, 1], padding='SAME')\n", 256 | " conv2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_bias))\n", 257 | " conv2 = tf.nn.lrn(conv2, depth_radius=5, bias=2, alpha=1e-4, beta=.75)\n", 258 | " layer3 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')\n", 259 | " \n", 260 | " print(\"conv3\", layer3.get_shape().as_list())\n", 261 | " # batch_size, 28, 28, 48\n", 262 | " # reshape\n", 263 | " layer3_shape = layer3.get_shape().as_list()\n", 264 | " nodes = layer3_shape[1] * layer3_shape[2] * 
layer3_shape[3]\n", 265 | " layer3_flatten = tf.reshape(layer3, [-1, nodes])\n", 266 | "# print(layer3_shape)\n", 267 | " \n", 268 | " with tf.variable_scope(\"layer4_fc1\"):\n", 269 | " fc1_weights = get_weights([nodes, FC_SIZE1])\n", 270 | " fc1_bias = get_bias([FC_SIZE1])\n", 271 | " fc1 = tf.nn.relu(tf.matmul(layer3_flatten, fc1_weights) + fc1_bias)\n", 272 | " layer4 = tf.nn.dropout(fc1, dropout)\n", 273 | " \n", 274 | " with tf.variable_scope(\"layer5_fc2\"):\n", 275 | " fc2_weights = get_weights([FC_SIZE1, FC_SIZE2])\n", 276 | " fc2_bias = get_bias([FC_SIZE2])\n", 277 | " fc2 = tf.nn.relu(tf.matmul(layer4, fc2_weights) + fc2_bias)\n", 278 | " layer5 = tf.nn.dropout(fc2, dropout)\n", 279 | " \n", 280 | " with tf.variable_scope(\"layer6_softmax\"):\n", 281 | " softmax_weights = get_weights([FC_SIZE2, NUM_LABELS])\n", 282 | " softmax_bias = get_bias([NUM_LABELS, ])\n", 283 | " logits = tf.matmul(layer5, softmax_weights) + softmax_bias\n", 284 | " pred = tf.nn.softmax(logits)\n", 285 | " \n", 286 | " with tf.variable_scope(\"loss\"):\n", 287 | " correct = tf.equal(tf.argmax(pred, 1), tf.argmax(labels, 1))\n", 288 | " acc = tf.reduce_mean(tf.cast(correct, tf.float32))\n", 289 | " loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels))\n", 290 | " \n", 291 | "# tf.summary.scalar('loss',loss)\n", 292 | "# tf.summary.scalar('acc',acc)\n", 293 | " return loss, acc, conv_res_weights" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": { 307 | "scrolled": true 308 | }, 309 | "outputs": [], 310 | "source": [ 311 | "def trian(PRINT_LOSS_EVERY_ITE=500, PRINT_ACC_EVERY_ITE=1000):\n", 312 | " \n", 313 | " tf.reset_default_graph()\n", 314 | " \n", 315 | " train_dataset = dataset(\"tfrecords/MF_clf_train.tfrecords-*\", \n", 316 | " batch_size=64, \n", 
317 | " num_epochs=50, \n", 318 | " is_shuffle=False)\n", 319 | " test_dataset = dataset(\"tfrecords/MF_clf_test.tfrecords\", \n", 320 | " batch_size=100, \n", 321 | " num_epochs=1)\n", 322 | " iterator = train_dataset.make_initializable_iterator()\n", 323 | " test_iterator = test_dataset.make_initializable_iterator()\n", 324 | " \n", 325 | " image_batch, label_batch = iterator.get_next()\n", 326 | " test_image_batch, test_label_batch = test_iterator.get_next()\n", 327 | " \n", 328 | " input_placeholder, labels_placeholder, dropout_placeholder = get_placeholder()\n", 329 | " loss, acc, weights= build_nn(input_placeholder, labels_placeholder, dropout_placeholder)\n", 330 | " \n", 331 | "# global_step = tf.Variable(0, trainable=False)\n", 332 | "# lr = tf.train.exponential_decay(1e-4, global_step, 1000, 0.95)\n", 333 | "# train = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss, global_step=global_step)\n", 334 | "# train = tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9).minimize(loss)\n", 335 | " train = tf.train.AdamOptimizer(1e-4).minimize(loss)\n", 336 | " \n", 337 | " saver = tf.train.Saver()\n", 338 | " \n", 339 | " train_losses = []\n", 340 | " test_losses = []\n", 341 | " train_acces = []\n", 342 | " test_acces = []\n", 343 | " \n", 344 | " with tf.Session() as sess:\n", 345 | " sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])\n", 346 | " sess.run([iterator.initializer, test_iterator.initializer])\n", 347 | " \n", 348 | " #################### test data ####################\n", 349 | " dev_imgs, dev_labs = [], []\n", 350 | " while True:\n", 351 | " try:\n", 352 | " b_imgs, b_labs = sess.run([test_image_batch, test_label_batch])\n", 353 | " dev_imgs.append(b_imgs)\n", 354 | " dev_labs.append(b_labs)\n", 355 | " except tf.errors.OutOfRangeError:\n", 356 | " break\n", 357 | " dev_imgs = np.concatenate(dev_imgs)\n", 358 | " dev_labs = np.concatenate(dev_labs)\n", 359 | " 
###################################################\n", 360 | " \n", 361 | " num_iterat = 1\n", 362 | " while True:\n", 363 | " try:\n", 364 | " _weights = sess.run(weights)\n", 365 | " weights.load(constraint_weights(_weights), sess)\n", 366 | " \n", 367 | " _image_batch, _label_batch = sess.run([image_batch, label_batch])\n", 368 | " _, _loss, _acc = sess.run([train, loss, acc], feed_dict={\n", 369 | " input_placeholder: _image_batch,\n", 370 | " labels_placeholder: _label_batch,\n", 371 | " dropout_placeholder: 0.5 })\n", 372 | " num_iterat += 1\n", 373 | " \n", 374 | " train_acces.append(_acc)\n", 375 | " train_losses.append(_loss)\n", 376 | " \n", 377 | " if num_iterat > 20000:\n", 378 | " break\n", 379 | "\n", 380 | " \n", 381 | " sys.stdout.write(\"\\r ite {:>3} train loss:{:>6.2f} train acc:{:.4f}\".format(num_iterat, _loss, _acc))\n", 382 | " if num_iterat % PRINT_LOSS_EVERY_ITE == 0:\n", 383 | " print(\"\")\n", 384 | " \n", 385 | " \n", 386 | " if num_iterat % PRINT_ACC_EVERY_ITE == 0:\n", 387 | " _loss, _acc = sess.run([loss, acc], feed_dict={\n", 388 | " input_placeholder : dev_imgs,\n", 389 | " labels_placeholder : dev_labs,\n", 390 | " dropout_placeholder : 1})\n", 391 | "\n", 392 | " test_losses.append(_loss)\n", 393 | " test_acces.append(_acc)\n", 394 | " \n", 395 | " print(\"\\ntest loss = %.5f test acc = %.6f\" % (_loss, _acc) )\n", 396 | " \n", 397 | " if _acc > 0.99:\n", 398 | " saver.save(sess, \"save/MF_clf_model.ckpt\")\n", 399 | " break\n", 400 | " except tf.errors.OutOfRangeError:\n", 401 | " _weights = sess.run(weights)\n", 402 | " weights.load(constraint_weights(_weights), sess)\n", 403 | " print(\"training end\")\n", 404 | " \n", 405 | " saver.save(sess, \"save/MF_clf_model.ckpt\")\n", 406 | " break\n", 407 | " \n", 408 | " \n", 409 | " plt.figure(figsize=(12, 8))\n", 410 | " plt.subplot(221)\n", 411 | " plt.plot(train_losses, c='b')\n", 412 | " plt.subplot(222)\n", 413 | " plt.plot(train_acces, c='b')\n", 414 | " plt.subplot(223)\n", 
def split64(gray_img, block=64):
    """Cut a 2-D image into non-overlapping square tiles.

    The tile grid is centred: when a side is not a multiple of ``block`` the
    leftover pixels are split between the two borders and discarded.

    :param gray_img: 2-D numpy array (height, width).
    :param block: side length of a tile in pixels; defaults to 64, the
        network input size used throughout this file.
    :return: array of shape (num_rows * num_cols, block, block) in row-major
        tile order, with the dtype of ``gray_img``. When the image is smaller
        than one tile the result is empty but keeps the tile shape,
        (0, block, block).
    """
    height, width = gray_img.shape
    # Number of whole tiles along the vertical / horizontal direction.
    num_rows, num_cols = height // block, width // block
    # Offset of the first tile, i.e. half of the unused border.
    top, left = height % block // 2, width % block // 2

    sub_imgs = [
        gray_img[top + i * block: top + (i + 1) * block,
                 left + j * block: left + (j + 1) * block]
        for i in range(num_rows)
        for j in range(num_cols)
    ]
    if not sub_imgs:
        # np.array([]) would be a float64 array of shape (0,); keep the tile
        # shape and dtype so downstream reshapes and concatenations behave.
        return np.empty((0, block, block), dtype=gray_img.dtype)
    return np.array(sub_imgs)
| "source": [ 556 | "__This means an image is recognized as altered if 7.55% or more of its 64-by-64 blocks are classified as positive, where 7.55% is a threshold computed on the UCID dataset.__ " 557 | ] 558 | }, 559 | { 560 | "cell_type": "markdown", 561 | "metadata": {}, 562 | "source": [ 563 | "## 人物照片上测试分类准确率" 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": null, 569 | "metadata": {}, 570 | "outputs": [], 571 | "source": [ 572 | "g = cv2.imread(\"./yy.jpg\")[:,:,1]\n", 573 | "imgs = split64(g)\n", 574 | "p_imgs = np.array([cv2.medianBlur(i, 5) for i in imgs])\n", 575 | "\n", 576 | "x = np.concatenate([imgs, p_imgs])\n", 577 | "x = x.reshape([-1, 64, 64, 1])\n", 578 | "x = x / 255\n", 579 | "y = np.zeros([x.shape[0]])\n", 580 | "y[x.shape[0]//2:] = 1\n", 581 | "import pandas as pd\n", 582 | "y = pd.get_dummies(y).values\n", 583 | "\n", 584 | "a = sess.run(acc, feed_dict={\n", 585 | " input_placeholder: x, \n", 586 | " labels_placeholder: y, \n", 587 | " dropout_placeholder: 1.0\n", 588 | "})\n", 589 | "print(\"acc %.2f%% over %d blocks\" % (a*100, x.shape[0]))" 590 | ] 591 | }, 592 | { 593 | "cell_type": "markdown", 594 | "metadata": {}, 595 | "source": [ 596 | "## 检测修过图的照片" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": null, 602 | "metadata": {}, 603 | "outputs": [], 604 | "source": [ 605 | "gray_img = cv2.imread(\"me.jpg\", cv2.IMREAD_ANYCOLOR)[:,:,1]\n", 606 | "\n", 607 | "height, width = gray_img.shape\n", 608 | "# [num_raws, num_cols] are subimage numbers on vertical or horizontal direction\n", 609 | "num_raws, num_cols = height//64, width//64\n", 610 | "start_raws, start_cols = height%64//2, width%64//2\n", 611 | "img_boxes = np.copy(gray_img) # for test perpose\n", 612 | "\n", 613 | "indexes = [(i, j) for i in range(num_raws) for j in range(num_cols)]\n", 614 | "\n", 615 | "for i, j in indexes:\n", 616 | " x, y = start_cols + j * 64, start_raws + i * 64\n", 617 | " sub_img = gray_img[y:y+64,
def detect(file):
    """Outline every 64x64 block of an image that the network labels as class 1.

    The image is tiled exactly like ``split64`` does (centred grid, leftover
    border ignored); each tile is scaled by 1/255 and classified with the
    module-level ``sess`` / ``pred`` / placeholders, with dropout fed as 1.0.

    :param file: path of the image to inspect.
    :return: a copy of the analysed single-channel image with a rectangle
        drawn around each block whose arg-max prediction is 1.
    :raises OSError: if OpenCV cannot read ``file``.
    """
    loaded = cv2.imread(file, cv2.IMREAD_ANYCOLOR)
    if loaded is None:
        # Previously this surfaced as an opaque TypeError on "None[:, :, 1]".
        raise OSError("cannot read image: {}".format(file))
    # Channel 1 of a colour image; a file that decodes to a single-channel
    # array (which used to raise IndexError) is used as is.
    gray_img = loaded if loaded.ndim == 2 else loaded[:, :, 1]

    height, width = gray_img.shape
    # Number of whole blocks along the vertical / horizontal direction.
    num_rows, num_cols = height // 64, width // 64
    start_row, start_col = height % 64 // 2, width % 64 // 2
    img_boxes = np.copy(gray_img)  # predictions read gray_img, drawing goes here

    # Top-left (x, y) corner of every block, row-major.
    corners = [(start_col + j * 64, start_row + i * 64)
               for i in range(num_rows)
               for j in range(num_cols)]

    # Classify blocks in chunks: one session call per chunk instead of one
    # per block, while keeping the fed batch bounded for large images.
    chunk = 256
    for first in range(0, len(corners), chunk):
        group = corners[first:first + chunk]
        batch = np.stack([gray_img[y:y + 64, x:x + 64] for x, y in group])
        probs = sess.run(pred, feed_dict={
            input_placeholder: batch.reshape([-1, 64, 64, 1]) / 255,
            dropout_placeholder: 1.0})
        for (x, y), label in zip(group, np.argmax(probs, 1)):
            if label == 1:
                # NOTE(review): img_boxes is single-channel, so presumably
                # only the first colour component (0) is drawn -- confirm.
                cv2.rectangle(img_boxes, (x, y), (x + 64, y + 64), (0, 255, 0), 5)
    return img_boxes
"plt.subplot(212)\n", 675 | "plt.imshow(r2, cmap='gray')\n", 676 | "plt.title('after PS')\n", 677 | "plt.show()" 678 | ] 679 | }, 680 | { 681 | "cell_type": "code", 682 | "execution_count": null, 683 | "metadata": {}, 684 | "outputs": [], 685 | "source": [] 686 | }, 687 | { 688 | "cell_type": "markdown", 689 | "metadata": {}, 690 | "source": [ 691 | "## residual层的特征可视化" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": null, 697 | "metadata": {}, 698 | "outputs": [], 699 | "source": [ 700 | "res = tf.get_default_graph().get_tensor_by_name('layer1_conv_res/BiasAdd:0')" 701 | ] 702 | }, 703 | { 704 | "cell_type": "code", 705 | "execution_count": null, 706 | "metadata": {}, 707 | "outputs": [], 708 | "source": [ 709 | "f = \"dataset/ucid/ucid00004.tif\"\n", 710 | "\n", 711 | "g = cv2.imread(f)[:,:,1]\n", 712 | "imgs = split64(g)\n", 713 | "x = np.array([cv2.medianBlur(i, 5) for i in imgs])\n", 714 | "x = x.reshape([-1, 64, 64, 1])\n", 715 | "x = x / 255\n", 716 | "\n", 717 | "res_o = sess.run(res, feed_dict={input_placeholder: x})\n", 718 | "res_imgs = [[x[i,:,:,0]]+[res_o[i, :,:, j] for j in range(12)] for i in range(5)]\n", 719 | "\n", 720 | "fig, axes = plt.subplots(nrows=5, ncols=12, sharex=True, sharey=True, figsize=(12*1.5,5*1.5))\n", 721 | "\n", 722 | "for images, row in zip(res_imgs, axes):\n", 723 | " for img, ax in zip(images, row):\n", 724 | " ax.imshow(img, cmap='Greys_r')\n", 725 | " ax.get_xaxis().set_visible(False)\n", 726 | " ax.get_yaxis().set_visible(False)\n", 727 | "\n", 728 | "fig.tight_layout(pad=0.1)\n", 729 | "print(\"residual output among several changed images, x = channels, y = inputs\")\n", 730 | "plt.show()\n", 731 | "\n", 732 | "print(\"residual output among several original images\")\n", 733 | "g = cv2.imread(f)[:,:,1]\n", 734 | "imgs = split64(g)\n", 735 | "x = imgs.reshape([-1, 64, 64, 1])\n", 736 | "x = x / 255\n", 737 | "\n", 738 | "res_o = sess.run(res, feed_dict={input_placeholder: x})\n", 739 | "res_imgs 
= [[x[i,:,:,0]]+[res_o[i, :,:, j] for j in range(12)] for i in range(5)]\n", 740 | "\n", 741 | "fig, axes = plt.subplots(nrows=5, ncols=12, sharex=True, sharey=True, figsize=(12*1.5,5*1.5))\n", 742 | "\n", 743 | "for images, row in zip(res_imgs, axes):\n", 744 | " for img, ax in zip(images, row):\n", 745 | " ax.imshow(img, cmap='Greys_r')\n", 746 | " ax.get_xaxis().set_visible(False)\n", 747 | " ax.get_yaxis().set_visible(False)\n", 748 | "\n", 749 | "fig.tight_layout(pad=0.1)\n", 750 | "plt.show()" 751 | ] 752 | }, 753 | { 754 | "cell_type": "code", 755 | "execution_count": null, 756 | "metadata": {}, 757 | "outputs": [], 758 | "source": [] 759 | }, 760 | { 761 | "cell_type": "markdown", 762 | "metadata": {}, 763 | "source": [ 764 | "## test on a small PS image dataset find in Baidu Image" 765 | ] 766 | }, 767 | { 768 | "cell_type": "code", 769 | "execution_count": null, 770 | "metadata": {}, 771 | "outputs": [], 772 | "source": [ 773 | "results = []\n", 774 | "for f in glob(\"test_img/*.*\"):\n", 775 | "# try:\n", 776 | " g = cv2.imread(f)[:,:,1]\n", 777 | " imgs = split64(g)\n", 778 | " x = imgs.reshape([-1, 64, 64, 1])\n", 779 | " x = x / 255\n", 780 | "\n", 781 | " _p = sess.run(pred, feed_dict={\n", 782 | " input_placeholder: x, \n", 783 | " dropout_placeholder: 1.0\n", 784 | " })\n", 785 | " score = np.argmax(_p, 1).mean()\n", 786 | " results.append(score)\n", 787 | "# if score > 0.01:\n", 788 | " r1 = detect(f)\n", 789 | " plt.imshow(r1, cmap='gray')\n", 790 | " plt.show()\n", 791 | "# except:\n", 792 | "# continue\n", 793 | "results = np.array(results)" 794 | ] 795 | }, 796 | { 797 | "cell_type": "code", 798 | "execution_count": null, 799 | "metadata": {}, 800 | "outputs": [], 801 | "source": [] 802 | }, 803 | { 804 | "cell_type": "code", 805 | "execution_count": null, 806 | "metadata": {}, 807 | "outputs": [], 808 | "source": [ 809 | "[n.name for n in tf.get_default_graph().as_graph_def().node]" 810 | ] 811 | } 812 | ], 813 | "metadata": { 814 | 
def images_square_grid(images):
    """Arrange a batch of single-channel images into one square mosaic.

    :param images: array of shape (N, H, W) or (N, H, W, 1).
    :return: uint8 array of shape (s * H, s * W) with s = floor(sqrt(N)).
        Pixel values are rescaled so that the batch minimum maps to 0 and the
        batch maximum to 255; images are laid out row by row and any image
        beyond the first s * s is dropped. A batch whose pixels are all equal
        yields an all-zero mosaic; an empty batch yields a (0, 0) array.
    """
    # Work in float64 so that integer inputs cannot overflow on "* 255".
    pixels = np.asarray(images, dtype=np.float64)

    # Largest square grid that can be filled completely.
    save_size = math.floor(np.sqrt(pixels.shape[0]))
    if save_size == 0:
        return np.zeros((0, 0), dtype=np.uint8)
    h, w = pixels.shape[1], pixels.shape[2]

    # Scale to 0-255; a constant batch has no range to stretch (this used to
    # divide by zero).
    low = pixels.min()
    span = pixels.max() - low
    if span == 0:
        scaled = np.zeros(pixels.shape, dtype=np.uint8)
    else:
        scaled = ((pixels - low) * 255 / span).astype(np.uint8)

    # (grid_row, grid_col, y, x) -> (grid_row, y, grid_col, x) -> 2-D mosaic.
    # The former loop offset columns by the image *height*, which only worked
    # for square images.
    grid = np.reshape(scaled[:save_size * save_size],
                      (save_size, save_size, h, w))
    return grid.transpose(0, 2, 1, 3).reshape(save_size * h, save_size * w)
dataset.shuffle(shuffle_buffer) 128 | 129 | dataset = dataset.batch(batch_size) 130 | 131 | dataset = dataset.repeat(num_epochs) 132 | 133 | return dataset 134 | 135 | 136 | # In[ ]: 137 | 138 | 139 | # test dataset and preprocessing 140 | data = dataset("tfrecords/MF_clf_train.tfrecords-*") 141 | ite = data.make_initializable_iterator() 142 | img_batch, label_batch = ite.get_next() 143 | with tf.Session() as sess: 144 | sess.run([tf.global_variables_initializer(), 145 | tf.local_variables_initializer()]) 146 | sess.run(ite.initializer) 147 | for i in range(2): 148 | image, lab = sess.run([img_batch, label_batch]) 149 | print(" ", image.shape, image.dtype) 150 | print(" ", lab.shape, lab.dtype) 151 | 152 | plt.figure(figsize=(8,8)) 153 | plt.imshow(images_square_grid(image), cmap="gray"); 154 | 155 | 156 | # In[ ]: 157 | 158 | 159 | def get_placeholder(): 160 | input_placeholder = tf.placeholder(dtype=tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1], name='input_placeholder') 161 | 162 | labels_placeholder = tf.placeholder(dtype=tf.float32, shape=[None, NUM_LABELS], name='label_placeholder') 163 | 164 | dropout_placeholder = tf.placeholder(dtype=tf.float32, shape=[], name='dropout_placeholder') 165 | 166 | return input_placeholder, labels_placeholder, dropout_placeholder 167 | 168 | 169 | # In[ ]: 170 | 171 | 172 | def constraint_weights(weights): 173 | # assert weights.shape == (CONV_RES_SIZE, CONV_RES_SIZE, IMAGE_CHANNEL, CONV_RES_DEEP) 174 | mid_inx = CONV_RES_SIZE // 2 175 | weights[mid_inx, mid_inx, :, :] = 0 176 | weights = weights / np.sum(weights, axis=(0, 1)) 177 | weights[mid_inx, mid_inx, :, :] = -1 178 | 179 | return weights 180 | 181 | 182 | # In[ ]: 183 | 184 | 185 | def get_weights(shape, name="weights"): 186 | return tf.Variable(tf.truncated_normal(shape=shape, stddev=0.1), name=name) 187 | 188 | def get_bias(shape, name="bias"): 189 | return tf.Variable(tf.constant(0.01, shape=shape), dtype=tf.float32, name = name) 190 | 191 | 192 | # In[ ]: 193 
def build_nn(input_tensor, labels, dropout):
    """Build the classifier graph: constrained conv -> 2 conv blocks -> 2 FC -> softmax.

    :param input_tensor: image batch tensor fed to the first convolution
    :param labels: one-hot label tensor, compared against the logits
    :param dropout: value handed to ``tf.nn.dropout`` as the keep probability
    :return: ``(loss, acc, conv_res_weights)`` -- mean softmax cross-entropy,
        mean accuracy, and the first-layer kernel variable (returned so the
        training loop can re-apply ``constraint_weights`` to it)
    """
    # Layer 1: the "residual" convolution -- no activation, VALID padding.
    with tf.variable_scope('layer1_conv_res'):
        conv_res_weights = get_weights([CONV_RES_SIZE, CONV_RES_SIZE, IMAGE_CHANNEL, CONV_RES_DEEP])
        conv_res_bias = get_bias([CONV_RES_DEEP, ])

        conv = tf.nn.conv2d(input_tensor, conv_res_weights, strides=[1,1,1,1], padding='VALID')
        layer1 = tf.nn.bias_add(conv, conv_res_bias)

    print("conv1", layer1.get_shape().as_list())
    # VALID padding shrinks each spatial side by CONV_RES_SIZE - 1
    # (e.g. 64 -> 60 for a 5x5 kernel); depth is CONV_RES_DEEP.
    with tf.variable_scope('layer2_conv1'):
        conv1_weights = get_weights([CONV1_SIZE, CONV1_SIZE, CONV_RES_DEEP, CONV1_DEEP])
        conv1_bias = get_bias([CONV1_DEEP, ])

        conv1 = tf.nn.conv2d(layer1, conv1_weights, strides=[1, 2, 2, 1], padding='SAME')
        conv1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_bias))
        conv1 = tf.nn.lrn(conv1, depth_radius=5, bias=2, alpha=1e-4, beta=.75)
        layer2 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

    print("conv2", layer2.get_shape().as_list())
    # Stride-2 conv followed by stride-2 pooling: spatial size is quartered
    # (rounded up, SAME padding); depth is CONV1_DEEP.
    with tf.variable_scope('layer3_conv2'):
        conv2_weights = get_weights([CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP])
        conv2_bias = get_bias([CONV2_DEEP, ])

        conv2 = tf.nn.conv2d(layer2, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
        conv2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_bias))
        conv2 = tf.nn.lrn(conv2, depth_radius=5, bias=2, alpha=1e-4, beta=.75)
        layer3 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

    print("conv3", layer3.get_shape().as_list())
    # Stride-2 pooling halves the spatial size again; depth is CONV2_DEEP.
    # Flatten everything but the batch axis for the dense layers.
    layer3_shape = layer3.get_shape().as_list()
    nodes = layer3_shape[1] * layer3_shape[2] * layer3_shape[3]
    layer3_flatten = tf.reshape(layer3, [-1, nodes])

    with tf.variable_scope("layer4_fc1"):
        fc1_weights = get_weights([nodes, FC_SIZE1])
        fc1_bias = get_bias([FC_SIZE1])
        fc1 = tf.nn.relu(tf.matmul(layer3_flatten, fc1_weights) + fc1_bias)
        layer4 = tf.nn.dropout(fc1, dropout)

    with tf.variable_scope("layer5_fc2"):
        fc2_weights = get_weights([FC_SIZE1, FC_SIZE2])
        fc2_bias = get_bias([FC_SIZE2])
        fc2 = tf.nn.relu(tf.matmul(layer4, fc2_weights) + fc2_bias)
        layer5 = tf.nn.dropout(fc2, dropout)

    with tf.variable_scope("layer6_softmax"):
        softmax_weights = get_weights([FC_SIZE2, NUM_LABELS])
        softmax_bias = get_bias([NUM_LABELS, ])
        logits = tf.matmul(layer5, softmax_weights) + softmax_bias
        # Later cells fetch this op by name ("layer6_softmax/Softmax:0").
        pred = tf.nn.softmax(logits)

    with tf.variable_scope("loss"):
        correct = tf.equal(tf.argmax(pred, 1), tf.argmax(labels, 1))
        acc = tf.reduce_mean(tf.cast(correct, tf.float32))
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels))

    return loss, acc, conv_res_weights


# In[ ]:


def trian(PRINT_LOSS_EVERY_ITE=500, PRINT_ACC_EVERY_ITE=1000, MAX_ITERATIONS=20000):
    """Train the classifier and plot the loss/accuracy curves.

    The first-layer kernels are re-projected with ``constraint_weights``
    before every optimisation step.

    :param PRINT_LOSS_EVERY_ITE: start a new progress line every this many iterations
    :param PRINT_ACC_EVERY_ITE: evaluate on the held-out set every this many iterations
    :param MAX_ITERATIONS: stop once the iteration counter exceeds this value
    """
    tf.reset_default_graph()

    train_dataset = dataset("tfrecords/MF_clf_train.tfrecords-*",
                            batch_size=64,
                            num_epochs=50,
                            is_shuffle=False)
    test_dataset = dataset("tfrecords/MF_clf_test.tfrecords",
                           batch_size=100,
                           num_epochs=1)
    iterator = train_dataset.make_initializable_iterator()
    test_iterator = test_dataset.make_initializable_iterator()

    image_batch, label_batch = iterator.get_next()
    test_image_batch, test_label_batch = test_iterator.get_next()

    input_placeholder, labels_placeholder, dropout_placeholder = get_placeholder()
    loss, acc, weights= build_nn(input_placeholder, labels_placeholder, dropout_placeholder)

    train = tf.train.AdamOptimizer(1e-4).minimize(loss)

    saver = tf.train.Saver()

    train_losses = []
    test_losses = []
    train_acces = []
    test_acces = []

    with tf.Session() as sess:
        sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
        sess.run([iterator.initializer, test_iterator.initializer])

        # Drain the single-epoch test iterator once and keep the whole
        # held-out set in memory for the periodic evaluations below.
        dev_imgs, dev_labs = [], []
        while True:
            try:
                b_imgs, b_labs = sess.run([test_image_batch, test_label_batch])
                dev_imgs.append(b_imgs)
                dev_labs.append(b_labs)
            except tf.errors.OutOfRangeError:
                break
        dev_imgs = np.concatenate(dev_imgs)
        dev_labs = np.concatenate(dev_labs)

        num_iterat = 1
        while True:
            try:
                # Re-impose the kernel constraint before each update.
                _weights = sess.run(weights)
                weights.load(constraint_weights(_weights), sess)

                _image_batch, _label_batch = sess.run([image_batch, label_batch])
                _, _loss, _acc = sess.run([train, loss, acc], feed_dict={
                    input_placeholder: _image_batch,
                    labels_placeholder: _label_batch,
                    dropout_placeholder: 0.5 })
                num_iterat += 1

                train_acces.append(_acc)
                train_losses.append(_loss)

                # NOTE: leaving through this cap does not write a checkpoint.
                if num_iterat > MAX_ITERATIONS:
                    break

                sys.stdout.write("\r ite {:>3} train loss:{:>6.2f} train acc:{:.4f}".format(num_iterat, _loss, _acc))
                if num_iterat % PRINT_LOSS_EVERY_ITE == 0:
                    print("")

                if num_iterat % PRINT_ACC_EVERY_ITE == 0:
                    _loss, _acc = sess.run([loss, acc], feed_dict={
                        input_placeholder : dev_imgs,
                        labels_placeholder : dev_labs,
                        dropout_placeholder : 1})

                    test_losses.append(_loss)
                    test_acces.append(_acc)

                    print("\ntest loss = %.5f test acc = %.6f" % (_loss, _acc) )

                    # NOTE(review): nesting reconstructed -- the early stop is
                    # taken to apply to the held-out accuracy; confirm.
                    if _acc > 0.99:
                        saver.save(sess, "save/MF_clf_model.ckpt")
                        break
            except tf.errors.OutOfRangeError:
                # Training data exhausted: constrain once more, then save.
                _weights = sess.run(weights)
                weights.load(constraint_weights(_weights), sess)
                print("training end")

                saver.save(sess, "save/MF_clf_model.ckpt")
                break

    plt.figure(figsize=(12, 8))
    plt.subplot(221)
    plt.plot(train_losses, c='b')
    plt.subplot(222)
    plt.plot(train_acces, c='b')
    plt.subplot(223)
    plt.plot(test_losses, c='r')
    plt.subplot(224)
    plt.plot(test_acces, c='r')
    plt.show()

# trian()


# # Load the trained model and validate the results

# In[ ]:


# Load the model

input_placeholder, labels_placeholder, dropout_placeholder = get_placeholder()
loss, acc, weights= build_nn(input_placeholder, labels_placeholder, dropout_placeholder)

saver = tf.train.Saver()

sess = tf.Session()
saver.restore(sess, "save/MF_clf_model.ckpt")

# The checkpoint may have been written right after an optimiser step, so
# re-apply the kernel constraint before running inference.
_weights = sess.run(weights)
weights.load(constraint_weights(_weights), sess)
pred = tf.get_default_graph().get_tensor_by_name("layer6_softmax/Softmax:0")


# In[ ]:


def split64(gray_img):
    """Cut a 2-D image into non-overlapping 64x64 tiles, centred in the frame.

    :param gray_img: 2-D array (height, width)
    :return: array of the tiles in row-major order; the leftover border
        (``height % 64`` / ``width % 64``) is split evenly on both sides
    """
    height, width = gray_img.shape
    # [num_raws, num_cols] are subimage numbers on vertical or horizontal direction
    num_raws, num_cols = height//64, width//64
    start_raws, start_cols = height%64//2, width%64//2
    sub_imgs = []
    indexes = [(i, j) for i in range(num_raws) for j in range(num_cols)]
    for i, j in indexes:
        x, y = start_cols + j * 64, start_raws + i * 64
        sub_img = gray_img[y:y+64, x:x+64]
        sub_imgs.append(sub_img)
    return np.array(sub_imgs)


# ## Test on the low-resolution UCID images

# In[ ]:


import pandas as pd

# Accuracy on original tiles (label 0) plus their median-filtered copies (label 1).
a_lst = []
for f in glob("dataset/ucid/*.tif"):
    try:
        g = cv2.imread(f)[:,:,1]
        imgs = split64(g)
        p_imgs = np.array([cv2.medianBlur(i, 5) for i in imgs])

        x = np.concatenate([imgs, p_imgs])
        x = x.reshape([-1, 64, 64, 1])
        x = x / 255
        y = np.zeros([x.shape[0]])
        y[x.shape[0]//2:] = 1

        y = pd.get_dummies(y).values

        _a = sess.run(acc, feed_dict={
            input_placeholder: x,
            labels_placeholder: y,
            dropout_placeholder: 1.0
        })
        a_lst.append(_a)
    except Exception as err:
        # Best effort: skip files that cannot be processed, but report why
        # and let KeyboardInterrupt / SystemExit propagate.
        print("skipping %s: %r" % (f, err))
        continue
print("average accurracy on UCID is %.2f%%" % (np.array(a_lst).mean()*100) )


# In[ ]:


# Error rate on untouched tiles only (every tile labelled as class 0).
a_lst = []
for f in glob("dataset/ucid/*.tif"):
    try:
        g = cv2.imread(f)[:,:,1]
        imgs = split64(g)
        x = imgs.reshape([-1, 64, 64, 1])
        x = x / 255
        y = np.array([[1, 0] for _ in range(x.shape[0])])

        _a = sess.run(acc, feed_dict={
            input_placeholder: x,
            labels_placeholder: y,
            dropout_placeholder: 1.0
        })
        a_lst.append(_a)
    except Exception as err:
        print("skipping %s: %r" % (f, err))
        continue
print("average error on UCID is %.2f%%" % (100-np.array(a_lst).mean()*100) )


# __This means an image is recognized as altered if 7.55% or more of its 64x64 blocks are diagnosed as positive; 7.55% is a threshold computed on the UCID dataset.__

# ## Classification accuracy on a portrait photo

# In[ ]:


g = cv2.imread("./yy.jpg")[:,:,1]
imgs = split64(g)
p_imgs = np.array([cv2.medianBlur(i, 5) for i in imgs])

x = np.concatenate([imgs, p_imgs])
x = x.reshape([-1, 64, 64, 1])
x = x / 255
y = np.zeros([x.shape[0]])
y[x.shape[0]//2:] = 1
import pandas as pd
y = pd.get_dummies(y).values
a = sess.run(acc, feed_dict={
    input_placeholder: x,
    labels_placeholder: y,
    dropout_placeholder: 1.0
})
print("acc %.2f%% over %d blocks" % (a*100, x.shape[0]))


# ## Detect a retouched photo

# In[ ]:


gray_img = cv2.imread("me.jpg", cv2.IMREAD_ANYCOLOR)[:,:,1]

height, width = gray_img.shape
# [num_raws, num_cols] are subimage numbers on vertical or horizontal direction
num_raws, num_cols = height//64, width//64
start_raws, start_cols = height%64//2, width%64//2
img_boxes = np.copy(gray_img)  # for test purpose

indexes = [(i, j) for i in range(num_raws) for j in range(num_cols)]

for i, j in indexes:
    x, y = start_cols + j * 64, start_raws + i * 64
    sub_img = gray_img[y:y+64, x:x+64]
    p = sess.run(pred, feed_dict={input_placeholder: sub_img.reshape([1,64,64,1])/255,
                                  dropout_placeholder:1.0})
    if np.argmax(p, 1) == 1:
        cv2.rectangle(img_boxes, (x, y), (x+64, y+64), (0, 255, 0), 5)

plt.figure(figsize=(9, 16))
plt.imshow(img_boxes, cmap="gray");


# ## Original vs. Photoshopped image

# In[ ]:


# Detection function
def detect(file):
    """Classify every 64x64 tile of an image and box the tiles predicted as class 1.

    Uses the module-level ``sess``, ``pred``, ``input_placeholder`` and
    ``dropout_placeholder``.

    :param file: path of the image to inspect
    :return: copy of the image's channel 1 with a rectangle drawn around
        every tile whose arg-max prediction is 1
    :raises IOError: if OpenCV cannot read ``file``
    """
    image = cv2.imread(file, cv2.IMREAD_ANYCOLOR)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("cannot read image: %s" % file)
    gray_img = image[:,:,1]
    height, width = gray_img.shape
    # [num_raws, num_cols] are subimage numbers on vertical or horizontal direction
    num_raws, num_cols = height//64, width//64
    start_raws, start_cols = height%64//2, width%64//2
    img_boxes = np.copy(gray_img)  # draw on a copy so tiles are classified unmarked
    indexes = [(i, j) for i in range(num_raws) for j in range(num_cols)]
    for i, j in indexes:
        x, y = start_cols + j * 64, start_raws + i * 64
        sub_img = gray_img[y:y+64, x:x+64]
        p = sess.run(pred, feed_dict={input_placeholder: sub_img.reshape([1,64,64,1])/255,
                                      dropout_placeholder:1.0})
        if np.argmax(p, 1) == 1:
            # NOTE(review): img_boxes is single-channel, so presumably only
            # the first colour component (0) is used -- confirm the intent.
            cv2.rectangle(img_boxes, (x, y), (x+64, y+64), (0, 255, 0), 5)
    return img_boxes


# In[ ]:


r1 = detect('tt.jpg')
r2 = detect('tt-ps.jpg')

plt.figure(figsize=(18, 18))
plt.subplot(211)
plt.imshow(r1, cmap='gray')
plt.title('unchange img')
plt.subplot(212)
plt.imshow(r2, cmap='gray')
plt.title('after PS')
plt.show()


# ## Feature visualisation of the residual layer

# In[ ]:


res = tf.get_default_graph().get_tensor_by_name('layer1_conv_res/BiasAdd:0')


# In[ ]:


f = "dataset/ucid/ucid00004.tif"

# Median-filtered tiles first.
g = cv2.imread(f)[:,:,1]
imgs = split64(g)
x = np.array([cv2.medianBlur(i, 5) for i in imgs])
x = x.reshape([-1, 64, 64, 1])
x = x / 255

res_o = sess.run(res, feed_dict={input_placeholder: x})
n_channels = res_o.shape[-1]
# Each row is the input tile followed by one image per residual channel,
# so the grid needs n_channels + 1 columns or the last channel is lost.
res_imgs = [[x[i,:,:,0]]+[res_o[i, :,:, j] for j in range(n_channels)] for i in range(5)]

fig, axes = plt.subplots(nrows=5, ncols=n_channels+1, sharex=True, sharey=True,
                         figsize=((n_channels+1)*1.5,5*1.5))

for images, row in zip(res_imgs, axes):
    for img, ax in zip(images, row):
        ax.imshow(img, cmap='Greys_r')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

fig.tight_layout(pad=0.1)
print("residual output among several changed images, x = channels, y = inputs")
plt.show()

# Same tiles, untouched.
print("residual output among several original images")
g = cv2.imread(f)[:,:,1]
imgs = split64(g)
x = imgs.reshape([-1, 64, 64, 1])
x = x / 255

res_o = sess.run(res, feed_dict={input_placeholder: x})
n_channels = res_o.shape[-1]
res_imgs = [[x[i,:,:,0]]+[res_o[i, :,:, j] for j in range(n_channels)] for i in range(5)]

fig, axes = plt.subplots(nrows=5, ncols=n_channels+1, sharex=True, sharey=True,
                         figsize=((n_channels+1)*1.5,5*1.5))

for images, row in zip(res_imgs, axes):
    for img, ax in zip(images, row):
        ax.imshow(img, cmap='Greys_r')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

fig.tight_layout(pad=0.1)
plt.show()


# ## test on a small PS image dataset find in Baidu Image

# In[ ]:


results = []
for f in glob("test_img/*.*"):
    picture = cv2.imread(f)
    if picture is None:
        # "*.*" also matches non-image files; imread returns None for those.
        print("skipping unreadable file: %s" % f)
        continue
    g = picture[:,:,1]
    imgs = split64(g)
    x = imgs.reshape([-1, 64, 64, 1])
    x = x / 255

    _p = sess.run(pred, feed_dict={
        input_placeholder: x,
        dropout_placeholder: 1.0
    })
    # Fraction of tiles predicted as class 1.
    score = np.argmax(_p, 1).mean()
    results.append(score)
    r1 = detect(f)
    plt.imshow(r1, cmap='gray')
    plt.show()
results = np.array(results)


# In[ ]:


[n.name for n in tf.get_default_graph().as_graph_def().node]
https://raw.githubusercontent.com/dawson-chen/image_manipulation_detector/f71d4b08c35a107e57b40f22b9aeb6adf3f9241c/save/MF_clf_model.ckpt.meta -------------------------------------------------------------------------------- /save/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "MF_clf_model.ckpt" 2 | all_model_checkpoint_paths: "MF_clf_model.ckpt" 3 | -------------------------------------------------------------------------------- /tfrecords.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "np.random.seed(2018)\n", 11 | "import cv2\n", 12 | "import tensorflow as tf\n", 13 | "from tqdm import tqdm\n", 14 | "from glob import glob\n", 15 | "import os" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "def split_256(gray_img):\n", 25 | " height, width = gray_img.shape\n", 26 | " num_raws, num_cols = height//256, width//256\n", 27 | " start_raws, start_cols = height%256//2, width%256//2\n", 28 | " \n", 29 | " sub_imgs = []\n", 30 | " \n", 31 | " indexes = [(i, j) for i in range(num_raws) for j in range(num_cols)]\n", 32 | " \n", 33 | " for i, j in indexes:\n", 34 | " x, y = start_cols + j * 256, start_raws + i * 256\n", 35 | " sub_img = gray_img[y:y+256, x:x+256]\n", 36 | " sub_imgs.append(sub_img)\n", 37 | " return np.array(sub_imgs)\n", 38 | "\n", 39 | "\n", 40 | "def images_square_grid(images):\n", 41 | " \"\"\"\n", 42 | " Save images as a square grid\n", 43 | " :param images: Images to be used for the grid\n", 44 | " :param mode: The mode to use for images\n", 45 | " :return: Image of images in a square grid\n", 46 | " \"\"\"\n", 47 | " # Get maximum size for square grid of images\n", 48 | " save_size 
= math.floor(np.sqrt(images.shape[0]))\n", 49 | "\n", 50 | " images = (((images - images.min()) * 255) / (images.max() - images.min())).astype(np.uint8)\n", 51 | "\n", 52 | " images_in_square = np.reshape(\n", 53 | " images[:save_size*save_size],\n", 54 | " (save_size, save_size, images.shape[1], images.shape[2]))\n", 55 | "\n", 56 | " # Combine images to grid image\n", 57 | " new_im = np.ones((save_size*256, save_size*256), dtype=np.uint8)\n", 58 | " for col_i, col_images in enumerate(images_in_square):\n", 59 | " for image_i, image in enumerate(col_images):\n", 60 | " new_im[col_i * 256: col_i * 256 + 256, \n", 61 | " image_i * 256: image_i * 256 + 256] = image\n", 62 | "\n", 63 | " return new_im" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "img_file_lst = glob(\"dataset/ddimgdb/*\")\n", 73 | "np.random.shuffle(img_file_lst)\n", 74 | "print(\" %d items\" % len(img_file_lst))\n", 75 | "print(\" show top 5 items\")\n", 76 | "print(\"\\n\".join(img_file_lst[:5]))\n", 77 | "\n", 78 | "output_path= \"dataset/db256_1\"\n", 79 | "if not os.path.exists(output_path):\n", 80 | " os.mkdir(output_path)\n", 81 | "\n", 82 | "all_files = len(img_file_lst)\n", 83 | "\n", 84 | "for i_file, file in tqdm(enumerate(img_file_lst), total=all_files):\n", 85 | " name = file.split(\"\\\\\")[1].split(\".\")[0]\n", 86 | " \n", 87 | " img = cv2.imread(file, cv2.IMREAD_COLOR)\n", 88 | " gray_img = cv2.split(img)[1] # r,g,b split\n", 89 | " \n", 90 | " sub_imgs = split_256(gray_img)\n", 91 | " nums_imgs = len(sub_imgs)\n", 92 | " \n", 93 | " for i, img in enumerate(sub_imgs):\n", 94 | " cv2.imwrite(\"%s/%s-%.3d-of-%.3d.jpg\" % (output_path, name, i+1, nums_imgs), img)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": 
{}, 108 | "outputs": [], 109 | "source": [ 110 | "def __int64_feature(value):\n", 111 | " return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))\n", 112 | "\n", 113 | "def __bytes_feature(value):\n", 114 | " return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "files = glob(output_path)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "n_file = files[:50000]\n", 133 | "p_file = [\"TAG:positive\" + each for each in n_file]" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "train_file = n_file + p_file\n", 143 | "np.random.shuffle(train_file)\n", 144 | "\n", 145 | "test_file = files[-800:] + [\"TAG:positive\"+each for each in files[-800:]]\n", 146 | "np.random.shuffle(test_file)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "print(\"train size:\", len(train_file), \" test size:\", len(test_file))" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "instances_per_shard = 5000\n", 165 | "num_shards = len(train_file) // instances_per_shard\n", 166 | "\n", 167 | "for i in range(num_shards):\n", 168 | " \n", 169 | " tfrecord = \"tfrecords/MF_clf_train.tfrecords-%.2d-of-%.2d\" % (i+1, num_shards)\n", 170 | " writer = tf.python_io.TFRecordWriter(tfrecord)\n", 171 | " \n", 172 | " s_i = instances_per_shard * i\n", 173 | " e_i = s_i + instances_per_shard\n", 174 | " for file in tqdm( train_file[s_i : e_i] ):\n", 175 | " if file.startswith(\"TAG:positive\"):\n", 176 | " img = cv2.imread(file[12:], 
cv2.IMREAD_GRAYSCALE)\n", 177 | " img = cv2.medianBlur(img, ksize=5)\n", 178 | "\n", 179 | " label = 1\n", 180 | " else:\n", 181 | " img = cv2.imread(file, cv2.IMREAD_GRAYSCALE)\n", 182 | " label = 0\n", 183 | " \n", 184 | " image_raw = img.reshape([256 * 256])\n", 185 | " image_raw = image_raw.tostring()\n", 186 | "\n", 187 | " example = tf.train.Example(features=tf.train.Features(feature={\n", 188 | " 'image_raw':__bytes_feature(image_raw),\n", 189 | " 'label':__int64_feature(label),\n", 190 | " 'path':__bytes_feature( bytes(file, encoding='utf-8') ) \n", 191 | " }))\n", 192 | " writer.write(example.SerializeToString())\n", 193 | " writer.close()" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "instances_per_shard = 5000\n", 203 | "num_shards = len(test_file) // instances_per_shard\n", 204 | "\n", 205 | "tfrecord = \"tfrecords/MF_clf_test.tfrecords\"\n", 206 | "writer = tf.python_io.TFRecordWriter(tfrecord)\n", 207 | "\n", 208 | "for file in tqdm( test_file ):\n", 209 | " if file.startswith(\"TAG:positive\"):\n", 210 | " img = cv2.imread(file[12:], cv2.IMREAD_GRAYSCALE)\n", 211 | " img = cv2.medianBlur(img, ksize=5)\n", 212 | "\n", 213 | " label = 1\n", 214 | " else:\n", 215 | " img = cv2.imread(file, cv2.IMREAD_GRAYSCALE)\n", 216 | " label = 0\n", 217 | "\n", 218 | " image_raw = img.reshape([256 * 256])\n", 219 | " image_raw = image_raw.tostring()\n", 220 | "\n", 221 | " example = tf.train.Example(features=tf.train.Features(feature={\n", 222 | " 'image_raw':__bytes_feature(image_raw),\n", 223 | " 'label':__int64_feature(label),\n", 224 | " 'path':__bytes_feature( bytes(file, encoding='utf-8') ) \n", 225 | " }))\n", 226 | " writer.write(example.SerializeToString())\n", 227 | "writer.close()" 228 | ] 229 | } 230 | ], 231 | "metadata": { 232 | "kernelspec": { 233 | "display_name": "Python 3", 234 | "language": "python", 235 | "name": "python3" 236 | }, 237 | 
#!/usr/bin/env python
# coding: utf-8

# In[ ]:


import math
import numpy as np
np.random.seed(2018)
import cv2
import tensorflow as tf
from tqdm import tqdm
from glob import glob
import os


# In[ ]:


def split_256(gray_img):
    """Cut a 2-D image into non-overlapping 256x256 tiles, centred in the frame.

    :param gray_img: 2-D array (height, width)
    :return: array of the tiles in row-major order; the leftover border is
        split evenly on both sides
    """
    height, width = gray_img.shape
    num_raws, num_cols = height//256, width//256
    start_raws, start_cols = height%256//2, width%256//2

    sub_imgs = []

    indexes = [(i, j) for i in range(num_raws) for j in range(num_cols)]

    for i, j in indexes:
        x, y = start_cols + j * 256, start_raws + i * 256
        sub_img = gray_img[y:y+256, x:x+256]
        sub_imgs.append(sub_img)
    return np.array(sub_imgs)


def images_square_grid(images):
    """
    Arrange images as a square grid
    :param images: array of shape (count, height, width)
    :return: single uint8 image holding the largest square grid that fits
    """
    # Get maximum size for square grid of images
    save_size = math.floor(np.sqrt(images.shape[0]))

    # Scale to 0-255; a constant batch has zero range, so avoid dividing by 0.
    span = images.max() - images.min()
    if span == 0:
        span = 1
    images = (((images - images.min()) * 255) / span).astype(np.uint8)

    h, w = images.shape[1], images.shape[2]
    images_in_square = np.reshape(
        images[:save_size*save_size],
        (save_size, save_size, h, w))

    # Combine images to grid image
    new_im = np.ones((save_size*h, save_size*w), dtype=np.uint8)
    for col_i, col_images in enumerate(images_in_square):
        for image_i, image in enumerate(col_images):
            new_im[col_i * h: col_i * h + h,
                   image_i * w: image_i * w + w] = image

    return new_im


# In[ ]:


img_file_lst = glob("dataset/ddimgdb/*")
np.random.shuffle(img_file_lst)
print(" %d items" % len(img_file_lst))
print(" show top 5 items")
print("\n".join(img_file_lst[:5]))

output_path= "dataset/db256_1"
os.makedirs(output_path, exist_ok=True)

all_files = len(img_file_lst)

for i_file, file in tqdm(enumerate(img_file_lst), total=all_files):
    # basename() copes with both "/" and "\\" separators; keep the text
    # before the first dot as the stem.
    name = os.path.basename(file).split(".")[0]

    img = cv2.imread(file, cv2.IMREAD_COLOR)
    if img is None:
        print("skipping unreadable file: %s" % file)
        continue
    gray_img = cv2.split(img)[1]  # channel 1 is green in both BGR and RGB order

    sub_imgs = split_256(gray_img)
    nums_imgs = len(sub_imgs)

    for i, patch in enumerate(sub_imgs):
        cv2.imwrite("%s/%s-%.3d-of-%.3d.jpg" % (output_path, name, i+1, nums_imgs), patch)


# In[ ]:


def __int64_feature(value):
    """Wrap a single integer as a ``tf.train.Feature``."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def __bytes_feature(value):
    """Wrap a single bytes object as a ``tf.train.Feature``."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


# Entries carrying this prefix are median-filtered at write time and labelled 1.
POSITIVE_TAG = "TAG:positive"


def _load_example(entry):
    """Return the serialized Example for one file-list entry, or None if unreadable."""
    if entry.startswith(POSITIVE_TAG):
        img = cv2.imread(entry[len(POSITIVE_TAG):], cv2.IMREAD_GRAYSCALE)
        label = 1
    else:
        img = cv2.imread(entry, cv2.IMREAD_GRAYSCALE)
        label = 0
    if img is None:
        return None
    if label == 1:
        img = cv2.medianBlur(img, ksize=5)

    image_raw = img.reshape([256 * 256]).tobytes()
    example = tf.train.Example(features=tf.train.Features(feature={
        'image_raw':__bytes_feature(image_raw),
        'label':__int64_feature(label),
        'path':__bytes_feature( bytes(entry, encoding='utf-8') )
    }))
    return example.SerializeToString()


def _write_tfrecord(entries, tfrecord):
    """Write every readable entry to ``tfrecord``; the writer is closed even on error."""
    with tf.python_io.TFRecordWriter(tfrecord) as writer:
        for entry in tqdm(entries):
            serialized = _load_example(entry)
            if serialized is None:
                print("skipping unreadable file: %s" % entry)
                continue
            writer.write(serialized)


# In[ ]:


# Match the patch files inside the output directory (globbing the bare
# directory name would return only the directory itself).
files = sorted(glob(os.path.join(output_path, "*")))


# In[ ]:


# Hold the last 800 patches out for testing and draw the training patches
# from the remainder so the two sets can never overlap.
held_out = files[-800:]
n_file = files[:-800][:50000]
p_file = [POSITIVE_TAG + each for each in n_file]


# In[ ]:


train_file = n_file + p_file
np.random.shuffle(train_file)

test_file = held_out + [POSITIVE_TAG+each for each in held_out]
np.random.shuffle(test_file)


# In[ ]:


print("train size:", len(train_file), " test size:", len(test_file))


# In[ ]:


os.makedirs("tfrecords", exist_ok=True)

instances_per_shard = 5000
# Round up so a trailing partial shard is written rather than dropped.
num_shards = -(-len(train_file) // instances_per_shard)

for i in range(num_shards):

    tfrecord = "tfrecords/MF_clf_train.tfrecords-%.2d-of-%.2d" % (i+1, num_shards)

    s_i = instances_per_shard * i
    e_i = s_i + instances_per_shard
    _write_tfrecord(train_file[s_i : e_i], tfrecord)


# In[ ]:


tfrecord = "tfrecords/MF_clf_test.tfrecords"
_write_tfrecord(test_file, tfrecord)