├── _config.yml
├── .gitattributes
├── diva-service
│   ├── run.json
│   ├── upload.json
│   └── install.json
├── docker
│   ├── code
│   │   ├── mask.png
│   │   ├── input.png
│   │   ├── fcnn_bin.h5
│   │   ├── script.sh
│   │   └── test_IoU.py
│   └── Dockerfile
├── frontend
│   ├── goodfcn.png
│   ├── index.html
│   ├── style.css
│   ├── script-js.js
│   └── upload.php
├── FCN
│   ├── models
│   │   ├── fcnn_bin.h5
│   │   └── fcnn_bin_simple.h5
│   ├── hooks
│   │   └── hook-skimage.io.py
│   ├── post.py
│   ├── dataset-convertion
│   │   └── crop.py
│   ├── img_utils.py
│   ├── fcn_helper_function.py
│   ├── fcnn-light-1.py
│   ├── fcnn-light-2.py
│   ├── test_classifier.py
│   └── classifier_fcnn.py
├── LICENSE
├── .gitignore
├── environment.yml
└── README.md

/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-minimal
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
--------------------------------------------------------------------------------
/diva-service/run.json:
--------------------------------------------------------------------------------
1 | {"data":[{"inputImage": "binary-image/AS_1948_00_00006.bin.png"}]}
2 |
--------------------------------------------------------------------------------
/docker/code/mask.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jumpst3r/printed-hw-segmentation/HEAD/docker/code/mask.png
--------------------------------------------------------------------------------
/frontend/goodfcn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jumpst3r/printed-hw-segmentation/HEAD/frontend/goodfcn.png
--------------------------------------------------------------------------------
/FCN/models/fcnn_bin.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jumpst3r/printed-hw-segmentation/HEAD/FCN/models/fcnn_bin.h5
--------------------------------------------------------------------------------
/docker/code/input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jumpst3r/printed-hw-segmentation/HEAD/docker/code/input.png
--------------------------------------------------------------------------------
/docker/code/fcnn_bin.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jumpst3r/printed-hw-segmentation/HEAD/docker/code/fcnn_bin.h5
--------------------------------------------------------------------------------
/FCN/models/fcnn_bin_simple.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jumpst3r/printed-hw-segmentation/HEAD/FCN/models/fcnn_bin_simple.h5
--------------------------------------------------------------------------------
/FCN/hooks/hook-skimage.io.py:
--------------------------------------------------------------------------------
1 | from PyInstaller.utils.hooks import collect_data_files, collect_submodules
2 |
3 | datas = collect_data_files("skimage.io._plugins")
4 | hiddenimports = collect_submodules('skimage.io._plugins')
--------------------------------------------------------------------------------
/docker/code/script.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | inputImage=${1}
3 | outputFolder=${2}
4 |
5 | /input/printed-hw-segmentation --enableCRF ${inputImage} ${outputFolder}
6 | # remove the non-CRF version (ugly fix)
7 | rm /output/fcn_out.png
--------------------------------------------------------------------------------
/diva-service/upload.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "binary-image",
3 |   "files": [
4 |     {
5 |       "type": "url",
6 |       "value": "http://134.21.72.190:8080/files/binary-image/original/AS_1948_00_00006.bin.png"
7 |     }
8 |   ]
9 | }
10 |
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:cosmic
2 | LABEL maintainer="nicolas.dutly@unifr.ch"
3 | RUN apt-get update && apt-get install libxcb1 libexpat1 dos2unix libgl1-mesa-glx --no-install-recommends -y && rm -rf /var/lib/apt/lists/*
4 | COPY ./code/* /input/
5 | ADD https://github.com/Jumpst3r/printed-hw-segmentation/releases/download/v1.0/printed-hw-segmentation /input/
6 | RUN cd /input && chmod 777 printed-hw-segmentation && chmod +x script.sh && dos2unix script.sh && mkdir models && mv fcnn_bin.h5 models/
7 | WORKDIR /input/
8 |
--------------------------------------------------------------------------------
/frontend/index.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE html>
2 | <!-- NOTE: the original tags of this file were stripped when the repository was
3 |      exported; the markup below is a minimal reconstruction based on style.css,
4 |      script-js.js (which requires jQuery) and upload.php. Only the heading text
5 |      survived the export. -->
6 | <html>
7 | <head>
8 |     <link rel="stylesheet" href="style.css">
9 |     <script src="https://code.jquery.com/jquery-3.3.1.min.js"></script>
10 |     <script src="script-js.js" defer></script>
11 |     <title>Printed-handwritten segmentation using DIVA services</title>
12 | </head>
13 | <body>
14 |     <h1>Printed-handwritten segmentation using DIVA services</h1>
15 |     <form action="upload.php" method="post" enctype="multipart/form-data">
16 |         <input type="file" name="fileToUpload" id="fileToUpload">
17 |         <input type="submit" value="Upload Image" name="submit">
18 |     </form>
19 | </body>
20 | </html>
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Nicolas Dutly
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/docker/code/test_IoU.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 |
3 | import numpy as np
4 | import pytest
5 | import skimage.io as io
6 |
7 |
8 | class Test:
9 |
10 |     def test_runnable(self):
11 |         return_code = subprocess.call("./printed-hw-segmentation input.png ./", shell=True)
12 |         assert return_code == 0, 'Launcher script or python runnable failed'
13 |         print("Successfully ran python runnable")
14 |
15 |
16 |
17 |
18 |     def test_IoU(self):
19 |         # Minimum acceptable IoU per label (the threshold was previously unused
20 |         # and the assertions were hard-coded to 0.7)
21 |         THRESH = 0.7
22 |
23 |         def get_IoU(prediction, target):
24 |             intersection = np.logical_and(target, prediction)
25 |             union = np.logical_or(target, prediction)
26 |             return float(np.sum(intersection)) / float(np.sum(union))
27 |
28 |         im_output = io.imread('fcn_out_post.png')
29 |         im_mask = io.imread('mask.png')
30 |         # Compute label-wise IoU scores
31 |         IoUs = []
32 |         for channel in range(3):
33 |             IoUs.append(get_IoU(im_output[:, :, channel], im_mask[:, :, channel]))
34 |         IoUs = np.array(IoUs)
35 |         assert IoUs[0] > THRESH, 'IoU for label [printed] on trained data is less than ' + str(THRESH) + ' (IoU=' + str(IoUs[0]) + ')'
36 |         assert IoUs[1] > THRESH, 'IoU for label [handwritten] on trained data is less than ' + str(THRESH) + ' (IoU=' + str(IoUs[1]) + ')'
37 |         assert IoUs[2] > THRESH, 'IoU for label [background] on trained data is less than ' + str(THRESH) + ' (IoU=' + str(IoUs[2]) + ')'
38 |         print("IoU test passed with threshold [" + str(THRESH) + "]")
39 |         print("IoU for label [printed]: " + str(IoUs[0]))
40 |         print("IoU for label [handwritten]: " + str(IoUs[1]))
41 |         print("IoU for label [background]: " + str(IoUs[2]))
42 |         print("Mean IoU: " + str(IoUs.mean()))
--------------------------------------------------------------------------------
/FCN/post.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pydensecrf.densecrf as dcrf
3 | from pydensecrf.utils import (unary_from_softmax)
4 | from skimage.color import gray2rgb, rgba2rgb
5 | from skimage import img_as_ubyte
6 | import numpy.random as rd
7 | NB_ITERATIONS = 10
8 |
9 | """
10 | Function which returns the labelled image after applying CRF,
11 | adapted from https://github.com/lucasb-eyer/pydensecrf/tree/master/pydensecrf
12 | """
13 |
14 |
15 | def crf(original_image, annotated_image):
16 |     rd.seed(123)
17 |     # Make sure the reference image is 3-channel RGB, 8-bit
18 |     if len(original_image.shape) < 3:
19 |         original_image = gray2rgb(original_image)
20 |     if len(original_image.shape) == 3 and original_image.shape[2] == 4:
21 |         original_image = rgba2rgb(original_image)
22 |     original_image = img_as_ubyte(original_image)
23 |     # pydensecrf expects the softmax scores as (n_labels, H, W), C-contiguous
24 |     annotated_image = np.moveaxis(annotated_image, -1, 0)
25 |     annotated_image = annotated_image.copy(order='C')
26 |
27 |     d = dcrf.DenseCRF2D(original_image.shape[1], original_image.shape[0], 3)
28 |
29 |     U = unary_from_softmax(annotated_image)
30 |     d.setUnaryEnergy(U)
31 |
32 |     d.addPairwiseGaussian(sxy=(3, 3), compat=3, kernel=dcrf.DIAG_KERNEL,
33 |                           normalization=dcrf.NORMALIZE_SYMMETRIC)
34 |
35 |     d.addPairwiseBilateral(sxy=(80, 80), srgb=(13, 13, 13), rgbim=original_image,
36 |                            compat=10,
37 |                            kernel=dcrf.DIAG_KERNEL,
38 |                            normalization=dcrf.NORMALIZE_SYMMETRIC)
39 |
40 |     Q = d.inference(NB_ITERATIONS)
41 |
42 |     MAP = np.argmax(Q, axis=0).reshape(original_image.shape[0], original_image.shape[1])
43 |
44 |     result = np.zeros((MAP.shape[0], MAP.shape[1], 3))
45 |     result[:, :, 2] = MAP
46 |
47 |     # Remap the CRF class indices (0, 1, 2) to the dataset's blue-channel label
48 |     # values (1 = printed, 2 = handwritten, 4 = background), see img_utils.py
49 |     result[:, :, 2][result[:, :, 2] == 2] = 4
50 |     result[:, :, 2][result[:, :, 2] == 1] = 2
51 |     result[:, :, 2][result[:, :, 2] == 0] = 1
52 |
53 |     return result
54 |
--------------------------------------------------------------------------------
/FCN/dataset-convertion/crop.py:
--------------------------------------------------------------------------------
1 | '''
2 | This file is designed to convert the dataset printed-hw-seg (link in the thesis). The version which can be downloaded
3 | is designed to work out of the box with DeepDiva but needs to be converted to be able to train the
4 | models in this repository. The models assume inputs of size 256x256. The dataset contains high-res images.
5 |
6 | As such, this script can be used to extract random crops from the images. This produces a dataset which can then
7 | be used with the models provided in this repo.
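
Typical usage (DATA_ROOT and the folder layout below are this script's hard-coded
defaults; adjust them to your local copy of the dataset):

    python crop.py

Each source page then yields CROP_PER_IM random HEIGHT x WIDTH crops, written to
the train/data/ and train/gt/ output folders with matching file names.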
8 | '''
9 |
10 | import warnings
11 |
12 | import numpy.random as random
13 | import skimage.io as io
14 | from skimage.color import gray2rgb
15 |
16 | warnings.filterwarnings("ignore")
17 |
18 | DATA_ROOT = '../printed-hw-seg/'
19 |
20 | in_folder = DATA_ROOT + 'train/data/folder/*.png'
21 | mask_folder = DATA_ROOT + 'train/gt/folder/*.png'
22 |
23 | in_folder_out = DATA_ROOT + 'train/data/'
24 | mask_folder_out = DATA_ROOT + 'train/gt/'
25 |
26 | HEIGHT = 256
27 | WIDTH = 256
28 | CROP_PER_IM = 200
29 |
30 | indb = io.imread_collection(in_folder)
31 | maskdb = io.imread_collection(mask_folder)
32 |
33 | print(len(indb))
34 |
35 |
36 | def crop(img, mask):
37 |     assert img.shape == mask.shape, str(img.shape) + str(mask.shape)
38 |     # Pick a random top-left corner such that the crop fits inside the image
39 |     x = random.randint(0, img.shape[1] - WIDTH)
40 |     y = random.randint(0, img.shape[0] - HEIGHT)
41 |
42 |     img_in = img[y:y + HEIGHT, x:x + WIDTH]
43 |     img_out = mask[y:y + HEIGHT, x:x + WIDTH]
44 |
45 |     assert img_in.shape == img_out.shape
46 |
47 |     return (img_in, img_out)
48 |
49 |
50 | for k, (im, mask) in enumerate(zip(indb, maskdb)):
51 |     for i in range(CROP_PER_IM):
52 |         if len(im.shape) < 3:
53 |             im = gray2rgb(im)
54 |         crop_in, crop_out = crop(im, mask)
55 |         # Name crops <image idx>_<crop idx>.png (plain concatenation collided,
56 |         # e.g. crop 11 of image 2 and crop 1 of image 12 both gave "112.png")
57 |         io.imsave(mask_folder_out + str(k) + '_' + str(i) + '.png', crop_out)
58 |         io.imsave(in_folder_out + str(k) + '_' + str(i) + '.png', crop_in)
--------------------------------------------------------------------------------
/diva-service/install.json:
--------------------------------------------------------------------------------
1 | {
2 |   "general": {
3 |     "name": "Printed Handwritten Segmentation",
4 |     "description": "pixelwise printed and handwritten segmentation using a lightweight FCN and CRF post-processing",
5 |     "developer": "Nicolas Dutly",
6 |     "affiliation": "University Of Fribourg",
7 |     "email": "nicolas.dutly@unifr.ch",
8 |     "author": "Nicolas Dutly",
9 |     "DOI": "",
10 |     "type": "segmentation",
11 |     "license": "MIT",
12 |     "ownsCopyright": "1"
13 |   },
14 |   "input": [
15 |     {
16 |       "file": {
17 |         "name": "inputImage",
18 |         "description": "The input image to segment",
19 |         "options": {
20 |           "required": true,
21 |           "mimeTypes": {
22 |             "allowed": [
23 |               "image/jpeg",
24 |               "image/png"
25 |             ],
26 |             "default": "image/png"
27 |           }
28 |         }
29 |       }
30 |     },
31 |     {
32 |       "outputFolder": {}
33 |     }
34 |   ],
35 |   "output": [
36 |     {
37 |       "file": {
38 |         "name": "outputSegmented",
39 |         "type": "image",
40 |         "description": "Generated Segmented Image",
41 |         "options": {
42 |           "mimeTypes": {
43 |             "allowed": [
44 |               "image/png"
45 |             ],
46 |             "default": "image/png"
47 |           },
48 |           "colorspace": "binary",
49 |           "visualization": true
50 |         }
51 |       }
52 |     }
53 |   ],
54 |   "method": {
55 |     "inputFolder": "input/",
56 |     "imageType": "docker",
57 |     "imageName": "jumpst3r/printed-hw-segmentation",
58 |     "testData": "https://drive.google.com/uc?export=download&id=1Q9F5rUN93xTWnRxRB8g4eGXLYZzdDUnR",
59 |     "executableType": "bash",
60 |     "executable_path": "/input/script.sh"
61 |   }
62 | }
63 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.sav
3 | *.zip
4 | *.7z
5 | *._.DS_Store
6 | *.xml
7 | *.tif
8 | *.tiff
9 | *.gif
10 | *.rar
11 | *.tgz
12 | # Byte-compiled / optimized / DLL files
13 | __pycache__/
14 | *.py[cod]
15 | *$py.class
16 |
17 | # C extensions
18 | *.so
19 |
20 | # Distribution / packaging
21 | .Python
22 | build/
23 | develop-eggs/
24 | dist/
25 | downloads/
26 | eggs/
27 | .eggs/
28 | lib/
29 | lib64/
30 | parts/
31 | sdist/
32 | var/
33 | wheels/
34 | pip-wheel-metadata/
35 | share/python-wheels/
36 | *.egg-info/
37 | .installed.cfg
38 | *.egg
39 | MANIFEST
40 |
41 | # PyInstaller
42 | #  Usually these files are written by a python script from a template
43 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
44 | *.manifest
45 | *.spec
46 |
47 | # Installer logs
48 | pip-log.txt
49 | pip-delete-this-directory.txt
50 |
51 | # Unit test / coverage reports
52 | htmlcov/
53 | .tox/
54 | .nox/
55 | .coverage
56 | .coverage.*
57 | .cache
58 | nosetests.xml
59 | coverage.xml
60 | *.cover
61 | .hypothesis/
62 | .pytest_cache/
63 |
64 | # Translations
65 | *.mo
66 | *.pot
67 |
68 | # Django stuff:
69 | *.log
70 | local_settings.py
71 | db.sqlite3
72 |
73 | # Flask stuff:
74 | instance/
75 | .webassets-cache
76 |
77 | # Scrapy stuff:
78 | .scrapy
79 |
80 | # Sphinx documentation
81 | docs/_build/
82 |
83 | # PyBuilder
84 | target/
85 |
86 | # Jupyter Notebook
87 | .ipynb_checkpoints
88 |
89 | # IPython
90 | profile_default/
91 | ipython_config.py
92 |
93 | # pyenv
94 | .python-version
95 |
96 | # celery beat schedule file
97 | celerybeat-schedule
98 |
99 | # SageMath parsed files
100 | *.sage.py
101 |
102 | # Images (the previous ".png"/".jpg" entries matched nothing)
103 | *.png
104 | *.jpg
105 |
106 | # Dir's
107 | .idea/
108 | .vscode/
109 |
110 | # Environments
111 | .env
112 | .venv
113 | env/
114 | venv/
115 | ENV/
116 | env.bak/
117 | venv.bak/
118 |
119 | # Spyder project settings
120 | .spyderproject
121 | .spyproject
122 |
123 | # Rope project settings
124 | .ropeproject
125 |
126 | # mkdocs documentation
127 | /site
128 |
129 | # mypy
130 | .mypy_cache/
131 | .dmypy.json
132 | dmypy.json
133 |
134 | # Pyre type checker
135 | .pyre/
136 | *.bin
137 |
138 |
--------------------------------------------------------------------------------
/FCN/img_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | This file contains various helper functions for image processing
3 | """
4 | import cv2
5 | import numpy as np
6 | from skimage import img_as_float, img_as_ubyte
7 | from skimage.filters import threshold_sauvola
8 | from skimage.color import rgb2gray
9 |
10 | def getbinim(image):
11 |     # Sauvola-thresholded binary version of the (grayscale) input image
12 |     if len(image.shape) >= 3:
13 |         image = rgb2gray(image)
14 |     thresh_sauvola = threshold_sauvola(image)
15 |     return img_as_float(image < thresh_sauvola)
16 |
17 | # adapted from https://www.pyimagesearch.com/2015/09/28/implementing-the-max-rgb-filter-in-opencv/
18 | def max_rgb_filter(image):
19 |     image = image[:, :, ::-1]
20 |     image = img_as_ubyte(image)
21 |     # split the image into its BGR components
22 |     (B, G, R) = cv2.split(image)
23 |     # find the maximum pixel intensity values for each
24 |     # (x, y)-coordinate, then set all pixel values less
25 |     # than M to zero
26 |     M = np.maximum(np.maximum(R, G), B)
27 |     R[R < M] = 0
28 |     G[G < M] = 0
29 |     B[B < M] = 0
30 |
31 |     # merge the channels back together and return the image
32 |     image = cv2.merge([B, G, R])
33 |     image = img_as_float(image)
34 |     image = image[:, :, ::-1]
35 |     return np.ceil(image)
36 |
37 |
38 | # Util functions to manipulate masks. Masks encode the three classes in the blue
39 | # channel: [0, 0, 1] = printed, [0, 0, 2] = handwritten, [0, 0, 4] = background.
40 |
41 |
42 | def rgb2mask(mask):
43 |     # RGB (8-bit or binary red/green/blue) -> blue-channel label encoding
44 |     result = np.zeros((mask.shape))
45 |     result[:, :][np.where((mask[:, :] == [255, 0, 0]).all(axis=2))] = [0, 0, 1]
46 |     result[:, :][np.where((mask[:, :] == [0, 255, 0]).all(axis=2))] = [0, 0, 2]
47 |     result[:, :][np.where((mask[:, :] == [0, 0, 255]).all(axis=2))] = [0, 0, 4]
48 |     result[:, :][np.where((mask[:, :] == [1, 0, 0]).all(axis=2))] = [0, 0, 1]
49 |     result[:, :][np.where((mask[:, :] == [0, 1, 0]).all(axis=2))] = [0, 0, 2]
50 |     result[:, :][np.where((mask[:, :] == [0, 0, 1]).all(axis=2))] = [0, 0, 4]
51 |     return result
52 |
53 | def mask2rgb(mask):
54 |     # Label encoding -> RGB (printed = red, handwritten = green, background = blue)
55 |     result = np.zeros((mask.shape))
56 |     result[:, :][np.where((mask[:, :] == [0, 0, 1]).all(axis=2))] = [1, 0, 0]
57 |     result[:, :][np.where((mask[:, :] == [0, 0, 2]).all(axis=2))] = [0, 1, 0]
58 |     result[:, :][np.where((mask[:, :] == [0, 0, 4]).all(axis=2))] = [0, 0, 1]
59 |     return result
60 |
61 | def getclass(n, mask):
62 |     result = np.zeros((mask.shape))
63 |     if n == 1: result[:, :][np.where((mask[:, :] == [0, 0, 1]).all(axis=2))] = [0, 0, 1]
64 |     if n == 2: result[:, :][np.where((mask[:, :] == [0, 0, 2]).all(axis=2))] = [0, 0, 2]
65 |     result[:, :][np.where((result[:, :] == [0, 0, 0]).all(axis=2))] = [0, 0, 4]
66 |     return result
67 |
68 | def getBinclassImg(n, mask):
69 |     # Binary image for a single class (n: 1 = printed, 2 = handwritten, 3 = background)
70 |     result = np.zeros((mask.shape))
71 |     if n == 1: result[:, :][np.where((mask[:, :] == [0, 0, 1]).all(axis=2))] = [1, 1, 1]
72 |     if n == 2: result[:, :][np.where((mask[:, :] == [0, 0, 2]).all(axis=2))] = [1, 1, 1]
73 |     if n == 3: result[:, :][np.where((mask[:, :] == [0, 0, 4]).all(axis=2))] = [1, 1, 1]
74 |     return result[:, :, 0]
75 |
76 |
77 | def get_IoU(prediction, target):
78 |     intersection = np.logical_and(target, prediction)
79 |     union = np.logical_or(target, prediction)
80 |     return float(np.sum(intersection)) / float(np.sum(union))
--------------------------------------------------------------------------------
/FCN/fcn_helper_function.py:
--------------------------------------------------------------------------------
1 | '''
2 | This file includes a number of helper functions for the keras models.
3 | '''
4 |
5 |
6 | import keras.backend as K
7 | import numpy as np
8 | import tensorflow as tf
9 |
10 | """
11 | A weighted version of categorical_crossentropy for keras (2.0.6). This lets you apply a weight to unbalanced classes.
12 | @url: https://gist.github.com/wassname/ce364fddfc8a025bfab4348cf5de852d
13 | @author: wassname
14 | """
15 | def weighted_categorical_crossentropy(weights):
16 |     # (fixed: the second isinstance() call was missing its first argument)
17 |     if isinstance(weights, list) or isinstance(weights, np.ndarray):
18 |         weights = K.variable(weights)
19 |
20 |     def loss(target, output, from_logits=False):
21 |         if not from_logits:
22 |             output /= tf.reduce_sum(output,
23 |                                     len(output.get_shape()) - 1,
24 |                                     True)
25 |             _epsilon = tf.convert_to_tensor(
26 |                 K.epsilon(), dtype=output.dtype.base_dtype)
27 |             output = tf.clip_by_value(output, _epsilon, 1. - _epsilon)
28 |             weighted_losses = target * tf.log(output) * weights
29 |             return - tf.reduce_sum(weighted_losses, len(output.get_shape()) - 1)
30 |         else:
31 |             raise ValueError(
32 |                 'WeightedCategoricalCrossentropy: not valid with logits')
33 |     return loss
34 | ######################################## IoU metric ############################################
35 | # https://www.kaggle.com/c/tgs-salt-identification-challenge/discussion/63044
36 | def castF(x):
37 |     return K.cast(x, K.floatx())
38 |
39 |
40 | def castB(x):
41 |     return K.cast(x, bool)
42 |
43 |
44 | def iou_loss_core(true, pred):  # this can be used as a loss if you make it negative
45 |     intersection = true * pred
46 |     notTrue = 1 - true
47 |     union = true + (notTrue * pred)
48 |
49 |     return (K.sum(intersection, axis=-1) + K.epsilon()) / (K.sum(union, axis=-1) + K.epsilon())
50 |
51 |
52 | def IoU(true, pred):  # any shape can go - can't be a loss function
53 |
54 |     thresholds = [0.5 + (i * .05) for i in range(10)]
55 |
56 |     # flattened images (batch, pixels)
57 |     true = K.batch_flatten(true)
58 |     pred = K.batch_flatten(pred)
59 |     pred = castF(K.greater(pred, 0.5))
60 |
61 |     # total white pixels - (batch,)
62 |     trueSum = K.sum(true, axis=-1)
63 |     predSum = K.sum(pred, axis=-1)
64 |
65 |     # has mask or not per image - (batch,)
66 |     true1 = castF(K.greater(trueSum, 1))
67 |     pred1 = castF(K.greater(predSum, 1))
68 |
69 |     # to get images that have mask in both true and pred
70 |     truePositiveMask = castB(true1 * pred1)
71 |
72 |     # separating only the possible true positives to check iou
73 |     testTrue = tf.boolean_mask(true, truePositiveMask)
74 |     testPred = tf.boolean_mask(pred, truePositiveMask)
75 |
76 |     # getting iou and threshold comparisons
77 |     iou = iou_loss_core(testTrue, testPred)
78 |     truePositives = [castF(K.greater(iou, tres)) for tres in thresholds]
79 |
80 |     # mean of thresholds for true positives and total sum
81 |     truePositives = K.mean(K.stack(truePositives, axis=-1), axis=-1)
82 |     truePositives = K.sum(truePositives)
83 |
84 |     # to get images that don't have mask in both true and pred
85 |     trueNegatives = (1 - true1) * (1 - pred1)  # = 1 - true1 - pred1 + true1*pred1
86 |     trueNegatives = K.sum(trueNegatives)
87 |
88 |     return (truePositives + trueNegatives) / castF(K.shape(true)[0])
89 |
90 | ########################################################################################################################
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: BscThesis
2 | channels:
3 |   - defaults
4 | dependencies:
5 |   - _tflow_select=2.1.0=gpu
6 |   - absl-py=0.7.0=py36_0
7 |   - astor=0.7.1=py36_0
8 |   - blas=1.0=mkl
9 |   - bzip2=1.0.6=h14c3975_5
10 |   - c-ares=1.15.0=h7b6447c_1
11 |   - ca-certificates=2019.1.23=0
12 |   - cairo=1.14.12=h8948797_3
13 |   - certifi=2019.3.9=py36_0
14 |   - cloudpickle=0.8.0=py36_0
15 |   - cudatoolkit=9.0=h13b8566_0
16 |   - cudnn=7.3.1=cuda9.0_0
17 |   - cupti=9.0.176=0
18 |   - cycler=0.10.0=py36_0
19 |   - cytoolz=0.9.0.1=py36h14c3975_1
20 |   - dask-core=1.1.4=py36_1
21 |   - dbus=1.13.6=h746ee38_0
22 |   - decorator=4.4.0=py36_1
23 |   - expat=2.2.6=he6710b0_0
24 |   - ffmpeg=4.0=hcdf2ecd_0
25 |   - fontconfig=2.13.0=h9420a91_0
26 |   - freeglut=3.0.0=hf484d3e_5
27 |   - freetype=2.9.1=h8a8886c_1
28 |   - gast=0.2.2=py36_0
29 |   - glib=2.56.2=hd408876_0
30 |   - graphite2=1.3.13=h23475e2_0
31 |   - grpcio=1.16.1=py36hf8bcb03_1
32 |   - gst-plugins-base=1.14.0=hbbd80ab_1
33 |   -
gstreamer=1.14.0=hb453b48_1 34 | - h5py=2.8.0=py36h989c5e5_3 35 | - harfbuzz=1.8.8=hffaf4a1_0 36 | - hdf5=1.10.2=hba1933b_1 37 | - icu=58.2=h9c2bf20_1 38 | - imageio=2.5.0=py36_0 39 | - intel-openmp=2019.3=199 40 | - jasper=2.0.14=h07fcdf6_1 41 | - jpeg=9b=h024ee3a_2 42 | - keras=2.2.4=0 43 | - keras-applications=1.0.7=py_0 44 | - keras-base=2.2.4=py36_0 45 | - keras-preprocessing=1.0.9=py_0 46 | - kiwisolver=1.0.1=py36hf484d3e_0 47 | - libedit=3.1.20181209=hc058e9b_0 48 | - libffi=3.2.1=hd88cf55_4 49 | - libgcc-ng=8.2.0=hdf63c60_1 50 | - libgfortran-ng=7.3.0=hdf63c60_0 51 | - libglu=9.0.0=hf484d3e_1 52 | - libopencv=3.4.2=hb342d67_1 53 | - libopus=1.3=h7b6447c_0 54 | - libpng=1.6.36=hbc83047_0 55 | - libprotobuf=3.6.1=hd408876_0 56 | - libstdcxx-ng=8.2.0=hdf63c60_1 57 | - libtiff=4.0.10=h2733197_2 58 | - libuuid=1.0.3=h1bed415_2 59 | - libvpx=1.7.0=h439df22_0 60 | - libxcb=1.13=h1bed415_1 61 | - libxml2=2.9.9=he19cac6_0 62 | - markdown=3.0.1=py36_0 63 | - matplotlib=3.0.3=py36h5429711_0 64 | - mkl=2019.3=199 65 | - mkl_fft=1.0.10=py36ha843d7b_0 66 | - mkl_random=1.0.2=py36hd81dba3_0 67 | - ncurses=6.1=he6710b0_1 68 | - networkx=2.2=py36_1 69 | - numpy=1.16.2=py36h7e9f1db_0 70 | - numpy-base=1.16.2=py36hde5b4d6_0 71 | - olefile=0.46=py36_0 72 | - opencv=3.4.2=py36h6fd60c2_1 73 | - openssl=1.1.1b=h7b6447c_1 74 | - pcre=8.43=he6710b0_0 75 | - pillow=5.4.1=py36h34e0f95_0 76 | - pip=19.0.3=py36_0 77 | - pixman=0.38.0=h7b6447c_0 78 | - protobuf=3.6.1=py36he6710b0_0 79 | - py-opencv=3.4.2=py36hb342d67_1 80 | - pyparsing=2.3.1=py36_0 81 | - pyqt=5.9.2=py36h05f1152_2 82 | - pytest=4.3.1=py36_0 83 | - python=3.6.8=h0371630_0 84 | - python-dateutil=2.8.0=py36_0 85 | - pytz=2018.9=py36_0 86 | - pywavelets=1.0.2=py36hdd07704_0 87 | - pyyaml=5.1=py36h7b6447c_0 88 | - qt=5.9.7=h5867ecd_1 89 | - readline=7.0=h7b6447c_5 90 | - scikit-image=0.14.2=py36he6710b0_0 91 | - scipy=1.2.1=py36h7c811a0_0 92 | - setuptools=40.8.0=py36_0 93 | - sip=4.19.8=py36hf484d3e_0 94 | - six=1.12.0=py36_0 95 | - sqlite=3.27.2=h7b6447c_0 96 | - tensorboard=1.12.2=py36he6710b0_0 97 | - tensorflow=1.12.0=gpu_py36he68c306_0 98 | - tensorflow-base=1.12.0=gpu_py36h8e0ae2d_0 99 | - tensorflow-gpu=1.12.0=h0d30ee6_0 100 | - termcolor=1.1.0=py36_1 101 | - tk=8.6.8=hbc83047_0 102 | - toolz=0.9.0=py36_0 103 | - tornado=6.0.2=py36h7b6447c_0 104 | - tqdm=4.31.1=py36_1 105 | - werkzeug=0.14.1=py36_0 106 | - wheel=0.33.1=py36_0 107 | - xz=5.2.4=h14c3975_4 108 | - yaml=0.1.7=had09818_2 109 | - zlib=1.2.11=h7b6447c_3 110 | - zstd=1.3.7=h0b5b093_0 111 | - pip: 112 | - cython==0.29.6 113 | - dask==1.1.4 114 | - pydensecrf==1.0rc2 115 | prefix: /home/nicolas/anaconda3/envs/BscThesis 116 | 117 | -------------------------------------------------------------------------------- /frontend/style.css: -------------------------------------------------------------------------------- 1 | @import url("https://fonts.googleapis.com/css?family=Lato"); 2 | 3 | * { 4 | margin: 0; 5 | padding: 0; 6 | font-family: Lato, Arial; 7 | -moz-box-sizing: border-box; 8 | -webkit-box-sizing: border-box; 9 | box-sizing: border-box; 10 | } 11 | 12 | body { 13 | color: #fff; 14 | padding: 55px 25px; 15 | background-color: #e74c3c; 16 | } 17 | 18 | h1 { 19 | font-weight: normal; 20 | font-size: 40px; 21 | font-weight: normal; 22 | text-transform: uppercase; 23 | } 24 | 25 | h1 span { 26 | font-size: 13px; 27 | display: block; 28 | padding-left: 4px; 29 | } 30 | 31 | p { 32 | margin-top: 200px; 33 | } 34 | 35 | p a { 36 | text-transform: uppercase; 37 | text-decoration: none; 38 
| display: inline-block; 39 | color: #fff; 40 | padding: 5px 10px; 41 | margin: 0 5px; 42 | background-color: #b83729; 43 | -moz-transition: all 0.2s ease-in; 44 | -o-transition: all 0.2s ease-in; 45 | -webkit-transition: all 0.2s ease-in; 46 | transition: all 0.2s ease-in; 47 | } 48 | 49 | p a:hover { 50 | background-color: #ab3326; 51 | } 52 | 53 | .custom-file-upload-hidden { 54 | display: none; 55 | visibility: hidden; 56 | position: absolute; 57 | left: -9999px; 58 | } 59 | 60 | .custom-file-upload { 61 | display: block; 62 | width: auto; 63 | font-size: 16px; 64 | margin-top: 30px; 65 | } 66 | 67 | .custom-file-upload label { 68 | display: block; 69 | margin-bottom: 5px; 70 | } 71 | 72 | .file-upload-wrapper { 73 | position: relative; 74 | margin-bottom: 5px; 75 | } 76 | 77 | .file-upload-input { 78 | width: 300px; 79 | color: #fff; 80 | font-size: 16px; 81 | padding: 11px 17px; 82 | border: none; 83 | background-color: #c0392b; 84 | -moz-transition: all 0.2s ease-in; 85 | -o-transition: all 0.2s ease-in; 86 | -webkit-transition: all 0.2s ease-in; 87 | transition: all 0.2s ease-in; 88 | float: left; 89 | /* IE 9 Fix */ 90 | } 91 | 92 | button { 93 | display: inline-block; 94 | border: none; 95 | padding: 1rem 2rem; 96 | margin: 0; 97 | text-decoration: none; 98 | background: #0069ed; 99 | color: #ffffff; 100 | font-family: sans-serif; 101 | font-size: 1rem; 102 | cursor: pointer; 103 | text-align: center; 104 | transition: background 250ms ease-in-out, 105 | transform 150ms ease; 106 | -webkit-appearance: none; 107 | -moz-appearance: none; 108 | } 109 | 110 | button:hover, 111 | button:focus { 112 | background: #0053ba; 113 | } 114 | 115 | button:focus { 116 | outline: 1px solid #fff; 117 | outline-offset: -4px; 118 | } 119 | 120 | button:active { 121 | transform: scale(0.99); 122 | } 123 | 124 | .file-upload-input:hover, .file-upload-input:focus { 125 | background-color: #ab3326; 126 | outline: none; 127 | } 128 | 129 | input { 130 | cursor: pointer; 131 | display: inline-block; 132 | color: #fff; 133 | font-size: 16px; 134 | text-transform: uppercase; 135 | padding: 11px 20px; 136 | border: none; 137 | margin-left: -1px; 138 | margin-right: 20px; 139 | background-color: #962d22; 140 | float: left; 141 | /* IE 9 Fix */ 142 | -moz-transition: all 0.2s ease-in; 143 | -o-transition: all 0.2s ease-in; 144 | -webkit-transition: all 0.2s ease-in; 145 | transition: all 0.2s ease-in; 146 | } 147 | 148 | input:hover { 149 | background-color: #6d2018; 150 | } 151 | 152 | 153 | input { 154 | float: left; 155 | font-size: 16px; 156 | text-transform: uppercase; 157 | padding: 11px 20px; 158 | border: none; 159 | margin-left: -1px; 160 | margin-right: 20px; 161 | background-color: #4CAF50; 162 | border: none; 163 | color: white; 164 | padding: 16px 32px; 165 | text-decoration: none; 166 | margin: 4px 2px; 167 | cursor: pointer; 168 | } 169 | 170 | input { 171 | height: 55px; 172 | } -------------------------------------------------------------------------------- /FCN/fcnn-light-1.py: -------------------------------------------------------------------------------- 1 | import skimage.io as io 2 | from keras.layers import * 3 | from keras.models import * 4 | from keras.utils import to_categorical 5 | from skimage import img_as_float 6 | from skimage.color import gray2rgb 7 | from sklearn.model_selection import train_test_split 8 | import matplotlib.pyplot as plt 9 | from fcn_helper_function import * 10 | from img_utils import getbinim 11 | import pickle 12 | 13 | """ 14 | This file defines the model 
fcn-light-1. It is a simple fully convolutional model based on the FCN-8 architecture 15 | """ 16 | 17 | np.random.seed(123) 18 | 19 | # If you need to create feature vectors, you need to 20 | # 1) Download the dataset printed-hw-seg 21 | # 2) Run the crop.py utility 22 | # 3) Uncomment the code bellow 23 | 24 | # Preferably you can also download the prepared feature vectors by contacting me 25 | 26 | ''' 27 | X_train = [] 28 | X_valid = [] 29 | y_train = [] 30 | y_valid = [] 31 | 32 | print("Reading images...") 33 | 34 | inputs_train = io.imread_collection("printed-hw-seg/train/data/*.png") 35 | inputs_valid = io.imread_collection("printed-hw-seg/val/data/*.png") 36 | 37 | masks_train = io.imread_collection("printed-hw-seg/train/gt/*.png") 38 | masks_valid = io.imread_collection("printed-hw-seg/val/gt/*.png") 39 | 40 | 41 | def mask2rgb(mask): 42 | result = np.zeros((mask.shape)) 43 | result[:, :][np.where((mask[:, :] == [0, 0, 1]).all(axis=2))] = [1, 0, 0] 44 | result[:, :][np.where((mask[:, :] == [0, 0, 2]).all(axis=2))] = [0, 1, 0] 45 | result[:, :][np.where((mask[:, :] == [0, 0, 4]).all(axis=2))] = [0, 0, 1] 46 | return result 47 | 48 | for im_in,im_mask in zip(inputs_train, masks_train): 49 | X_train.append(img_as_float(gray2rgb(getbinim(im_in)))) 50 | y_train.append(mask2rgb(im_mask)) 51 | 52 | 53 | for im_in,im_mask in zip(inputs_valid, masks_valid): 54 | X_valid.append(img_as_float(gray2rgb(getbinim(im_in)))) 55 | y_valid.append(mask2rgb(im_mask)) 56 | 57 | print('dumping x_valid') 58 | pickle.dump(X_valid, open("models/x_valid.sav", "wb")) 59 | print('done x_valid') 60 | del X_valid 61 | print("dumping y_valid") 62 | pickle.dump(y_valid, open("models/y_valid.sav", "wb")) 63 | print("done") 64 | del y_valid 65 | print('dumping x_train') 66 | pickle.dump(X_train, open("models/x_train.sav", "wb")) 67 | print('done') 68 | del X_train 69 | print('dumping y_train') 70 | pickle.dump(y_train, open("models/y_train.sav", "wb")) 71 | exit() 72 | ''' 73 | 74 | X_valid = pickle.load(open("models/x_valid.sav", "rb")) 75 | y_valid = pickle.load(open("models/y_valid.sav", "rb")) 76 | X_train = pickle.load(open("models/x_train.sav", "rb")) 77 | y_train = pickle.load(open("models/y_train.sav", "rb")) 78 | 79 | print('done reading') 80 | X_valid = np.array(X_valid) 81 | X_valid = (X_valid-X_valid.mean()) / X_valid.std() 82 | print('done valid std norm') 83 | X_train = np.array(X_train) 84 | X_train = (X_train-X_train.mean()) / X_train.std() 85 | 86 | 87 | y_train = np.array(y_train) 88 | y_valid = np.array(y_valid) 89 | 90 | print("Done!") 91 | 92 | print('Number of training samples:' + str(len(X_train))) 93 | print('Number of validation samples:' + str(len(y_valid))) 94 | 95 | 96 | def FCN(nClasses, input_height=256, input_width=256): 97 | IMAGE_ORDERING = "channels_last" 98 | 99 | img_input = Input(shape=(input_height, input_width, 3)) 100 | 101 | x = Conv2D(32, (3, 3), activation='relu', padding='same', 102 | data_format=IMAGE_ORDERING)(img_input) 103 | 104 | x = MaxPooling2D((2, 2), strides=(2, 2), data_format=IMAGE_ORDERING)(x) 105 | 106 | x = Conv2D(64, (3, 3), activation='relu', 107 | padding='same', data_format=IMAGE_ORDERING)(x) 108 | x = Conv2D(64, (3, 3), activation='relu', 109 | padding='same', data_format=IMAGE_ORDERING)(x) 110 | 111 | x = Dropout(0.05)(x) 112 | 113 | 114 | x = MaxPooling2D((2, 2), strides=(2, 2), data_format=IMAGE_ORDERING)(x) 115 | 116 | x = Conv2D(128, (3, 3), activation='relu', 117 | padding='same', data_format=IMAGE_ORDERING)(x) 118 | x = Conv2D(128, (3, 3), 
activation='relu',
119 |                padding='same', data_format=IMAGE_ORDERING)(x)
120 |
121 |     x = Dropout(0.1)(x)
122 |
123 |
124 |     x = (Conv2D(54, (1, 1), activation='relu',
125 |                 padding='same', data_format=IMAGE_ORDERING))(x)
126 |
127 |     x = Dropout(0.2)(x)
128 |
129 |
130 |     x = Conv2DTranspose(nClasses, kernel_size=(4, 4), strides=(
131 |         4, 4), use_bias=False, data_format=IMAGE_ORDERING)(x)
132 |
133 |     o = (Activation('softmax'))(x)
134 |
135 |     model = Model(img_input, o)
136 |
137 |     return model
138 |
139 |
140 | model = FCN(nClasses=3,
141 |             input_height=256,
142 |             input_width=256)
143 | print(model.summary())
144 |
145 | model.compile(loss=[weighted_categorical_crossentropy([0.4,0.5,0.1])],
146 |               optimizer='adam',
147 |               metrics=[IoU])
148 |
149 | model.fit(x=X_train, y=y_train, epochs=15, batch_size=30, validation_data=(X_valid,y_valid))
150 |
151 |
152 | model.save('models/fcnn_bin_simple.h5')
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # printed-hw-segmentation
2 | ### _Lightweight printed and handwritten text identification using FCN and CRF post-processing._
3 |
4 | [![Codefresh build status]( https://g.codefresh.io/api/badges/pipeline/jumpst3r/Jumpst3r%2FBscThesis%2FBuildTestPush?branch=production&key=eyJhbGciOiJIUzI1NiJ9.NWNhYTQwZDAyYTE1MmZmMGQ2Y2FjOGM1.t3CzjCcStPDcqAcTi1nh8zpYB_E3tQmnemqSgDTbyQM&type=cf-1)]( https://g.codefresh.io/pipelines/BuildTestPush/builds?repoOwner=Jumpst3r&repoName=printed-hw-segmentation&serviceName=Jumpst3r%2Fprinted-hw-segmentation&filter=trigger:build~Build;branch:production;pipeline:5caa428088545f2b9e9e45e9~BuildTestPush) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) ![GitHub release](https://img.shields.io/github/release/jumpst3r/printed-hw-segmentation.svg)
5 |
6 | ### _published as:_ "PHTI-WS: A Printed and Handwritten Text Identification Web Service Based on FCN and CRF Post-Processing" in _ICDAR-WS 2019_
7 |
8 | ## Introduction
9 |
10 | **printed-hw-segmentation** is a tool that allows segmentation (identification) of printed and handwritten text using a fully convolutional network with CRF post-processing.
11 |
12 | If you make use of this work (the corresponding paper can be found [here](https://ieeexplore.ieee.org/document/8892961)), please cite it as:
13 | ```
14 | @article{Dutly2019PHTIWSAP,
15 |   title={PHTI-WS: A Printed and Handwritten Text Identification Web Service Based on FCN and CRF Post-Processing},
16 |   author={Nicolas Dutly and Fouad Slimane and Rolf Ingold},
17 |   journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)},
18 |   year={2019},
19 |   volume={2},
20 |   pages={20-25}
21 | }
22 | ```
23 |
24 |
25 | ![FCN results before and after CRF postprocessing](frontend/goodfcn.png)
26 | _left: input image, middle: raw FCN output, right: output after CRF postprocessing_
27 |
28 | With each release a Docker image of the code is published on [docker hub](https://cloud.docker.com/repository/docker/jumpst3r/printed-hw-segmentation). This image can be used in conjunction with [DIVA services](https://github.com/lunactic/DIVAServices) to provide segmentation as a web service. It can also be used locally.
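
For a quick look at what the Docker image runs internally, the pipeline can also be driven directly from Python. The following is a minimal sketch only (it assumes you run it from the `FCN/` folder with the conda environment from `environment.yml` activated, that the trained model `models/fcnn_bin.h5` is present, and `example.png` is a placeholder input):

```python
import skimage.io as io

from classifier_fcnn import classify  # sliding-window FCN inference
from img_utils import mask2rgb
from post import crf                  # dense-CRF post-processing

im = io.imread('example.png')
scores = classify(im)       # per-pixel softmax scores from the FCN
labels = crf(im, scores)    # refined labels (1 = printed, 2 = handwritten, 4 = background)
io.imsave('fcn_out_post.png', mask2rgb(labels))  # printed = red, handwritten = green
```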
29 |
30 | ## Web service live demo
31 |
32 | The web service version of this project can be tested at the following [link](http://wuersch.pillo-srv.ch/#/algorithm/5ff5b809fad531d7dc0f29c4).
33 |
34 | ## Local Usage
35 |
36 | After pulling the image (`docker pull jumpst3r/printed-hw-segmentation:latest`), the model can be applied to an image with:
37 |
38 | `docker run -it --rm -v /FULL_PATH_TO/example.png:/input/example.png -v /FULL_PATH_TO_OUTPUT_FOLDER/:/output/ jumpst3r/printed-hw-segmentation sh /input/script.sh /input/example.png /output/`
39 |
40 | The resulting image will be saved in the output path provided by the user.
41 |
42 | ## Usage with the DIVA services framework
43 |
44 | Follow the guide [here](https://lunactic.github.io/DIVAServicesweb/articles/installation/) to install the DIVA services framework on a server.
45 |
46 | Once installed, our method can be registered by making a POST request to the `algorithms` endpoint. This request should contain the JSON file `install.json` located in the `diva-service` folder of this repository and returns a link pointing to the newly created method. The JSON file `upload.json` can then be used to upload an image to the server by making a request to that link, which returns an ID for the uploaded image. Finally, the method can be executed using the provided ID and the JSON file `run.json` (a concrete curl sketch is given further down).
47 |
48 | ### Local Testing and development
49 |
50 | If you wish to modify or try to execute individual files, create a new conda environment from the file `environment.yml`. After activating the newly created environment, you should be able to run and modify existing files without dependency problems.
51 |
52 | A few important files are listed below.
53 |
54 | The ```FCN/``` folder contains all files which are relevant to the training and testing of the two FCN models _fcn-light-1_ and _fcn-light-2_.
55 |
56 | Specifically, the models are defined in the files ``fcnn-light-1.py`` and ``fcnn-light-2.py``.
57 |
58 | If you want to test the performance and get IoU result metrics, you will need access to the ``test/`` folder of the ``printed-hw-seg`` dataset (available upon request).
59 |
60 | To classify single images you can use the script ``classifier_fcnn.py``, which offers a number of options. The release binary is a standalone version of this file.
61 |
62 | The ``frontend/`` folder contains a discontinued frontend for interactions with the DIVA services framework.
63 |
64 | The ``docker/`` folder contains Docker-specific files which are used to generate the Docker image of the tool.
65 |
66 | The ``diva-service/`` folder contains example JSON files which can be used to deploy the method on a server
67 | running DIVA services.
68 |
69 | _disclaimer:_ The quality of the predictions depends on many factors such as input resolution, format, colors, etc. We do not claim that the models present in this repository perform universally well on all documents. The models were trained, validated and tested on a private dataset.
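
As a concrete illustration of the DIVA services workflow above, the three JSON files can be posted with curl. This is a sketch only: the host and method path are the ones hard-coded in `frontend/upload.php` and will differ on your own installation, and the image identifier returned by step 2 has to be spliced into `run.json` before step 3:

```sh
# 1) install the method (returns a link to the newly created method)
curl -X POST -H "content-type: application/json" \
     -d @diva-service/install.json http://134.21.72.190:8080/algorithms

# 2) upload an image (returns the identifier to use as "inputImage" in run.json)
curl -X POST -H "content-type: application/json" \
     -d @diva-service/upload.json http://134.21.72.190:8080/collections

# 3) execute the method on the uploaded image
curl -X POST -H "content-type: application/json" \
     -d @diva-service/run.json \
     http://134.21.72.190:8080/segmentation/nicolasprintedhandwrittensegmentation/1
```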
70 | 71 | _For word level segmentation, have a look at the ``practical-word-level`` branch._ 72 | -------------------------------------------------------------------------------- /FCN/fcnn-light-2.py: -------------------------------------------------------------------------------- 1 | import skimage.io as io 2 | from keras.layers import * 3 | from keras.models import * 4 | from keras.utils import to_categorical 5 | from skimage import img_as_float 6 | from skimage.color import gray2rgb 7 | from sklearn.model_selection import train_test_split 8 | import matplotlib.pyplot as plt 9 | from fcn_helper_function import * 10 | from img_utils import getbinim 11 | import pickle 12 | 13 | """ 14 | This file defines the model fcn-light-2. It is a fully convolutional model based on the FCN-8 architecture 15 | """ 16 | 17 | np.random.seed(123) 18 | ''' 19 | X_train = [] 20 | X_valid = [] 21 | y_train = [] 22 | y_valid = [] 23 | 24 | print("Reading images...") 25 | 26 | inputs_train = io.imread_collection("printed-hw-seg/train/data/*.png") 27 | inputs_valid = io.imread_collection("printed-hw-seg/val/data/*.png") 28 | 29 | masks_train = io.imread_collection("printed-hw-seg/train/gt/*.png") 30 | masks_valid = io.imread_collection("printed-hw-seg/val/gt/*.png") 31 | 32 | 33 | def mask2rgb(mask): 34 | result = np.zeros((mask.shape)) 35 | result[:, :][np.where((mask[:, :] == [0, 0, 1]).all(axis=2))] = [1, 0, 0] 36 | result[:, :][np.where((mask[:, :] == [0, 0, 2]).all(axis=2))] = [0, 1, 0] 37 | result[:, :][np.where((mask[:, :] == [0, 0, 4]).all(axis=2))] = [0, 0, 1] 38 | return result 39 | 40 | for im_in,im_mask in zip(inputs_train, masks_train): 41 | X_train.append(img_as_float(gray2rgb(getbinim(im_in)))) 42 | y_train.append(mask2rgb(im_mask)) 43 | 44 | 45 | for im_in,im_mask in zip(inputs_valid, masks_valid): 46 | X_valid.append(img_as_float(gray2rgb(getbinim(im_in)))) 47 | y_valid.append(mask2rgb(im_mask)) 48 | 49 | print('dumping x_valid') 50 | pickle.dump(X_valid, open("models/x_valid.sav", "wb")) 51 | print('done x_valid') 52 | del X_valid 53 | print("dumping y_valid") 54 | pickle.dump(y_valid, open("models/y_valid.sav", "wb")) 55 | print("done") 56 | del y_valid 57 | print('dumping x_train') 58 | pickle.dump(X_train, open("models/x_train.sav", "wb")) 59 | print('done') 60 | del X_train 61 | print('dumping y_train') 62 | pickle.dump(y_train, open("models/y_train.sav", "wb")) 63 | exit() 64 | ''' 65 | X_valid = pickle.load(open("models/x_valid.sav", "rb")) 66 | y_valid = pickle.load(open("models/y_valid.sav", "rb")) 67 | X_train = pickle.load(open("models/x_train.sav", "rb")) 68 | y_train = pickle.load(open("models/y_train.sav", "rb")) 69 | 70 | print('done reading') 71 | X_valid = np.array(X_valid) 72 | X_valid = (X_valid-X_valid.mean()) / X_valid.std() 73 | print('done valid std norm') 74 | X_train = np.array(X_train) 75 | X_train = (X_train-X_train.mean()) / X_train.std() 76 | 77 | 78 | y_train = np.array(y_train) 79 | y_valid = np.array(y_valid) 80 | 81 | print("Done!") 82 | 83 | print('Number of training samples:' + str(len(X_train))) 84 | print('Number of validation samples:' + str(len(y_valid))) 85 | 86 | 87 | def FCN(nClasses, input_height=256, input_width=256): 88 | IMAGE_ORDERING = "channels_last" 89 | 90 | img_input = Input(shape=(input_height, input_width, 3)) 91 | 92 | x = Conv2D(32, (3, 3), activation='relu', padding='same', 93 | data_format=IMAGE_ORDERING)(img_input) 94 | skip_1_in = Conv2D(32, (3, 3), activation='relu', 95 | padding='same', data_format=IMAGE_ORDERING)(x) 96 | 97 | 98 | x 
= MaxPooling2D((2, 2), strides=(2, 2), data_format=IMAGE_ORDERING)(skip_1_in)
99 |
100 |     x = Conv2D(64, (3, 3), activation='relu',
101 |                padding='same', data_format=IMAGE_ORDERING)(x)
102 |     skip_2_in = Conv2D(64, (3, 3), activation='relu',
103 |                padding='same', data_format=IMAGE_ORDERING)(x)
104 |
105 |     x = Dropout(0.05)(skip_2_in)
106 |
107 |
108 |     x = MaxPooling2D((2, 2), strides=(2, 2), data_format=IMAGE_ORDERING)(x)
109 |
110 |     x = Conv2D(128, (3, 3), activation='relu',
111 |                padding='same', data_format=IMAGE_ORDERING)(x)
112 |     x = Conv2D(128, (3, 3), activation='relu',
113 |                padding='same', data_format=IMAGE_ORDERING)(x)
114 |
115 |     x = Dropout(0.1)(x)
116 |
117 |
118 |     x = (Conv2D(54, (1, 1), activation='relu',
119 |                 padding='same', data_format=IMAGE_ORDERING))(x)
120 |
121 |     x = Dropout(0.2)(x)
122 |
123 |     skip_reduce_2 = Conv2D(nClasses, (1,1), activation='relu', padding='same')(skip_2_in)
124 |
125 |
126 |     x = Conv2DTranspose(nClasses, kernel_size=(2, 2), strides=(
127 |         2, 2), use_bias=False, data_format=IMAGE_ORDERING)(x)
128 |
129 |     skip_2_out = Add()([x, skip_reduce_2])
130 |
131 |     skip_reduce_1 = Conv2D(nClasses, (1,1), activation='relu', padding='same')(skip_1_in)
132 |
133 |
134 |     x = Conv2DTranspose(nClasses, kernel_size=(2, 2), strides=(
135 |         2, 2), use_bias=False, data_format=IMAGE_ORDERING)(skip_2_out)
136 |
137 |     skip_1_out = Add()([x, skip_reduce_1])
138 |
139 |     o = (Activation('softmax'))(skip_1_out)
140 |
141 |     model = Model(img_input, o)
142 |
143 |     return model
144 |
145 |
146 | model = FCN(nClasses=3,
147 |             input_height=256,
148 |             input_width=256)
149 | print(model.summary())
150 |
151 | model.compile(loss=[weighted_categorical_crossentropy([0.4,0.5,0.1])],
152 |               optimizer='adam',
153 |               metrics=[IoU])
154 |
155 |
156 | model.fit(x=X_train, y=y_train, epochs=15, batch_size=16, validation_data=(X_valid,y_valid))
157 |
158 |
159 | model.save('models/fcnn_bin.h5')
--------------------------------------------------------------------------------
/frontend/script-js.js:
--------------------------------------------------------------------------------
1 | //Reference:
2 | //https://www.onextrapixel.com/2012/12/10/how-to-create-a-custom-file-input-with-jquery-css3-and-php/
3 | ;(function ($) {
4 |
5 |     // Browser supports HTML5 multiple file?
6 |     var multipleSupport = typeof $('<input/>')[0].multiple !== 'undefined',
7 |         isIE = /msie/i.test(navigator.userAgent);
8 |
9 |     $.fn.customFile = function () {
10 |
11 |         return this.each(function () {
12 |
13 |             // NOTE: the HTML strings below were stripped in the export; they are
14 |             // reconstructed from the tutorial referenced above and the class
15 |             // names styled in style.css.
16 |             var $file = $(this).addClass('custom-file-upload-hidden'), // the original file input
17 |                 $wrap = $('<div class="file-upload-wrapper">'),
18 |                 $input = $('<input type="text" class="file-upload-input" />'),
19 |                 // Button that will be used in non-IE browsers
20 |                 $button = $('<button type="button" class="file-upload-button">Select a File</button>'),
21 |                 // Hack for IE
22 |                 $label = $('<label class="file-upload-button" for="' + $file[0].id + '">Select a File</label>');
23 |
24 |             // Hide by shifting to the left so we
25 |             // can still trigger events
26 |             $file.css({
27 |                 position: 'absolute',
28 |                 left: '-9999px'
29 |             });
30 |
31 |             $wrap.insertAfter($file)
32 |                 .append($file, $input, (isIE ? $label : $button));
33 |
34 |             // Prevent focus
35 |             $file.attr('tabIndex', -1);
36 |             $button.attr('tabIndex', -1);
37 |
38 |             $button.click(function () {
39 |                 $file.focus().click(); // Open dialog
40 |             });
41 |
42 |             $file.change(function () {
43 |
44 |                 var files = [], fileArr, filename;
45 |
46 |                 // If multiple is supported then extract
47 |                 // all filenames from the file array
48 |                 if (multipleSupport) {
49 |                     fileArr = $file[0].files;
50 |                     for (var i = 0, len = fileArr.length; i < len; i++) {
51 |                         files.push(fileArr[i].name);
52 |                     }
53 |                     filename = files.join(', ');
54 |
55 |                 // If not supported then just take the value
56 |                 // and remove the path to just show the filename
57 |                 } else {
58 |                     filename = $file.val().split('\\').pop();
59 |                 }
60 |
61 |                 $input.val(filename) // Set the value
62 |                     .attr('title', filename) // Show filename in title tooltip
63 |                     .focus(); // Regain focus
64 |
65 |             });
66 |
67 |             $input.on({
68 |                 blur: function () {
69 |                     $file.trigger('blur');
70 |                 },
71 |                 keydown: function (e) {
72 |                     if (e.which === 13) { // Enter
73 |                         if (!isIE) {
74 |                             $file.trigger('click');
75 |                         }
76 |                     } else if (e.which === 8 || e.which === 46) { // Backspace & Del
77 |                         // On some browsers the value is read-only
78 |                         // with this trick we remove the old input and add
79 |                         // a clean clone with all the original events attached
80 |                         $file.replaceWith($file = $file.clone(true));
81 |                         $file.trigger('change');
82 |                         $input.val('');
83 |                     } else if (e.which === 9) { // TAB
84 |                         return;
85 |                     } else { // All other keys
86 |                         return false;
87 |                     }
88 |                 }
89 |             });
90 |
91 |         });
92 |
93 |     };
94 |
95 |     // Old browser fallback
96 |     if (!multipleSupport) {
97 |         $(document).on('change', 'input.customfile', function () {
98 |
99 |             var $this = $(this),
100 |                 // Create a unique ID so we
101 |                 // can attach the label to the input
102 |                 uniqId = 'customfile_' + (new Date()).getTime(),
103 |                 $wrap = $this.parent(),
104 |
105 |                 // Filter empty input
106 |                 $inputs = $wrap.siblings().find('.file-upload-input')
107 |                     .filter(function () {
108 |                         return !this.value
109 |                     }),
110 |
111 |                 // NOTE: reconstructed (stripped in the export), see above
112 |                 $file = $('<input type="file" class="customfile" id="' + uniqId + '" name="' + $this.attr('name') + '"/>');
113 |
114 |             // 1ms timeout so it runs after all other events
115 |             // that modify the value have triggered
116 |             setTimeout(function () {
117 |                 // Add a new input
118 |                 if ($this.val()) {
119 |                     // Check for empty fields to prevent
120 |                     // creating new inputs when changing files
121 |                     if (!$inputs.length) {
122 |                         $wrap.after($file);
123 |                         $file.customFile();
124 |                     }
125 |                 // Remove and reorganize inputs
126 |                 } else {
127 |                     $inputs.parent().remove();
128 |                     // Move the input so it's always last on the list
129 |                     $wrap.appendTo($wrap.parent());
130 |                     $wrap.find('input').focus();
131 |                 }
132 |             }, 1);
133 |
134 |         });
135 |     }
136 |
137 | }(jQuery));
138 |
139 | $('input[type=file]').customFile();
--------------------------------------------------------------------------------
/frontend/upload.php:
--------------------------------------------------------------------------------
1 | <?php
2 | // NOTE: the export stripped this file's opening lines (everything between
3 | // "<?php" and the first ">"); the lines below are reconstructed following the
4 | // standard PHP upload-handler template this script is based on. The upload
5 | // directory is inferred from the "/uploads/" URLs used further down.
6 | $target_dir = "uploads/";
7 | $target_file = $target_dir . basename($_FILES["fileToUpload"]["name"]);
8 | $uploadOk = 1;
9 | $imageFileType = strtolower(pathinfo($target_file, PATHINFO_EXTENSION));
10 | if (isset($_POST["submit"])) {
11 |     $check = getimagesize($_FILES["fileToUpload"]["tmp_name"]);
12 |     if ($check !== false) {
13 |         $uploadOk = 1;
14 |     } else {
15 |         echo "File is not an image.";
16 |         $uploadOk = 0;
17 |     }
18 | }
19 | // Check file size
20 | if ($_FILES["fileToUpload"]["size"] > 50000000000000) {
21 |     echo "Sorry, your file is too large.";
22 |     $uploadOk = 0;
23 | }
24 | // Allow certain file formats
25 | if($imageFileType != "jpg" && $imageFileType != "png" &&
$imageFileType != "jpeg" 26 | && $imageFileType != "gif" ) { 27 | echo "Sorry, only JPG, JPEG, PNG & GIF files are allowed."; 28 | $uploadOk = 0; 29 | } 30 | // Check if $uploadOk is set to 0 by an error 31 | if ($uploadOk == 0) { 32 | echo "Sorry, your file was not uploaded."; 33 | // if everything is ok, try to upload file 34 | } else { 35 | if (move_uploaded_file($_FILES["fileToUpload"]["tmp_name"], $target_file)) { 36 | echo "The file ". basename( $_FILES["fileToUpload"]["name"]). " has been uploaded.\n\n"; 37 | echo "Uploading on DIVA services..."; 38 | 39 | $cmd = 'python executeOnDivaservices.py ' . $_FILES["fileToUpload"]["name"] . ' http://' . '134.21.133.202' . '/uploads/' . $_FILES["fileToUpload"]["name"] . " output/" ; 40 | 41 | $curl = curl_init(); 42 | 43 | curl_setopt_array($curl, array( 44 | CURLOPT_PORT => "8080", 45 | CURLOPT_URL => "http://134.21.72.190:8080/collections", 46 | CURLOPT_RETURNTRANSFER => true, 47 | CURLOPT_ENCODING => "", 48 | CURLOPT_MAXREDIRS => 10, 49 | CURLOPT_TIMEOUT => 30, 50 | CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1, 51 | CURLOPT_CUSTOMREQUEST => "POST", 52 | CURLOPT_POSTFIELDS => "{\n \"files\":[\n {\n \"type\":\"url\",\n \"value\":\"http://134.21.131.89/uploads/".$_FILES["fileToUpload"]["name"]."\"\n }\n ]\n}", 53 | CURLOPT_HTTPHEADER => array( 54 | "content-type: application/json" 55 | ), 56 | )); 57 | 58 | $response = curl_exec($curl); 59 | $err = curl_error($curl); 60 | 61 | curl_close($curl); 62 | 63 | if ($err) { 64 | echo "cURL Error #:" . $err; 65 | } else { 66 | echo 'uploaded image to DIVA server, executing method..\n'; 67 | $someObject = json_decode($response, true); 68 | $im_id = $someObject["collection"] ."/".$_FILES["fileToUpload"]["name"]; 69 | echo "image identifier: ".$im_id; 70 | sleep(1); 71 | $curl = curl_init(); 72 | curl_setopt_array($curl, array( 73 | CURLOPT_PORT => "8080", 74 | CURLOPT_URL => "http://134.21.72.190:8080/segmentation/nicolasprintedhandwrittensegmentation/1", 75 | CURLOPT_RETURNTRANSFER => true, 76 | CURLOPT_ENCODING => "", 77 | CURLOPT_MAXREDIRS => 10, 78 | CURLOPT_TIMEOUT => 30, 79 | CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1, 80 | CURLOPT_CUSTOMREQUEST => "POST", 81 | CURLOPT_POSTFIELDS => "{\n \"data\":[\n {\n \"inputImage\": \"".$im_id."\"\n }\n ]\n}", 82 | CURLOPT_HTTPHEADER => array( 83 | "content-type: application/json" 84 | ), 85 | )); 86 | 87 | $response = curl_exec($curl); 88 | $err = curl_error($curl); 89 | 90 | curl_close($curl); 91 | 92 | if ($err) { 93 | echo "cURL Error #:" . $err; 94 | } else { 95 | $res = json_decode($response, true); 96 | $res_link = $res['results'][0]['resultLink']; 97 | 98 | $curl = curl_init(); 99 | 100 | curl_setopt_array($curl, array( 101 | CURLOPT_PORT => "8080", 102 | CURLOPT_URL => $res_link, 103 | CURLOPT_RETURNTRANSFER => true, 104 | CURLOPT_ENCODING => "", 105 | CURLOPT_MAXREDIRS => 10, 106 | CURLOPT_TIMEOUT => 30, 107 | CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1, 108 | CURLOPT_CUSTOMREQUEST => "GET", 109 | CURLOPT_POSTFIELDS => "", 110 | CURLOPT_HTTPHEADER => array( 111 | "content-type: application/json" 112 | ), 113 | )); 114 | 115 | $response = curl_exec($curl); 116 | $err = curl_error($curl); 117 | 118 | if ($err) { 119 | echo "cURL Error #:" . 
$err; 120 | } else { 121 | $res = json_decode($response, true); 122 | while($res['status'] == 'running'){ 123 | sleep(2); 124 | $response = curl_exec($curl); 125 | $res = json_decode($response, true); 126 | } 127 | echo $res['output'][0]['file']['url']; 128 | 129 | $img = 'output/res.png'; 130 | 131 | // Function to write image into file 132 | file_put_contents($img, file_get_contents($res['output'][0]['file']['url'])); 133 | echo "DOOONE"; 134 | 135 | } 136 | 137 | } 138 | } 139 | } 140 | } 141 | ?> -------------------------------------------------------------------------------- /FCN/test_classifier.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file can be used to evaluate models. You will only need the test/ folder of the 3 | dataset printed-hw-seg which can be downloaded in the thesis. 4 | Run python test_classifier.py for help 5 | """ 6 | import argparse 7 | import sys 8 | import warnings 9 | 10 | import numpy as np 11 | import skimage.io as io 12 | from fcn_helper_function import weighted_categorical_crossentropy, IoU 13 | from img_utils import max_rgb_filter, get_IoU, getBinclassImg, mask2rgb, rgb2mask 14 | from keras.engine.saving import load_model 15 | from post import crf 16 | from skimage import img_as_float 17 | from skimage.color import gray2rgb 18 | from tqdm import tqdm 19 | 20 | if not sys.warnoptions: 21 | warnings.simplefilter("ignore") 22 | 23 | BOXWDITH = 256 24 | STRIDE = BOXWDITH - 10 25 | 26 | 27 | def classify(imgdb): 28 | result_imgs = [] 29 | print("classifying " + str(len(imgdb)) + ' images') 30 | for image in imgdb: 31 | model = load_model('models/fcnn_bin_simple.h5', custom_objects={ 32 | 'loss': weighted_categorical_crossentropy([0.4, 0.5, 0.1]), 'IoU': IoU}) 33 | orgim = np.copy(image) 34 | # assume image in binary 35 | image = img_as_float(gray2rgb(image)) 36 | maskw = int((np.ceil(image.shape[1] / BOXWDITH) * BOXWDITH)) + 1 37 | maskh = int((np.ceil(image.shape[0] / BOXWDITH) * BOXWDITH)) 38 | mask = np.ones((maskh, maskw, 3)) 39 | mask2 = np.zeros((maskh, maskw, 3)) 40 | mask[0:image.shape[0], 0:image.shape[1]] = image 41 | for y in tqdm(range(0, mask.shape[0], STRIDE), unit='batch'): 42 | x = 0 43 | if (y + BOXWDITH > mask.shape[0]): 44 | break 45 | while (x + BOXWDITH) < mask.shape[1]: 46 | input = mask[y:y + BOXWDITH, x:x + BOXWDITH] 47 | std = input.std() if input.std() != 0 else 1 48 | mean = input.mean() 49 | mask2[y:y + BOXWDITH, x:x + BOXWDITH] = model.predict( 50 | np.array([(input - mean) / std]))[0] 51 | x = x + STRIDE 52 | result_imgs.append(mask2[0:image.shape[0], 0:image.shape[1]]) 53 | return result_imgs 54 | 55 | 56 | if __name__ == "__main__": 57 | parser = argparse.ArgumentParser() 58 | parser.add_argument("ground_truth_folder", help="ground truth folder") 59 | parser.add_argument("test_folder", help="test folder") 60 | args = parser.parse_args() 61 | 62 | image_col = io.imread_collection(args.test_folder + '*') 63 | mask_col = io.imread_collection(args.ground_truth_folder + '*') 64 | crf_orgim = io.imread_collection(mask_col + 'crf-orgim/*') 65 | out_raw = classify(image_col) 66 | out_crf = [crf(inim, outim) for inim, outim in zip(crf_orgim, out_raw)] 67 | 68 | results_folder = 'test_out/' 69 | 70 | IoUs_hw_old = [] 71 | IoUs_hw_new = [] 72 | IoUs_printed_old = [] 73 | IoUs_printed_new = [] 74 | IoUs_bg_old = [] 75 | IoUs_bg_new = [] 76 | IoUs_mean_old = [] 77 | IoUs_mean_new = [] 78 | 79 | for i, (out, crf_res, gt) in enumerate(zip(out_raw, out_crf, mask_col)): 80 | IoU_printed_old 
/FCN/test_classifier.py:
--------------------------------------------------------------------------------
1 | """
2 | This file can be used to evaluate models. You only need the test/ folder of the
3 | printed-hw-seg dataset referenced in the thesis.
4 | Run 'python test_classifier.py -h' for help.
5 | """
6 | import argparse
7 | import os
8 | import sys
9 | import warnings
10 |
11 | import numpy as np
12 | import skimage.io as io
13 | from fcn_helper_function import weighted_categorical_crossentropy, IoU
14 | from img_utils import max_rgb_filter, get_IoU, getBinclassImg, mask2rgb, rgb2mask
15 | from keras.engine.saving import load_model
16 | from post import crf
17 | from skimage import img_as_float
18 | from skimage.color import gray2rgb
19 | from tqdm import tqdm
20 |
21 | if not sys.warnoptions:
22 |     warnings.simplefilter("ignore")
23 |
24 | BOXWDITH = 256
25 | STRIDE = BOXWDITH - 10
26 |
27 |
28 | def classify(imgdb):
29 |     result_imgs = []
30 |     # load the model once instead of reloading it for every image
31 |     model = load_model('models/fcnn_bin_simple.h5', custom_objects={
32 |         'loss': weighted_categorical_crossentropy([0.4, 0.5, 0.1]), 'IoU': IoU})
33 |     print("classifying " + str(len(imgdb)) + ' images')
34 |     for image in imgdb:
35 |         orgim = np.copy(image)
36 |         # assume the input image is binary
37 |         image = img_as_float(gray2rgb(image))
38 |         maskw = int((np.ceil(image.shape[1] / BOXWDITH) * BOXWDITH)) + 1
39 |         maskh = int((np.ceil(image.shape[0] / BOXWDITH) * BOXWDITH))
40 |         mask = np.ones((maskh, maskw, 3))
41 |         mask2 = np.zeros((maskh, maskw, 3))
42 |         mask[0:image.shape[0], 0:image.shape[1]] = image
43 |         for y in tqdm(range(0, mask.shape[0], STRIDE), unit='batch'):
44 |             x = 0
45 |             if (y + BOXWDITH > mask.shape[0]):
46 |                 break
47 |             while (x + BOXWDITH) < mask.shape[1]:
48 |                 patch = mask[y:y + BOXWDITH, x:x + BOXWDITH]
49 |                 std = patch.std() if patch.std() != 0 else 1
50 |                 mean = patch.mean()
51 |                 mask2[y:y + BOXWDITH, x:x + BOXWDITH] = model.predict(
52 |                     np.array([(patch - mean) / std]))[0]
53 |                 x = x + STRIDE
54 |         result_imgs.append(mask2[0:image.shape[0], 0:image.shape[1]])
55 |     return result_imgs
56 |
57 |
58 | if __name__ == "__main__":
59 |     parser = argparse.ArgumentParser()
60 |     parser.add_argument("ground_truth_folder", help="ground truth folder")
61 |     parser.add_argument("test_folder", help="test folder")
62 |     args = parser.parse_args()
63 |
64 |     image_col = io.imread_collection(args.test_folder + '*')
65 |     mask_col = io.imread_collection(args.ground_truth_folder + '*')
66 |     # original images used as the CRF reference (was: mask_col + 'crf-orgim/*', a type error)
67 |     crf_orgim = io.imread_collection(args.ground_truth_folder + 'crf-orgim/*')
68 |     out_raw = classify(image_col)
69 |     out_crf = [crf(inim, outim) for inim, outim in zip(crf_orgim, out_raw)]
70 |
71 |     results_folder = 'test_out/'
72 |     os.makedirs(results_folder, exist_ok=True)  # make sure the output folder exists
73 |
74 |     IoUs_hw_old = []
75 |     IoUs_hw_new = []
76 |     IoUs_printed_old = []
77 |     IoUs_printed_new = []
78 |     IoUs_bg_old = []
79 |     IoUs_bg_new = []
80 |     IoUs_mean_old = []
81 |     IoUs_mean_new = []
82 |
83 |     for i, (out, crf_res, gt) in enumerate(zip(out_raw, out_crf, mask_col)):
84 |         mask_out = rgb2mask(max_rgb_filter(out))  # compute once, reuse for all three classes
85 |         IoU_printed_old = get_IoU(getBinclassImg(1, mask_out), getBinclassImg(1, gt))
86 |         IoU_hw_old = get_IoU(getBinclassImg(2, mask_out), getBinclassImg(2, gt))
87 |         IoU_bg_old = get_IoU(getBinclassImg(3, mask_out), getBinclassImg(3, gt))
88 |         IoU_mean_old = np.array([IoU_printed_old, IoU_hw_old, IoU_bg_old]).mean()
89 |
90 |         IoU_printed_new = get_IoU(getBinclassImg(1, crf_res), getBinclassImg(1, gt))
91 |         IoU_hw_new = get_IoU(getBinclassImg(2, crf_res), getBinclassImg(2, gt))
92 |         IoU_bg_new = get_IoU(getBinclassImg(3, crf_res), getBinclassImg(3, gt))
93 |         IoU_mean_new = np.array([IoU_printed_new, IoU_hw_new, IoU_bg_new]).mean()
94 |
95 |         IoUs_hw_old.append(IoU_hw_old)
96 |         IoUs_hw_new.append(IoU_hw_new)
97 |         IoUs_printed_new.append(IoU_printed_new)
98 |         IoUs_printed_old.append(IoU_printed_old)
99 |         IoUs_bg_old.append(IoU_bg_old)
100 |         IoUs_bg_new.append(IoU_bg_new)
101 |         IoUs_mean_old.append(IoU_mean_old)
102 |         IoUs_mean_new.append(IoU_mean_new)
103 |
104 |         print("--------------- IoU test results for image " + str(i) + " ---------------")
105 |         print("Format: | [old IoU]-->[new IoU]")
106 |         print(" printed | [{:.2f}]-->[{:.2f}]".format(IoU_printed_old, IoU_printed_new))
107 |         print(" handwritten | [{:.2f}]-->[{:.2f}]".format(IoU_hw_old, IoU_hw_new))
108 |         print(" background | [{:.2f}]-->[{:.2f}]".format(IoU_bg_old, IoU_bg_new))
109 |         print("-------------------------------------------------")
110 |         print(" mean | [{:.2f}]-->[{:.2f}]".format(IoU_mean_old, IoU_mean_new))
111 |         print("\n")
112 |
113 |         io.imsave(results_folder + 'fcn_out_' + str(i) + '.png', max_rgb_filter(out))
114 |         print('saved fcn_out_' + str(i) + '.png')
115 |         io.imsave(results_folder + 'fcn_out_crf_' + str(i) + '.png', mask2rgb(crf_res))
116 |         print('saved fcn_out_crf_' + str(i) + '.png')
117 |
118 |     print("\n")
119 |
120 |     IoUs_hw_old = np.array(IoUs_hw_old)
121 |     IoUs_hw_new = np.array(IoUs_hw_new)
122 |     IoUs_printed_old = np.array(IoUs_printed_old)
123 |     IoUs_printed_new = np.array(IoUs_printed_new)
124 |     IoUs_bg_old = np.array(IoUs_bg_old)
125 |     IoUs_bg_new = np.array(IoUs_bg_new)
126 |     IoUs_mean_old = np.array(IoUs_mean_old)
127 |     IoUs_mean_new = np.array(IoUs_mean_new)
128 |
129 |     print('\n')
130 |     print("--------------- IoU mean test results ---------------")
131 |     print("Format: | [old IoU mean]-->[new IoU mean]")
132 |     print(" printed | [{:.2f}]-->[{:.2f}]".format(IoUs_printed_old.mean(), IoUs_printed_new.mean()))
133 |     print(" handwritten | [{:.2f}]-->[{:.2f}]".format(IoUs_hw_old.mean(), IoUs_hw_new.mean()))
134 |     print(" background | [{:.2f}]-->[{:.2f}]".format(IoUs_bg_old.mean(), IoUs_bg_new.mean()))
135 |     print("-------------------------------------------------")
136 |     print(" mean | [{:.2f}]-->[{:.2f}]".format(IoUs_mean_old.mean(), IoUs_mean_new.mean()))
137 |     print("\n")
138 |
--------------------------------------------------------------------------------
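test_classifier.py obtains its per-class scores through get_IoU and getBinclassImg, which live in img_utils.py and are not reproduced in this dump. A minimal sketch of a binary-mask IoU consistent with how get_IoU is called above (an assumption about the actual implementation in img_utils.py):

import numpy as np

def iou(pred_mask, gt_mask):
    # intersection over union of two equally shaped binary masks
    pred = pred_mask.astype(bool)
    gt = gt_mask.astype(bool)
    union = np.logical_or(pred, gt).sum()
    if union == 0:
        return 1.0  # both masks are empty: treat as a perfect match
    return np.logical_and(pred, gt).sum() / union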
/FCN/classifier_fcnn.py:
--------------------------------------------------------------------------------
1 | '''
2 | This file can be used to test the models on input images. It includes the following features:
3 | 1) Enabling CRF postprocessing
4 | 2) Enabling visualisations
5 | 3) Mean IoU calculations if a GT image is provided.
6 |
7 | Run 'python classifier_fcnn.py -h' for more information
8 |
9 | '''
10 |
11 | import argparse
12 | import sys
13 | import warnings
14 |
15 | import matplotlib.pyplot as plt
16 | import numpy as np
17 | import skimage.io as io
18 | from fcn_helper_function import weighted_categorical_crossentropy, IoU
19 | from img_utils import getbinim, max_rgb_filter, get_IoU, getBinclassImg, mask2rgb, rgb2mask
20 | from keras.engine.saving import load_model
21 | from post import crf
22 | from skimage import img_as_float
23 | from skimage.color import gray2rgb
24 |
25 | if not sys.warnoptions:
26 |     warnings.simplefilter("ignore")
27 |
28 |
29 | BOXWDITH = 256
30 | STRIDE = BOXWDITH - 10
31 |
32 | def classify(image):
33 |     try:
34 |         model = load_model('/input/models/fcnn_bin.h5', custom_objects={
35 |             'loss': weighted_categorical_crossentropy([0.4, 0.5, 0.1]), 'IoU': IoU})
36 |     except OSError:
37 |         # fall back to a relative path when the binary is run outside the provided Docker image
38 |         model = load_model('models/fcnn_bin.h5', custom_objects={
39 |             'loss': weighted_categorical_crossentropy([0.4, 0.5, 0.1]), 'IoU': IoU})
40 |     orgim = np.copy(image)
41 |     image = img_as_float(gray2rgb(getbinim(image)))
42 |     maskw = int((np.ceil(image.shape[1] / BOXWDITH) * BOXWDITH)) + 1
43 |     maskh = int((np.ceil(image.shape[0] / BOXWDITH) * BOXWDITH))
44 |     mask = np.ones((maskh, maskw, 3))
45 |     mask2 = np.zeros((maskh, maskw, 3))
46 |     mask[0:image.shape[0], 0:image.shape[1]] = image
47 |     print("classifying image...")
48 |     for y in range(0, mask.shape[0], STRIDE):
49 |         x = 0
50 |         if (y + BOXWDITH > mask.shape[0]):
51 |             break
52 |         while (x + BOXWDITH) < mask.shape[1]:
53 |             patch = mask[y:y + BOXWDITH, x:x + BOXWDITH]
54 |             std = patch.std() if patch.std() != 0 else 1
55 |             mean = patch.mean()
56 |             mask2[y:y + BOXWDITH, x:x + BOXWDITH] = model.predict(
57 |                 np.array([(patch - mean) / std]))[0]
58 |             x = x + STRIDE
59 |     return mask2[0:image.shape[0], 0:image.shape[1]]
60 |
61 |
62 |
63 | if __name__ == "__main__":
64 |     parser = argparse.ArgumentParser()
65 |     parser.add_argument("--enableviz", help="Plot results of segmentation",
66 |                         action="store_true")
67 |     parser.add_argument("--enableCRF", help="Use crf for postprocessing",
68 |                         action="store_true")
69 |     parser.add_argument("--ground_truth", help="ground truth file name.
Classes must be encoded in the blue channel as 1:printed, 2:hw, 4:bg")
70 |     parser.add_argument("input_image", help="input image file name")
71 |     parser.add_argument("output_folder", help="output folder")
72 |     args = parser.parse_args()
73 |     inputim = io.imread(args.input_image)
74 |     output_folder = args.output_folder
75 |
76 |
77 |     out = classify(inputim)
78 |
79 |     if args.enableCRF:
80 |         crf_res = crf(inputim, out)
81 |     else:
82 |         crf_res = None
83 |
84 |     if args.ground_truth is not None:
85 |         gt = io.imread(args.ground_truth)
86 |
87 |         mask_out = rgb2mask(max_rgb_filter(out))  # compute once, reuse for all three classes
88 |
89 |         IoU_printed_old = get_IoU(getBinclassImg(1, mask_out), getBinclassImg(1, gt))
90 |         IoU_hw_old = get_IoU(getBinclassImg(2, mask_out), getBinclassImg(2, gt))
91 |         IoU_bg_old = get_IoU(getBinclassImg(3, mask_out), getBinclassImg(3, gt))
92 |         IoU_mean_old = np.array([IoU_printed_old, IoU_hw_old, IoU_bg_old]).mean()
93 |
94 |         if crf_res is not None:
95 |             IoU_printed_new = get_IoU(getBinclassImg(1, crf_res), getBinclassImg(1, gt))
96 |             IoU_hw_new = get_IoU(getBinclassImg(2, crf_res), getBinclassImg(2, gt))
97 |             IoU_bg_new = get_IoU(getBinclassImg(3, crf_res), getBinclassImg(3, gt))
98 |             IoU_mean_new = np.array([IoU_printed_new, IoU_hw_new, IoU_bg_new]).mean()
99 |
100 |             print("Format: | [old IoU]-->[new IoU]")
101 |             print(" printed | [{:.5f}]-->[{:.5f}]".format(IoU_printed_old, IoU_printed_new))
102 |             print(" handwritten | [{:.5f}]-->[{:.5f}]".format(IoU_hw_old, IoU_hw_new))
103 |             print(" background | [{:.5f}]-->[{:.5f}]".format(IoU_bg_old, IoU_bg_new))
104 |             print("-------------------------------------------------")
105 |             print(" mean | [{:.5f}]-->[{:.5f}]".format(IoU_mean_old, IoU_mean_new))
106 |
107 |             if args.enableviz:
108 |                 fig, axes = plt.subplots(1, 3)
109 |                 fig.suptitle('Printed - handwritten segmentation', fontsize=20)
110 |                 axes[0].imshow(inputim)
111 |                 axes[0].axis('off')
112 |                 axes[0].set_title('Input')
113 |                 axes[1].imshow(max_rgb_filter(out))
114 |                 axes[1].axis('off')
115 |                 axes[1].set_title('FCN raw output [mean IoU = {:.5f}]'.format(IoU_mean_old))
116 |                 axes[2].imshow(mask2rgb(crf_res))
117 |                 axes[2].axis('off')
118 |                 axes[2].set_title('CRF post-processing output [mean IoU = {:.5f}]'.format(IoU_mean_new))
119 |                 plt.show()
120 |             else:
121 |                 io.imsave(output_folder + 'fcn_out.png', max_rgb_filter(out))
122 |                 print('saved fcn_out.png')
123 |                 io.imsave(output_folder + 'fcn_out_crf.png', mask2rgb(crf_res))
124 |                 print('saved fcn_out_crf.png')
125 |
126 |         else:
127 |             print("Format: | [IoU]")
128 |             print(" printed | [{:.5f}]".format(IoU_printed_old))
129 |             print(" handwritten | [{:.5f}]".format(IoU_hw_old))
130 |             print(" background | [{:.5f}]".format(IoU_bg_old))
131 |             print("-------------------------------------------------")
132 |             print(" mean | [{:.5f}]".format(IoU_mean_old))
133 |
134 |             if args.enableviz:
135 |                 fig, axes = plt.subplots(1, 2)
136 |                 fig.suptitle('Printed - handwritten segmentation', fontsize=20)
137 |                 axes[0].imshow(inputim)
138 |                 axes[0].axis('off')
139 |                 axes[0].set_title('Input')
140 |                 axes[1].imshow(max_rgb_filter(out))
141 |                 axes[1].axis('off')
142 |                 axes[1].set_title('FCN raw output [mean IoU = {:.5f}]'.format(IoU_mean_old))
143 |                 plt.show()
144 |             else:
145 |                 io.imsave(output_folder + 'fcn_out.png', max_rgb_filter(out))
146 |                 print('saved fcn_out.png')
147 |     else:
148 |         if crf_res is not None:
149 |             if args.enableviz:
150 |                 fig, axes = plt.subplots(1, 3)
151 |                 fig.suptitle('Printed - handwritten segmentation', fontsize=20)
152 |                 axes[0].imshow(inputim)
153 |                 axes[0].axis('off')
154 |                 axes[0].set_title('Input')
155 |                 axes[1].imshow(max_rgb_filter(out))
156 |                 axes[1].axis('off')
157 |                 axes[1].set_title('FCN raw output')
158 |                 axes[2].imshow(mask2rgb(crf_res))
159 |                 axes[2].axis('off')
160 |                 axes[2].set_title('CRF post-processing output')
161 |                 plt.show()
162 |             else:
163 |                 io.imsave(output_folder + 'fcn_out.png', max_rgb_filter(out))
164 |                 print('saved fcn_out.png')
165 |                 io.imsave(output_folder + 'fcn_out_crf.png', mask2rgb(crf_res))
166 |                 print('saved fcn_out_crf.png')
167 |
168 |         else:
169 |             if args.enableviz:
170 |                 fig, axes = plt.subplots(1, 2)
171 |                 fig.suptitle('Printed - handwritten segmentation', fontsize=20)
172 |                 axes[0].imshow(inputim)
173 |                 axes[0].axis('off')
174 |                 axes[0].set_title('Input')
175 |                 axes[1].imshow(max_rgb_filter(out))
176 |                 axes[1].axis('off')
177 |                 axes[1].set_title('FCN raw output')
178 |                 plt.show()
179 |             else:
180 |                 io.imsave(output_folder + 'fcn_out.png', max_rgb_filter(out))
181 |                 print('saved fcn_out.png')
182 |
--------------------------------------------------------------------------------
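Both classify() implementations above share the same overlapping sliding-window inference pattern: pad the image up to a multiple of the 256 px window, slide the window with stride BOXWDITH - 10 (10 px of overlap), standardise each patch to zero mean and unit variance, and paste the per-patch predictions back into an output canvas. Condensed into a standalone sketch (predict_patch is a stand-in for model.predict on the loaded Keras FCN):

import numpy as np

BOXWDITH = 256
STRIDE = BOXWDITH - 10  # neighbouring windows overlap by 10 px

def sliding_window(image, predict_patch):
    # pad the RGB image up to the next multiple of the window size
    maskh = int(np.ceil(image.shape[0] / BOXWDITH) * BOXWDITH)
    maskw = int(np.ceil(image.shape[1] / BOXWDITH) * BOXWDITH) + 1
    canvas = np.ones((maskh, maskw, 3))
    out = np.zeros((maskh, maskw, 3))
    canvas[:image.shape[0], :image.shape[1]] = image
    for y in range(0, maskh - BOXWDITH + 1, STRIDE):
        x = 0
        while x + BOXWDITH < maskw:
            patch = canvas[y:y + BOXWDITH, x:x + BOXWDITH]
            std = patch.std() if patch.std() != 0 else 1  # guard against constant patches
            out[y:y + BOXWDITH, x:x + BOXWDITH] = predict_patch((patch - patch.mean()) / std)
            x += STRIDE
    return out[:image.shape[0], :image.shape[1]]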