├── .gitattributes ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── conda.txt ├── data ├── input │ ├── in1.jpeg │ ├── in2.jpeg │ ├── in3.jpeg │ ├── signed_file.pdf │ ├── signed_image.jpeg │ └── signed_image.png └── output │ ├── labeled_mask.jpeg │ ├── labeled_mask_0.jpeg │ ├── labeled_mask_1.jpeg │ ├── mask.jpeg │ ├── mask_0.jpeg │ ├── mask_1.jpeg │ ├── sign.jpeg │ └── signature.jpeg ├── demo.ipynb ├── demo.py ├── lab ├── data │ ├── input │ │ ├── signed_file.pdf │ │ └── signed_image.jpeg │ └── output │ │ ├── signed_image.jpg │ │ ├── signed_image_boxed.jpeg │ │ └── signed_image_region_0.jpeg └── dev.ipynb ├── package.md ├── setup.py ├── src └── signature_detect │ ├── __init__.py │ ├── cropper.py │ ├── extractor.py │ ├── judger.py │ └── loader.py ├── tests ├── .coveragerc ├── coverage.sh ├── data │ ├── dummy.py │ ├── signed_file.pdf │ ├── signed_image.jpeg │ ├── signed_image.tif │ └── test.txt ├── test_cropper.py ├── test_extractor.py ├── test_judger.py └── test_loader.py └── update_package.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # project 2 | .vscode/ 3 | private/ 4 | dev.ipynb 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # SageMath parsed files 91 | *.sage.py 92 | 93 | # Environments 94 | .env 95 | .venv 96 | env/ 97 | venv/ 98 | ENV/ 99 | env.bak/ 100 | venv.bak/ 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | .spyproject 105 | 106 | # Rope project settings 107 | .ropeproject 108 | 109 | # mkdocs documentation 110 | /site 111 | 112 | # mypy 113 | .mypy_cache/ 114 | .dmypy.json 115 | dmypy.json 116 | 117 | # Pyre type checker 118 | .pyre/ 119 | 120 | # macOS 121 | .DS_Store 122 | 123 | #PyCharm 124 | .idea 125 | __pycache__ 126 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Jijie LIU 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom 
the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | recursive-include tests *.py 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Signature Detection 2 | 3 | A simple tool to detect if there is **a signature** in **an image** or **a PDF file**. 4 | 5 | ## Installation of PyPi 6 | 7 | It's the quick way to use this tool. 8 | 9 | `signature-detect` package contains the codes in the `src`. 10 | 11 | ``` 12 | pip install signature-detect 13 | ``` 14 | 15 | ## Installation of Anaconda 16 | 17 | It's the recommended way to explore this tool. It provides notebooks for playing around. 18 | 19 | 1. install [anaconda](https://docs.anaconda.com/anaconda/install/) 20 | 21 | 2. 
install dependencies 22 | 23 | ``` 24 | conda create --name <env> --file conda.txt 25 | ``` 26 | 27 | ## Demo 28 | 29 | - Image: 30 | 31 | ``` 32 | python demo.py --file my-image.jpeg 33 | ``` 34 | 35 | - PDF File: 36 | 37 | ``` 38 | python demo.py --file my-file.pdf 39 | ``` 40 | 41 | ## Unit Tests 42 | 43 | All the codes in `src` are covered. 44 | 45 | ``` 46 | cd tests 47 | coverage run -m unittest 48 | coverage report -m 49 | ``` 50 | 51 | ## Example 52 | 53 | We use the following image as an example. The full example is in the [demo notebook](demo.ipynb) 54 | 55 | ![signed_image](./data/input/signed_image.jpeg) 56 | 57 | ### Loader 58 | The loader reads the file and creates a mask. 59 | 60 | The mask is a numpy array. The bright parts are set to 255, the rest is set to 0. It contains ONLY these 2 numbers. 61 | 62 | #### Attributes 63 | 64 | - low_threshold = (0, 0, 250) 65 | 66 | - high_threshold = (255, 255, 255) 67 | 68 | They control the creation of the mask, used in the function `cv.inRange`. 69 | 70 | 71 | #### Result 72 | 73 | Here, yellow is `255`, purple is `0`. 74 | 75 | ![mask](./data/output/mask.jpeg) 76 | 77 | ### Extractor 78 | 79 | The extractor, first, generates the regions from the mask. 80 | 81 | Then, it removes the small and the big regions because the signature is neither too big nor too small. 82 | 83 | The process is as follows. 84 | 85 | 1. label the image 86 | 87 | `skimage.measure.label` labels the connected regions of an integer array. It returns a labeled array, where all connected regions are assigned the same integer value. 88 | 89 | 2. calculate the average size of regions 90 | 91 | Here, the size means **the number of the pixels in a region**. 92 | 93 | We accumulate the number of the pixels in all the regions, `total_pixels`. The average size is `total_pixels / nb_regions`. 94 | 95 | If the size of a region is smaller than `min_area_size`, this region is ignored. `min_area_size` is given by the user. 96 | 97 | 3. 
calculate the size of the small outlier 98 | 99 | ``` 100 | small_size_outlier = average * outlier_weight + outlier_bias 101 | ``` 102 | 103 | `outlier_weight` and `outlier_bias` are given by the user. 104 | 105 | 4. calculate the size of the big outlier 106 | 107 | ``` 108 | big_size_outlier = small_size_outlier * amplfier 109 | ``` 110 | 111 | `amplfier` is given by the user. 112 | 113 | 5. remove the small and big outliers 114 | 115 | #### Attributes 116 | 117 | - outlier_weight = 3 118 | 119 | - outlier_bias = 100 120 | 121 | - amplfier = 10 122 | 123 | > `15` is used in the demo. 124 | 125 | - min_area_size = 10 126 | 127 | #### Result 128 | 129 | ![labeled_image](./data/output/labeled_mask.jpeg) 130 | 131 | 132 | ### Cropper 133 | 134 | The cropper finds the **contours** of regions in the **labeled masks** and crops them. 135 | 136 | #### Attributes 137 | 138 | Suppose `(h, w) = region.shape`. 139 | 140 | - min_region_size = 10000 141 | 142 | If `h * w < min_region_size`, then this region is ignored. 143 | 144 | - border_ratio: float 145 | 146 | border = min(h, w) * border_ratio 147 | 148 | The border will be removed if this attribute is not `0`. 149 | 150 | #### Result 151 | 152 | ![signature](./data/output/signature.jpeg) 153 | 154 | 155 | ### Judger 156 | 157 | The judger reads the cropped mask and identifies if it's a signature or not. 158 | 159 | #### Attributes 160 | 161 | Suppose `(h, w) = cropped_mask.shape`. 162 | 163 | - size_ratio: [low, high] 164 | 165 | low < max(h, w) / min(h, w) < high. 166 | 167 | - max_pixel_ratio: [low, high] 168 | 169 | low < the number of 0 / the number of 255 < high. 170 | 171 | The mask should only have 2 values, 0 and 255. 172 | 173 | By default: 174 | 175 | - size_ratio = [1, 4] 176 | 177 | - max_pixel_ratio = [0.01, 1] 178 | 179 | #### Result 180 | 181 | - `max(h, w) / min(h, w)` = 3.48 182 | 183 | - number of `0` / number of `255` = 0.44 184 | 185 | So, this image is signed. 
-------------------------------------------------------------------------------- /conda.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: osx-64 4 | anyio=2.2.0=py38h50d1736_0 5 | appdirs=1.4.4=pyh9f0ad1d_0 6 | appnope=0.1.2=py38h50d1736_1 7 | argon2-cffi=20.1.0=py38h5406a74_2 8 | async_generator=1.10=py_0 9 | atk-1.0=2.36.0=he69c4ee_4 10 | attrs=20.3.0=pyhd3deb0d_0 11 | babel=2.9.0=pyhd3deb0d_0 12 | backcall=0.2.0=pyh9f0ad1d_0 13 | backports=1.0=py_2 14 | backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0 15 | black=20.8b1=py_1 16 | bleach=3.3.0=pyh44b312d_0 17 | blosc=1.21.0=he49afe7_0 18 | brotli=1.0.9=h046ec9c_4 19 | brotlipy=0.7.0=py38h5406a74_1001 20 | brunsli=0.1=h046ec9c_0 21 | bzip2=1.0.8=hc929b4f_4 22 | c-ares=1.17.1=h0d85af4_1 23 | ca-certificates=2020.12.5=h033912b_0 24 | cairo=1.16.0=he43a7df_1008 25 | certifi=2020.12.5=py38h50d1736_1 26 | cffi=1.14.5=py38ha97d567_0 27 | chardet=4.0.0=py38h50d1736_1 28 | charls=2.2.0=h046ec9c_0 29 | click=7.1.2=pyh9f0ad1d_0 30 | cloudpickle=1.6.0=py_0 31 | coverage=5.5=py38h96a0964_0 32 | cryptography=3.4.7=py38h1fa4640_0 33 | cycler=0.10.0=py_2 34 | cytoolz=0.11.0=py38h5406a74_3 35 | dask-core=2021.4.0=pyhd8ed1ab_0 36 | dataclasses=0.8=pyhc8e2a94_1 37 | decorator=5.0.6=pyhd8ed1ab_0 38 | defusedxml=0.7.1=pyhd8ed1ab_0 39 | entrypoints=0.3=pyhd8ed1ab_1003 40 | expat=2.3.0=he49afe7_0 41 | ffmpeg=4.3.1=hb787a4c_2 42 | fftw=3.3.9=nompi_h02cd531_101 43 | font-ttf-dejavu-sans-mono=2.37=hab24e00_0 44 | font-ttf-inconsolata=2.001=hab24e00_0 45 | font-ttf-source-code-pro=2.030=hab24e00_0 46 | font-ttf-ubuntu=0.83=hab24e00_0 47 | fontconfig=2.13.1=h10f422b_1005 48 | fonts-conda-ecosystem=1=0 49 | fonts-conda-forge=1=0 50 | freetype=2.10.4=h4cff582_1 51 | fribidi=1.0.10=hbcb3906_0 52 | fsspec=0.9.0=pyhd8ed1ab_0 53 | gdk-pixbuf=2.42.6=h2e6141f_0 54 | gettext=0.19.8.1=h7937167_1005 55 | 
ghostscript=9.53.3=h2e338ed_2 56 | giflib=5.2.1=hbcb3906_2 57 | gmp=6.2.1=h2e338ed_0 58 | gnutls=3.6.13=h756fd2b_1 59 | graphite2=1.3.13=h2e338ed_1001 60 | graphviz=2.47.0=hdb475ea_1 61 | gtk2=2.24.33=h55c0dba_0 62 | gts=0.7.6=hccb3bdf_2 63 | harfbuzz=2.8.0=h159f659_1 64 | hdf5=1.10.6=nompi_hc5d9132_1114 65 | icu=68.1=h74dc148_0 66 | idna=2.10=pyh9f0ad1d_0 67 | imagecodecs=2021.3.31=py38hc7b77fe_0 68 | imageio=2.9.0=py_0 69 | imagemagick=7.0.11_7=pl5320h46a8306_0 70 | importlib-metadata=3.10.1=py38h50d1736_0 71 | ipykernel=5.3.4=py38h5ca1d4c_0 72 | ipython=7.22.0=py38h6c79ece_0 73 | ipython_genutils=0.2.0=py_1 74 | jasper=1.900.1=h636a363_1006 75 | jbig=2.1=h0d85af4_2002 76 | jedi=0.18.0=py38h50d1736_2 77 | jinja2=2.11.3=pyh44b312d_0 78 | joblib=1.0.1=pyhd8ed1ab_0 79 | jpeg=9d=hbcb3906_0 80 | json5=0.9.5=pyh9f0ad1d_0 81 | jsonschema=3.2.0=pyhd8ed1ab_3 82 | jupyter-packaging=0.7.12=pyhd8ed1ab_0 83 | jupyter_client=6.1.12=pyhd8ed1ab_0 84 | jupyter_core=4.7.1=py38h50d1736_0 85 | jupyter_server=1.6.1=py38h50d1736_0 86 | jupyterlab=3.0.12=pyhd8ed1ab_0 87 | jupyterlab_pygments=0.1.2=pyh9f0ad1d_0 88 | jupyterlab_server=2.4.0=pyhd8ed1ab_0 89 | jxrlib=1.1=h35c211d_2 90 | kiwisolver=1.3.1=py38hd9c93a9_1 91 | krb5=1.17.2=h60d9502_0 92 | lame=3.100=h35c211d_1001 93 | lcms2=2.12=h577c468_0 94 | lerc=2.2.1=h046ec9c_0 95 | libaec=1.0.4=h046ec9c_1 96 | libblas=3.9.0=8_openblas 97 | libcblas=3.9.0=8_openblas 98 | libcurl=7.76.0=h8ef9fac_0 99 | libcxx=11.1.0=habf9029_0 100 | libdeflate=1.7=h35c211d_5 101 | libedit=3.1.20191231=h0678c8f_2 102 | libev=4.33=haf1e3a3_1 103 | libffi=3.3=h046ec9c_2 104 | libgd=2.3.2=h4e7a7ea_0 105 | libgfortran=5.0.0=9_3_0_h6c81a4c_22 106 | libgfortran5=9.3.0=h6c81a4c_22 107 | libglib=2.68.1=hd556434_0 108 | libiconv=1.16=haf1e3a3_0 109 | liblapack=3.9.0=8_openblas 110 | liblapacke=3.9.0=8_openblas 111 | libnghttp2=1.43.0=h07e645a_0 112 | libopenblas=0.3.12=openmp_h54245bb_1 113 | libopencv=4.5.1=py38h229d5d6_1 114 | libpng=1.6.37=h7cec526_2 115 | 
libprotobuf=3.15.8=hcf210ce_0 116 | librsvg=2.50.3=hd4300c0_1 117 | libsodium=1.0.18=hbcb3906_1 118 | libssh2=1.9.0=h52ee1ee_6 119 | libtiff=4.2.0=h355d032_0 120 | libtool=2.4.6=h2e338ed_1007 121 | libwebp=1.2.0=h1648767_0 122 | libwebp-base=1.2.0=h0d85af4_2 123 | libxcb=1.13=h35c211d_1003 124 | libxml2=2.9.10=h93ec3fd_4 125 | libzopfli=1.0.3=h046ec9c_0 126 | llvm-openmp=11.1.0=hda6cdc1_1 127 | locket=0.2.0=py_2 128 | lz4-c=1.9.3=h046ec9c_0 129 | markupsafe=1.1.1=py38h5406a74_3 130 | matplotlib=3.4.1=py38h50d1736_0 131 | matplotlib-base=3.4.1=py38h6152e83_0 132 | mistune=0.8.4=py38h5406a74_1003 133 | mypy_extensions=0.4.3=py38h50d1736_3 134 | nbclassic=0.2.7=pyhd8ed1ab_0 135 | nbclient=0.5.3=pyhd8ed1ab_0 136 | nbconvert=6.0.7=py38h50d1736_3 137 | nbformat=5.1.3=pyhd8ed1ab_0 138 | ncurses=6.2=h2e338ed_4 139 | nest-asyncio=1.5.1=pyhd8ed1ab_0 140 | nettle=3.6=hedd7734_0 141 | networkx=2.5=py_0 142 | notebook=6.3.0=py38h50d1736_0 143 | numpy=1.20.2=py38had91d27_0 144 | olefile=0.46=pyh9f0ad1d_1 145 | opencv=4.5.1=py38h50d1736_1 146 | openh264=2.1.1=hfd3ada9_0 147 | openjpeg=2.4.0=h6cbf5cd_0 148 | openssl=1.1.1k=h0d85af4_0 149 | packaging=20.9=pyh44b312d_0 150 | pandoc=2.13=h0d85af4_0 151 | pandocfilters=1.4.2=py_1 152 | pango=1.42.4=ha86e081_5 153 | parso=0.8.2=pyhd8ed1ab_0 154 | partd=1.2.0=pyhd8ed1ab_0 155 | pathspec=0.8.1=pyhd3deb0d_0 156 | pcre=8.44=hb1e8313_0 157 | perl=5.32.0=hbcb3906_0 158 | pexpect=4.8.0=pyh9f0ad1d_2 159 | pickleshare=0.7.5=py_1003 160 | pillow=8.1.2=py38h83525de_1 161 | pip=21.0.1=pyhd8ed1ab_0 162 | pixman=0.40.0=hbcb3906_0 163 | pkg-config=0.29.2=h31203cd_1008 164 | prometheus_client=0.10.1=pyhd8ed1ab_0 165 | prompt-toolkit=3.0.18=pyha770c72_0 166 | pthread-stubs=0.4=hc929b4f_1001 167 | ptyprocess=0.7.0=pyhd3deb0d_0 168 | py-opencv=4.5.1=py38h6c79ece_1 169 | pycparser=2.20=pyh9f0ad1d_2 170 | pygments=2.8.1=pyhd8ed1ab_0 171 | pyopenssl=20.0.1=pyhd8ed1ab_0 172 | pyparsing=2.4.7=pyh9f0ad1d_0 173 | pyrsistent=0.17.3=py38h5406a74_2 174 | 
pysocks=1.7.1=py38h50d1736_3 175 | python=3.8.8=h4e93d89_0_cpython 176 | python-dateutil=2.8.1=py_0 177 | python_abi=3.8=1_cp38 178 | pytz=2021.1=pyhd8ed1ab_0 179 | pywavelets=1.1.1=py38hc7193ba_3 180 | pyyaml=5.4.1=py38h5406a74_0 181 | pyzmq=22.0.3=py38hd3b92b6_1 182 | readline=8.1=h05e3726_0 183 | regex=2021.4.4=py38h96a0964_0 184 | requests=2.25.1=pyhd3deb0d_0 185 | scikit-image=0.17.2=py38h81aa140_0 186 | scikit-learn=0.23.2=py38h959d312_0 187 | scipy=1.6.2=py38h431c0a8_0 188 | send2trash=1.5.0=py_0 189 | setuptools=49.6.0=py38h50d1736_3 190 | six=1.15.0=pyh9f0ad1d_0 191 | snappy=1.1.8=hb1e8313_3 192 | sniffio=1.2.0=py38h50d1736_1 193 | sqlite=3.35.4=h44b9ce1_0 194 | terminado=0.9.4=py38h50d1736_0 195 | testpath=0.4.4=py_0 196 | threadpoolctl=2.1.0=pyh5ca1d4c_0 197 | tifffile=2021.4.8=pyhd8ed1ab_0 198 | tk=8.6.10=h0419947_1 199 | toml=0.10.2=pyhd8ed1ab_0 200 | toolz=0.11.1=py_0 201 | tornado=6.1=py38h5406a74_1 202 | traitlets=5.0.5=py_0 203 | typed-ast=1.4.3=py38h96a0964_0 204 | typing_extensions=3.7.4.3=py_0 205 | urllib3=1.26.4=pyhd8ed1ab_0 206 | wand=0.6.5=pyhd3deb0d_0 207 | wcwidth=0.2.5=pyh9f0ad1d_2 208 | webencodings=0.5.1=py_1 209 | wheel=0.36.2=pyhd3deb0d_0 210 | x264=1!161.3030=h35c211d_0 211 | xorg-kbproto=1.0.7=h35c211d_1002 212 | xorg-libice=1.0.10=h0d85af4_0 213 | xorg-libsm=1.2.3=h0d85af4_1000 214 | xorg-libx11=1.7.0=h35c211d_0 215 | xorg-libxau=1.0.9=h35c211d_0 216 | xorg-libxdmcp=1.1.3=h35c211d_0 217 | xorg-libxext=1.3.4=h0d85af4_1 218 | xorg-libxrender=0.9.10=h0d85af4_1003 219 | xorg-libxt=1.2.1=h0d85af4_2 220 | xorg-renderproto=0.11.1=h0d85af4_1002 221 | xorg-xextproto=7.3.0=h35c211d_1002 222 | xorg-xproto=7.0.31=h35c211d_1007 223 | xz=5.2.5=haf1e3a3_1 224 | yaml=0.2.5=haf1e3a3_0 225 | zeromq=4.3.4=h1c7c35f_0 226 | zfp=0.5.5=he49afe7_5 227 | zipp=3.4.1=pyhd8ed1ab_0 228 | zlib=1.2.11=h7795811_1010 229 | zstd=1.4.9=h582d3a0_0 230 | -------------------------------------------------------------------------------- /data/input/in1.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/input/in1.jpeg -------------------------------------------------------------------------------- /data/input/in2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/input/in2.jpeg -------------------------------------------------------------------------------- /data/input/in3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/input/in3.jpeg -------------------------------------------------------------------------------- /data/input/signed_file.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/input/signed_file.pdf -------------------------------------------------------------------------------- /data/input/signed_image.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/input/signed_image.jpeg -------------------------------------------------------------------------------- /data/input/signed_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/input/signed_image.png -------------------------------------------------------------------------------- /data/output/labeled_mask.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/output/labeled_mask.jpeg -------------------------------------------------------------------------------- /data/output/labeled_mask_0.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/output/labeled_mask_0.jpeg -------------------------------------------------------------------------------- /data/output/labeled_mask_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/output/labeled_mask_1.jpeg -------------------------------------------------------------------------------- /data/output/mask.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/output/mask.jpeg -------------------------------------------------------------------------------- /data/output/mask_0.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/output/mask_0.jpeg -------------------------------------------------------------------------------- /data/output/mask_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/output/mask_1.jpeg -------------------------------------------------------------------------------- /data/output/sign.jpeg: -------------------------------------------------------------------------------- 
def main(file_path: str) -> None:
    """
    Print whether the given image / PDF file contains a signature.

    The file is turned into masks by Loader; each mask is labeled by
    Extractor, cropped into regions by Cropper and every cropped mask is
    handed to Judger. The scan stops at the first region judged as signed.

    Params:
        file_path: path of the file to inspect.

    Any exception raised while processing the file is printed instead of
    propagated (this is a demo script).
    """
    loader = Loader()
    extractor = Extractor(amplfier=15)
    cropper = Cropper()
    judger = Judger()

    try:
        masks = loader.get_masks(file_path)
        is_signed = False
        for mask in masks:
            labeled_mask = extractor.extract(mask)
            results = cropper.run(labeled_mask)
            for result in results.values():
                is_signed = judger.judge(result["cropped_mask"])
                if is_signed:
                    break
            if is_signed:
                # a signature was found, no need to read the remaining masks
                break
        print(is_signed)
    except Exception as e:
        # top-level boundary of the demo: report the problem and exit normally
        print(e)


def parse_file_arg(argv):
    """
    Return the value following the last "--file" flag in argv.

    Params:
        argv: list of command line arguments (e.g. sys.argv)
    Return:
        the file path, or None if "--file" is absent or has no value
        after it (a trailing "--file" used to raise IndexError).
    """
    file_path = None
    # argv[:-1]: only a flag that is followed by another argument is usable
    for i, arg in enumerate(argv[:-1]):
        if arg == "--file":
            file_path = argv[i + 1]
    return file_path


if __name__ == "__main__":
    file_path = parse_file_arg(sys.argv)
    if file_path is None:
        print("Need input file")
        print("python demo.py --file my-file.pdf")
    else:
        main(file_path)
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/lab/data/input/signed_file.pdf -------------------------------------------------------------------------------- /lab/data/input/signed_image.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/lab/data/input/signed_image.jpeg -------------------------------------------------------------------------------- /lab/data/output/signed_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/lab/data/output/signed_image.jpg -------------------------------------------------------------------------------- /lab/data/output/signed_image_boxed.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/lab/data/output/signed_image_boxed.jpeg -------------------------------------------------------------------------------- /lab/data/output/signed_image_region_0.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/lab/data/output/signed_image_region_0.jpeg -------------------------------------------------------------------------------- /package.md: -------------------------------------------------------------------------------- 1 | # Signature Detection 2 | 3 | A simple tool to detect if there are **signatures** in **an image** or **a PDF file**. 4 | 5 | The full documentation is presented at the [Github Repository](https://github.com/EnzoSeason/signature_detection). 
6 | 7 | ## Extra Dependencies 8 | 9 | This tool uses `Wand` to convert a PDF file into images. 10 | 11 | `Wand` is a ctypes-based simple `ImageMagick` binding for Python. You may need to install `ImageMagick` on your local machine. 12 | 13 | More details are available [here](https://docs.wand-py.org/en/0.6.6/). 14 | 15 | ## Usage 16 | 17 | Signature_detect contains 4 classes. 18 | 19 | 1. Loader 20 | 2. Extractor 21 | 3. Cropper 22 | 4. Judger 23 | 24 | ### Loader 25 | 26 | Loader can read an image or a PDF file page by page. 27 | 28 | It returns a list of the masks. Each mask is a numpy 2 dimensions array. Its element's value is `0` or `255`. 29 | 30 | ```python 31 | from signature_detect.loader import Loader 32 | 33 | loader = Loader( 34 | low_threshold=(0, 0, 250), 35 | high_threshold=(255, 255, 255)) 36 | 37 | masks = loader.get_masks(file_path) 38 | ``` 39 | 40 | 41 | ### Extractor 42 | 43 | Extractor reads a mask, labels the regions in the mask, and removes both small and big regions. We consider that the signature is a region of middle size. 44 | 45 | ```python 46 | from signature_detect.extractor import Extractor 47 | 48 | extractor = Extractor( 49 | outlier_weight=3, 50 | outlier_bias=100, 51 | amplfier=10, 52 | min_area_size=10) 53 | 54 | labeled_mask = extractor.extract(mask) 55 | ``` 56 | 57 | ### Cropper 58 | 59 | Cropper crops the regions in the labeled mask. 60 | 61 | ```python 62 | from signature_detect.cropper import Cropper 63 | 64 | cropper = Cropper( 65 | min_region_size=10000, 66 | border_ratio=0.1) 67 | 68 | results = cropper.run(labeled_mask) 69 | ``` 70 | 71 | ### Judger 72 | 73 | Judger decides whether a region is a signature. 
74 | 75 | ```python 76 | from signature_detect.judger import Judger 77 | 78 | judger = Judger( 79 | size_ratio=[1, 4], 80 | pixel_ratio=[0.01, 1]) 81 | 82 | is_signed = judger.judge(result["cropped_mask"]) 83 | ``` 84 | 85 | ## Dev version 86 | 87 | If you would like to develop this package and run the tests, you can download the code and install dev environment locally. 88 | 89 | ```bash 90 | pip install -e .[dev] 91 | ``` -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | with open("package.md", "r") as md: 4 | long_description = md.read() 5 | 6 | setup( 7 | name="signature-detect", 8 | version="0.1.4", 9 | url="https://github.com/EnzoSeason/signature_detection", 10 | author="Jijie LIU", 11 | author_email="liujijieseason@gmail.com", 12 | description="A package for the signature detection", 13 | long_description=long_description, 14 | long_description_content_type="text/markdown", 15 | py_modules=[ 16 | "signature_detect", 17 | ], 18 | package_dir={"": "src"}, 19 | install_requires=[ 20 | "numpy>=1.0", 21 | "pillow>=8.0", 22 | "scikit-image", 23 | "Wand", 24 | "opencv-python", 25 | ], 26 | extras_require={"dev": ["coverage>=5.5"]}, 27 | license = "MIT", 28 | ) -------------------------------------------------------------------------------- /src/signature_detect/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/src/signature_detect/__init__.py -------------------------------------------------------------------------------- /src/signature_detect/cropper.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Any 3 | import cv2 4 | import numpy as np 5 | from PIL import Image 6 | 7 | 8 | class 
Cropper: 9 | """ 10 | read the mask extracted by Extractor, and crop it. 11 | 12 | Attributes: 13 | ----------- 14 | - min_region_size 15 | 16 | the min area size of the signature. 17 | 18 | - border_ratio: float 19 | 20 | border = min(h, w) * border_ratio 21 | 22 | h, w are the heigth and width of the input mask. 23 | The border will be removed by the function _remove_borders. 24 | 25 | Methods: 26 | -------- 27 | - find_contours(img: numpy array) -> sorted_boxes: numpy array 28 | 29 | find the contours and sort them by area size 30 | 31 | - is_intersected(box_a: [x, y, w, h], box_b: [x, y, w, h]) -> bool 32 | 33 | check box_a and box_b is intersected 34 | 35 | - merge_boxes(box_a: [x, y, w, h], box_b: [x, y, w, h]) -> [x, y, w, h]: 36 | 37 | merge the intersected boxes into one 38 | 39 | - boxes2regions(sorted_boxes) -> dict: 40 | 41 | transform all the sorted_boxes into regions (merged boxes) 42 | 43 | - crop_regions(img: numpy array, regions: dict) -> list: 44 | 45 | return a list of cropped images (np.array) 46 | 47 | - run(img_path) -> list 48 | 49 | main function, crop the signatures, 50 | return a list of cropped images (np.array) 51 | """ 52 | 53 | def __init__(self, min_region_size=10000, border_ratio=0.1): 54 | self.min_region_size = min_region_size 55 | self.border_ratio = border_ratio 56 | 57 | def __str__(self) -> str: 58 | s = "\nCropper\n==========\n" 59 | s += "min_region_size = {}\n".format(self.min_region_size) 60 | s += "border_ratio = {}\n".format(self.border_ratio) 61 | return s 62 | 63 | def find_contours(self, img): 64 | """ 65 | find contours limited by min_region_size 66 | in the binary image. 67 | 68 | The contours are sorted by area size, from large to small. 69 | 70 | Params: 71 | img: numpy array 72 | Return: 73 | boxes: A numpy array of contours. 
# Instance methods of Cropper: each takes ``self`` and belongs in the class body.


def is_intersected(self, new_box, orignal_box) -> bool:
    """
    Tell whether two boxes overlap or touch.

    params
    ------
    new_box: sequence (x, y, w, h)
    orignal_box: sequence (x, y, w, h)

    return
    ------
    False when one box lies strictly above, below, left of or right of the
    other one; True otherwise. Boxes that only share an edge count as
    intersected.
    """
    x_a, y_a, w_a, h_a = new_box
    x_b, y_b, w_b, h_b = orignal_box

    separated = (
        y_a > y_b + h_b
        or y_a + h_a < y_b
        or x_a > x_b + w_b
        or x_a + w_a < x_b
    )
    return not separated


def merge_boxes(self, box_a, box_b) -> list:
    """
    merge 2 intersected boxes into the smallest box that contains both

    params
    ------
    box_a: sequence (x, y, w, h)
    box_b: sequence (x, y, w, h)

    return
    ------
    [x, y, w, h] of the merged box
    """
    x_a, y_a, w_a, h_a = box_a
    x_b, y_b, w_b, h_b = box_b

    min_x = min(x_a, x_b)
    min_y = min(y_a, y_b)
    # The merged extent is the largest of: either box alone, or the distance
    # from one box's origin to the far edge of the other box.
    max_w = max(w_a, w_b, x_b + w_b - x_a, x_a + w_a - x_b)
    max_h = max(h_a, h_b, y_b + h_b - y_a, y_a + h_a - y_b)

    return [min_x, min_y, max_w, max_h]


def _remove_borders(self, box) -> Any:
    """
    remove the borders around the box

    The border is floor(min(w, h) * self.border_ratio) pixels. The origin is
    moved inwards by one border and the size is reduced by one border, so the
    right and bottom edges stay where they were.
    NOTE(review): trimming all four sides would need ``w - 2 * border``;
    confirm which one is intended before changing it.
    """
    x, y, w, h = box
    border = math.floor(min(w, h) * self.border_ratio)
    return [x + border, y + border, w - border, h - border]


def boxes2regions(self, sorted_boxes) -> dict:
    """
    group intersecting boxes into regions

    params
    ------
    sorted_boxes: iterable of boxes (x, y, w, h)

    return
    ------
    dict {region index: box}. A box that intersects an existing region is
    merged into the first such region; otherwise it opens a new region.
    """
    regions = {}

    for box in sorted_boxes:
        if not regions:
            # The very first box is stored untouched (no border removal).
            regions[0] = box
            continue

        for key, region in regions.items():
            if self.is_intersected(box, region):
                merged = self.merge_boxes(region, box)
                # Only the value of an existing key is replaced, and the
                # loop stops right after, so iterating the dict is safe.
                regions[key] = self._remove_borders(merged)
                break
        else:
            # No region intersects this box: it becomes a new region.
            regions[len(regions)] = self._remove_borders(box)

    return regions


def get_cropped_masks(self, mask, regions) -> dict:
    """
    return cropped masks

    params
    ------
    mask: numpy array
    regions: dict {key: (x, y, w, h)}

    return
    ------
    dict {key: numpy array cropped from the mask}
    """
    # The source image does not depend on the region, so build it once.
    image = Image.fromarray(mask)

    results = {}
    for key, (x, y, w, h) in regions.items():
        results[key] = np.array(image.crop((x, y, x + w, y + h)))
    return results


def merge_regions_and_masks(self, mask, regions) -> dict:
    """
    helper function: put regions and masks in a dict, and return it.

    return
    ------
    dict {key: {"cropped_region": box, "cropped_mask": numpy array}}
    """
    cropped_masks = self.get_cropped_masks(mask, regions)
    return {
        key: {
            "cropped_region": region,
            "cropped_mask": cropped_masks[key],
        }
        for key, region in regions.items()
    }


def run(self, np_image):
    """
    read the signature extracted by Extractor, and crop it.

    It finds the contour boxes, groups them into regions and returns the
    regions together with their cropped masks.
    """
    # find contours
    sorted_boxes = self.find_contours(np_image)

    # get regions
    regions = self.boxes2regions(sorted_boxes)

    # crop regions
    return self.merge_regions_and_masks(np_image, regions)
class Extractor:
    """
    Extract the signature from a mask. The process is as follows.

    1. It finds the regions in an image mask. Each region has a label (unique number).
    2. It removes the small regions. The small region is defined by attributes.
    3. It removes the big regions. The big region is defined by attributes.
    4. It returns a mask in which the remaining regions are 0 and everything else is 255.

    Attributes
    ----------
    outlier_weight: int
        The weight of small outlier size
    outlier_bias: int
        The bias of small outlier size
    amplfier: int
        The amplfier calculates the big outlier size from the small one
    min_area_size: int
        The min region area size taken into account for the average region size.

    Methods
    -------
    extract(mask: numpy.array):
        extract the signature
    """

    def __init__(
        self, outlier_weight=3, outlier_bias=100, amplfier=10, min_area_size=10
    ):
        # the parameters are used to remove small size connected pixels outlier
        self.outlier_weight = outlier_weight
        self.outlier_bias = outlier_bias
        # the parameter is used to remove big size connected pixels outlier
        self.amplfier = amplfier
        self.min_area_size = min_area_size

    def __str__(self) -> str:
        lines = [
            "",
            "Extractor",
            "==========",
            "outlier_weight = {}".format(self.outlier_weight),
            "outlier_bias = {}".format(self.outlier_bias),
            "> small_outlier_size = outlier_weight * average_region_size + outlier_bias",
            "amplfier = {}".format(self.amplfier),
            "> large_outlier_size = amplfier * small_outlier_size",
            "min_area_size = {} (pixels)".format(self.min_area_size),
            "> min_area_size is used to calculate average_region_size.",
        ]
        return "\n".join(lines) + "\n"

    def extract(self, mask) -> Any:
        """
        params
        ------
        mask: numpy array
            The mask of the image. It's calculated by Loader.

        return
        ------
        labeled_mask: numpy array
            A uint8 array shaped like the mask: 0 on the regions that were
            kept, 255 elsewhere.
            When at most one region is larger than min_area_size, the input
            mask is returned unchanged.
        """
        # Pixels brighter than the mean are the background (label 0);
        # the connected groups of the other pixels get the labels 1, 2, ...
        condition = mask > mask.mean()
        labels = measure.label(condition, background=1)

        region_areas = [
            region.area
            for region in regionprops(labels)
            if region.area > self.min_area_size
        ]
        if len(region_areas) <= 1:
            return mask

        average = sum(region_areas) / len(region_areas)
        # regions smaller than small_size_outlier are removed
        small_size_outlier = average * self.outlier_weight + self.outlier_bias
        # regions bigger than big_size_outlier are removed
        big_size_outlier = small_size_outlier * self.amplfier

        # remove the small regions
        labeled_image = morphology.remove_small_objects(labels, small_size_outlier)

        # remove the big regions: component_sizes[label] is the pixel count
        # of that label, so indexing with the image gives a per-pixel flag.
        component_sizes = np.bincount(labeled_image.ravel())
        too_large = component_sizes > big_size_outlier
        labeled_image[too_large[labeled_image]] = 0

        # label 0 (background or removed) -> 255, kept regions -> 0
        labeled_mask = np.full(labeled_image.shape, 255, dtype="uint8")
        return labeled_mask * (labeled_image == 0)
class Judger:
    """
    read the cropped mask and identify if it's a signature

    Attributes:
    -----------
    - size_ratio: [low, high]

        low < max(h, w) / min(h, w) < high.

        h, w are the height and width of the input mask.

    - pixel_ratio: [low, high]

        low < the number of 0 / the number of 255 < high.

        The mask should only have 2 values, 0 and 255.

    Methods:
    --------
    - judge(mask: numpy array) -> bool

        identify if the mask is a signature

    - run(results: dict) -> list

        judge every cropped mask of the results
    """

    def __init__(self, size_ratio=None, pixel_ratio=None) -> None:
        # A fresh list is built for every instance, so changing the bounds
        # of one Judger never leaks into another one.
        self.size_ratio = [1, 4] if size_ratio is None else size_ratio
        self.pixel_ratio = [0.01, 1] if pixel_ratio is None else pixel_ratio

    def __str__(self) -> str:
        s = "\nJudger\n==========\n"
        s += "size_ratio = {}\n".format(str(self.size_ratio))
        s += "pixel_ratio = {}\n".format(str(self.pixel_ratio))
        return s

    def _is_valid_mask(self, mask: Any) -> bool:
        """
        A mask is valid when it contains exactly the 2 values 0 and 255.
        """
        values = np.unique(mask)
        return bool(len(values) == 2 and values[0] == 0 and values[1] == 255)

    def judge(self, mask: Any) -> bool:
        """
        identify if the mask is a signature

        params
        ------
        mask: numpy array that only contains 0 and 255

        return
        ------
        True when both the size ratio and the pixel ratio of the mask lie
        within their [low, high] bounds, False otherwise (an invalid mask is
        never a signature).
        """
        if not self._is_valid_mask(mask):
            return False

        size_ratio = max(mask.shape) / min(mask.shape)
        if size_ratio < self.size_ratio[0] or size_ratio > self.size_ratio[1]:
            return False

        # A valid mask holds only 0 and 255, so every non-zero pixel is 255.
        # Counting this way also works for masks with a non-integer dtype.
        nb_zeros = np.count_nonzero(mask == 0)
        nb_highs = mask.size - nb_zeros
        pixel_ratio = nb_zeros / nb_highs
        if pixel_ratio < self.pixel_ratio[0] or pixel_ratio > self.pixel_ratio[1]:
            return False

        return True

    def run(self, results: dict) -> list:
        """
        judge every cropped mask

        params
        ------
        results: dict {id: {"cropped_mask": ..., "cropped_region": ...}}

        return
        ------
        list of {"id": id, "signed": bool, "box": cropped_region}
        """
        return [
            {
                "id": idx,
                "signed": self.judge(result["cropped_mask"]),
                "box": result["cropped_region"],
            }
            for idx, result in results.items()
        ]
class Loader:
    """
    Load an image or a pdf file.

    Attributes
    ----------
    low_threshold: tuple
        The low threshold of cv2.inRange
    high_threshold: tuple
        The high threshold of cv2.inRange

    Methods
    -------
    get_masks(path: str) -> list
        It reads an image or a pdf file page by page.
        It returns the masks that the bright parts are marked as 255, the rest as 0.
    """

    def __init__(self, low_threshold=(0, 0, 250), high_threshold=(255, 255, 255)):
        # _is_valid raises on a bad threshold, so nothing is stored in that case.
        self._is_valid(low_threshold)
        self._is_valid(high_threshold)
        self.low_threshold = low_threshold
        self.high_threshold = high_threshold

    def __str__(self) -> str:
        s = "\nLoader\n==========\n"
        s += "low_threshold = {}\n".format(self.low_threshold)
        s += "high_threshold = {}\n".format(self.high_threshold)
        return s

    def _is_valid(self, threshold: tuple) -> bool:
        """
        Check that the threshold is a tuple of 3 items within [0, 255].

        return
        ------
        True when the threshold is valid.

        raise
        -----
        TypeError: the threshold is not a tuple.
        ValueError: the threshold does not have 3 items, or an item is out
        of the range [0, 255].
        """
        if not isinstance(threshold, tuple):
            raise TypeError("The threshold must be a tuple.")
        if len(threshold) != 3:
            raise ValueError("The threshold must have 3 item (h, s, v).")
        for item in threshold:
            if item not in range(0, 256):
                raise ValueError("The threshold item must be in the range [0, 255].")
        return True

    def get_masks(self, path) -> list:
        """
        params
        ------
        path: str
            The path of a jpg/jpeg, png, tif or pdf file. The file type is
            chosen from the extension, case-insensitively.

        return
        ------
        list of masks: one mask for an image, the masks of get_pdf_masks for
        a pdf.

        raise
        -----
        ValueError: the extension is not supported.
        """
        ext = os.path.splitext(os.path.basename(path))[1][1:].lower()

        if ext == "pdf":
            self.document_type = "PDF"
            worker = _PdfWorker(self.low_threshold, self.high_threshold)
            return worker.get_pdf_masks(path)

        if ext in ("jpg", "jpeg", "png", "tif"):
            self.document_type = "IMAGE"
            worker = _ImageWorker(self.low_threshold, self.high_threshold)
            return [worker.get_image_mask(path)]

        raise ValueError("Document should be jpg/jpeg, png, tif or pdf.")


class _ImageWorker:
    """
    Build the mask of a single image.
    """

    def __init__(self, low_threshold: tuple, high_threshold: tuple) -> None:
        self.low_threshold = low_threshold
        self.high_threshold = high_threshold

    def make_mask(self, image) -> Any:
        """
        create a mask that the bright parts are marked as 255, the rest as 0.

        params
        ------
        image: numpy array
            It is converted with cv2.COLOR_BGR2HSV, so the channels are read
            as B, G, R.

        return
        ------
        frame_threshold: numpy array
        """
        frame_HSV = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        return cv2.inRange(frame_HSV, self.low_threshold, self.high_threshold)

    def get_image_mask(self, path: str) -> Any:
        """
        read the image of the path and return its mask

        raise
        -----
        ValueError: the image cannot be read.
        """
        image = cv2.imread(path)
        if image is None:
            # cv2.imread does not raise on a missing or unreadable file.
            raise ValueError("The image cannot be read: {}".format(path))
        return self.make_mask(image)


class _PdfWorker(_ImageWorker):
    """
    Build the masks of a pdf file, page by page.
    """

    def get_pdf_images(self, path: str) -> list:
        """
        return the pages of the pdf file, rendered with a resolution of 200
        """
        with Image(filename=path, resolution=200) as source:
            pages = source.sequence
            # NOTE(review): the pages are handed out after `source` has been
            # closed - confirm that they stay usable once the parent is closed.
            return [pages[i] for i in range(len(pages))]

    def get_pdf_masks(self, path: str) -> list:
        """
        create the mask that the bright parts are marked as 255, the rest as 0,
        page by page
        """
        # NOTE(review): make_mask reads the array as BGR - confirm the channel
        # order and the channel count of np.array(page).
        return [
            self.make_mask(np.array(page)) for page in self.get_pdf_images(path)
        ]
-------------------------------------------------------------------------------- /tests/data/dummy.py: -------------------------------------------------------------------------------- 1 | TEST_IMAGE_PATH = "./data/signed_image.jpeg" 2 | TEST_PDF_PATH = "./data/signed_file.pdf" 3 | TEST_TIF_PATH = "./data/signed_image.tif" -------------------------------------------------------------------------------- /tests/data/signed_file.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/tests/data/signed_file.pdf -------------------------------------------------------------------------------- /tests/data/signed_image.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/tests/data/signed_image.jpeg -------------------------------------------------------------------------------- /tests/data/signed_image.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/tests/data/signed_image.tif -------------------------------------------------------------------------------- /tests/data/test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/tests/data/test.txt -------------------------------------------------------------------------------- /tests/test_cropper.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | 4 | import numpy as np 5 | 6 | sys.path.append("..") 7 | 8 | from signature_detect.cropper import Cropper 9 | from signature_detect.extractor import Extractor 10 
class TestCropper(unittest.TestCase):
    """Unit tests of Cropper."""

    def test_init(self):
        cropper = Cropper()
        self.assertEqual(cropper.min_region_size, 10000)

    def test_str(self):
        cropper = Cropper()
        s = "\nCropper\n==========\n"
        s += "min_region_size = {}\n".format(cropper.min_region_size)
        s += "border_ratio = {}\n".format(cropper.border_ratio)
        self.assertEqual(str(cropper), s)

    def test_run(self):
        # Full pipeline on the sample image: load -> extract -> crop.
        mask = Loader().get_masks(TEST_IMAGE_PATH)[0]
        labeled_mask = Extractor().extract(mask)

        results = Cropper().run(labeled_mask)
        self.assertEqual(len(results[0]["cropped_region"]), 4)

        mask_list = list(np.unique(results[0]["cropped_mask"]))
        self.assertEqual(mask_list[0], 0)
        self.assertEqual(mask_list[1], 255)

    def test_boxes2regions(self):
        # border_ratio=0 keeps the boxes unchanged, so the sizes are exact.
        cropper = Cropper(border_ratio=0)
        boxes = [[0, 0, 10, 10], [9, 9, 5, 5], [20, 20, 1, 1]]
        regions = cropper.boxes2regions(boxes)
        self.assertEqual(len(regions), 2)

        # the first 2 boxes intersect and are merged
        self.assertEqual(list(regions[0]), [0, 0, 14, 14])
        # the last box is isolated; x, y, w and h are all checked
        self.assertEqual(list(regions[1]), [20, 20, 1, 1])

    def test_is_intersected(self):
        cropper = Cropper()
        box_b = [10, 10, 1, 1]

        separated_boxes = {
            "y_a > y_b + h_b": [0, 20, 1, 1],
            "y_a + h_a < y_b": [0, 0, 1, 1],
            "x_a > x_b + w_b": [20, 10, 1, 1],
            "x_a + w_a < x_b": [0, 10, 1, 1],
        }
        for reason, box_a in separated_boxes.items():
            with self.subTest(reason=reason):
                self.assertFalse(cropper.is_intersected(box_a, box_b))

        # overlapping boxes
        self.assertTrue(cropper.is_intersected([10, 10, 1, 1], box_b))
        self.assertTrue(cropper.is_intersected([9, 9, 5, 5], box_b))
class TestExtractor(unittest.TestCase):
    """Unit tests of Extractor."""

    def test_init(self):
        extractor = Extractor()
        defaults = {
            "outlier_weight": 3,
            "outlier_bias": 100,
            "amplfier": 10,
            "min_area_size": 10,
        }
        for name, value in defaults.items():
            self.assertEqual(getattr(extractor, name), value)

    def test_str(self):
        expected = "".join(
            [
                "\nExtractor\n==========\n",
                "outlier_weight = 3\n",
                "outlier_bias = 100\n",
                "> small_outlier_size = outlier_weight * average_region_size + outlier_bias\n",
                "amplfier = 10\n",
                "> large_outlier_size = amplfier * small_outlier_size\n",
                "min_area_size = 10 (pixels)\n",
                "> min_area_size is used to calculate average_region_size.\n",
            ]
        )
        self.assertEqual(str(Extractor()), expected)

    def test_extract(self):
        extractor = Extractor()

        # the sample image: the result only holds 0 and 255
        image_mask = Loader().get_masks(TEST_IMAGE_PATH)[0]
        values = list(np.unique(extractor.extract(image_mask)))
        self.assertEqual(values[0], 0)
        self.assertEqual(values[1], 255)

        # a tiny mask: the counts of 0 and 255 are the same before and after
        tiny_mask = np.array([[0, 255, 0], [0, 255, 0]], dtype="uint8")
        extracted = extractor.extract(tiny_mask)

        counts_before = list(np.bincount(tiny_mask.ravel()))
        counts_after = list(np.bincount(extracted.ravel()))
        for value in (0, 255):
            self.assertEqual(counts_before[value], counts_after[value])
class TestJudger(unittest.TestCase):
    """Unit tests of Judger."""

    def test_init(self):
        judger = Judger()
        self.assertEqual(judger.size_ratio[0], 1)
        self.assertEqual(judger.size_ratio[1], 4)
        self.assertEqual(judger.pixel_ratio[0], 0.01)
        self.assertEqual(judger.pixel_ratio[1], 1)

    def test_str(self):
        judger = Judger()
        s = "\nJudger\n==========\n"
        s += "size_ratio = {}\n".format(judger.size_ratio)
        s += "pixel_ratio = {}\n".format(judger.pixel_ratio)
        self.assertEqual(str(judger), s)

    def test_is_valid_mask(self):
        judger = Judger()

        # an invalid mask is never judged as a signature
        self.assertFalse(judger.judge(np.array([[0, 0, 0, 0]])))

        self.assertFalse(judger._is_valid_mask(np.array([0])))
        self.assertFalse(judger._is_valid_mask(np.array([0, 1])))
        self.assertTrue(judger._is_valid_mask(np.array([0, 255])))

    def test_judge(self):
        judger = Judger()

        # size ratio out of range
        self.assertFalse(judger.judge(np.array([[255, 0, 0, 0, 0]])))

        # pixel ratio out of range
        self.assertFalse(judger.judge(np.array([[255, 0], [0, 0]])))

        self.assertTrue(judger.judge(np.array([[255, 255, 255], [0, 255, 255]])))

    def test_run(self):
        # Full pipeline on the sample image: load -> extract -> crop -> judge.
        mask = Loader().get_masks(TEST_IMAGE_PATH)[0]
        labeled_mask = Extractor().extract(mask)
        results = Cropper().run(labeled_mask)

        regions = Judger().run(results)

        # assert
        region = regions[0]
        self.assertEqual(region["id"], 0)
        self.assertEqual(region["signed"], True)
        # array_equal works whether the box is a list or a numpy array
        self.assertTrue(
            np.array_equal(region["box"], results[0]["cropped_region"])
        )
class TestLoader(unittest.TestCase):
    """Unit tests of Loader."""

    def _assert_is_binary_mask(self, mask):
        # helper: the 2 smallest values of a mask are 0 and 255
        values = list(np.unique(mask))
        self.assertEqual(values[0], 0)
        self.assertEqual(values[1], 255)

    def test_loader_init(self):
        self.assertEqual(Loader().low_threshold, (0, 0, 250))

    def test_str(self):
        loader = Loader()
        expected = "\nLoader\n==========\n"
        expected += "low_threshold = {}\n".format(loader.low_threshold)
        expected += "high_threshold = {}\n".format(loader.high_threshold)
        self.assertEqual(str(loader), expected)

    def test_is_valid(self):
        invalid_thresholds = [
            ("", "The threshold must be a tuple."),
            ((0, 1), "The threshold must have 3 item (h, s, v)."),
            ((0, 0, 256), "The threshold item must be in the range [0, 255]."),
            ((0, 0, "0"), "The threshold item must be in the range [0, 255]."),
        ]
        for low_threshold, message in invalid_thresholds:
            with self.assertRaises(Exception) as cm:
                Loader(low_threshold)
            self.assertEqual(cm.exception.__str__(), message)

    def test_get_masks(self):
        loader = Loader()

        # unsupported extension
        with self.assertRaises(Exception) as cm:
            loader.get_masks("./data/test.txt")
        self.assertEqual(
            cm.exception.__str__(), "Document should be jpg/jpeg, png, tif or pdf."
        )

        # jpeg test
        masks = loader.get_masks(TEST_IMAGE_PATH)
        self.assertEqual(len(masks), 1)
        self._assert_is_binary_mask(masks[0])

        # tif test
        masks = loader.get_masks(TEST_TIF_PATH)
        self.assertEqual(len(masks), 1)
        self._assert_is_binary_mask(masks[0])

        # pdf test
        masks = loader.get_masks(TEST_PDF_PATH)
        self.assertEqual(len(masks), 2)
        self._assert_is_binary_mask(masks[1])