├── .gitattributes
├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.md
├── conda.txt
├── data
    ├── input
    │   ├── in1.jpeg
    │   ├── in2.jpeg
    │   ├── in3.jpeg
    │   ├── signed_file.pdf
    │   ├── signed_image.jpeg
    │   └── signed_image.png
    └── output
    │   ├── labeled_mask.jpeg
    │   ├── labeled_mask_0.jpeg
    │   ├── labeled_mask_1.jpeg
    │   ├── mask.jpeg
    │   ├── mask_0.jpeg
    │   ├── mask_1.jpeg
    │   ├── sign.jpeg
    │   └── signature.jpeg
├── demo.ipynb
├── demo.py
├── lab
    ├── data
    │   ├── input
    │   │   ├── signed_file.pdf
    │   │   └── signed_image.jpeg
    │   └── output
    │   │   ├── signed_image.jpg
    │   │   ├── signed_image_boxed.jpeg
    │   │   └── signed_image_region_0.jpeg
    └── dev.ipynb
├── package.md
├── setup.py
├── src
    └── signature_detect
    │   ├── __init__.py
    │   ├── cropper.py
    │   ├── extractor.py
    │   ├── judger.py
    │   └── loader.py
├── tests
    ├── .coveragerc
    ├── coverage.sh
    ├── data
    │   ├── dummy.py
    │   ├── signed_file.pdf
    │   ├── signed_image.jpeg
    │   ├── signed_image.tif
    │   └── test.txt
    ├── test_cropper.py
    ├── test_extractor.py
    ├── test_judger.py
    └── test_loader.py
└── update_package.md


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # project
  2 | .vscode/
  3 | private/
  4 | dev.ipynb
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | *$py.class
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | MANIFEST
 31 | 
 32 | # PyInstaller
 33 | #  Usually these files are written by a python script from a template
 34 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 35 | *.manifest
 36 | *.spec
 37 | 
 38 | # Installer logs
 39 | pip-log.txt
 40 | pip-delete-this-directory.txt
 41 | 
 42 | # Unit test / coverage reports
 43 | htmlcov/
 44 | .tox/
 45 | .nox/
 46 | .coverage
 47 | .coverage.*
 48 | .cache
 49 | nosetests.xml
 50 | coverage.xml
 51 | *.cover
 52 | .hypothesis/
 53 | .pytest_cache/
 54 | 
 55 | # Translations
 56 | *.mo
 57 | *.pot
 58 | 
 59 | # Django stuff:
 60 | *.log
 61 | local_settings.py
 62 | db.sqlite3
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # celery beat schedule file
 88 | celerybeat-schedule
 89 | 
 90 | # SageMath parsed files
 91 | *.sage.py
 92 | 
 93 | # Environments
 94 | .env
 95 | .venv
 96 | env/
 97 | venv/
 98 | ENV/
 99 | env.bak/
100 | venv.bak/
101 | 
102 | # Spyder project settings
103 | .spyderproject
104 | .spyproject
105 | 
106 | # Rope project settings
107 | .ropeproject
108 | 
109 | # mkdocs documentation
110 | /site
111 | 
112 | # mypy
113 | .mypy_cache/
114 | .dmypy.json
115 | dmypy.json
116 | 
117 | # Pyre type checker
118 | .pyre/
119 | 
120 | # macOS
121 | .DS_Store
122 | 
123 | #PyCharm
124 | .idea
125 | __pycache__
126 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Jijie LIU
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | recursive-include tests *.py
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Signature Detection
  2 | 
  3 | A simple tool to detect if there is **a signature** in **an image** or **a PDF file**.
  4 | 
  5 | ## Installation of PyPi
  6 | 
  7 | It's the quick way to use this tool.
  8 | 
  9 | The `signature-detect` package contains the code in `src`.
 10 | 
 11 | ```
 12 | pip install signature-detect
 13 | ```
 14 | 
 15 | ## Installation of Anaconda
 16 | 
 17 | It's the recommended way to explore this tool. It provides notebooks for playing around. 
 18 | 
 19 | 1. install [anaconda](https://docs.anaconda.com/anaconda/install/)
 20 | 
 21 | 2. install dependencies
 22 | 
 23 |     ```
 24 |     conda create --name <env> --file conda.txt
 25 |     ```
 26 | 
 27 | ## Demo
 28 | 
 29 | - Image:
 30 | 
 31 |     ```
 32 |     python demo.py --file my-image.jpeg
 33 |     ```
 34 | 
 35 | - PDF File:
 36 | 
 37 |     ```
 38 |     python demo.py --file my-file.pdf
 39 |     ```
 40 | 
 41 | ## Unit Tests
 42 | 
 43 | All the code in `src` is covered.
 44 | 
 45 | ```
 46 | cd tests
 47 | coverage run -m unittest
 48 | coverage report -m
 49 | ```
 50 | 
 51 | ## Example
 52 | 
 53 | We use the following image as an example. The full example is in the [demo notebook](demo.ipynb)
 54 | 
 55 | ![signed_image](./data/input/signed_image.jpeg)
 56 | 
 57 | ### Loader
 58 | The loader reads the file and creates a mask.
 59 | 
 60 | The mask is a numpy array. The bright parts are set to 255, the rest is set to 0. It contains ONLY these 2 numbers.
 61 | 
 62 | #### Attributes
 63 | 
 64 | - low_threshold = (0, 0, 250)
 65 | 
 66 | - high_threshold = (255, 255, 255)
 67 | 
 68 | They control the creation of the mask, used in the function `cv.inRange`.
 69 | 
 70 | 
 71 | #### Result
 72 | 
 73 | Here, yellow is `255`, purple is `0`.
 74 | 
 75 | ![mask](./data/output/mask.jpeg)
 76 | 
 77 | ### Extractor
 78 | 
 79 | The extractor, first, generates the regions from the mask. 
 80 | 
 81 | Then, it removes the small and the big regions because the signature is neither too big nor too small.
 82 | 
 83 | The process is as follows.
 84 | 
 85 | 1. label the image
 86 | 
 87 |    `skimage.measure.label` labels the connected regions of an integer array. It returns a labeled array, where all connected regions are assigned the same integer value.
 88 | 
 89 | 2. calculate the average size of regions
 90 | 
 91 |    Here, the size means **the number of the pixels in a region**. 
 92 | 
 93 |    We accumulate the number of the pixels in all the regions, `total_pixels`. The average size is `total_pixels / nb_regions`.
 94 | 
 95 |    If the size of a region is smaller than `min_area_size`, this region is ignored. `min_area_size` is given by the user.
 96 | 
 97 | 3. calculate the size of the small outlier
 98 | 
 99 |     ```
100 |     small_size_outlier = average * outlier_weight + outlier_bias
101 |     ```
102 |     
103 |     `outlier_weight` and `outlier_bias` are given by the user.
104 | 
105 | 4. calculate the size of the big outlier
106 | 
107 |     ```
108 |     big_size_outlier = small_size_outlier * amplfier
109 |     ```
110 | 
111 |     `amplfier` is given by the user.
112 | 
113 | 5. remove the small and big outliers
114 | 
115 | #### Attributes
116 | 
117 | - outlier_weight = 3
118 | 
119 | - outlier_bias = 100
120 | 
121 | - amplfier = 10 
122 | 
123 |   > `15` is used in the demo.
124 | 
125 | - min_area_size = 10
126 | 
127 | #### Result
128 | 
129 | ![labeled_image](./data/output/labeled_mask.jpeg)
130 | 
131 | 
132 | ### Cropper
133 | 
134 | The cropper finds the **contours** of regions in the **labeled masks** and crops them.
135 | 
136 | #### Attributes
137 | 
138 | Suppose `(h, w) = region.shape`.
139 | 
140 | - min_region_size = 10000
141 | 
142 |    If `h * w < min_region_size`, then this region is ignored.
143 | 
144 | - border_ratio: float
145 | 
146 |     border = min(h, w) * border_ratio
147 | 
148 |     The border will be removed if this attribute is not `0`.
149 | 
150 | #### Result
151 | 
152 | ![signature](./data/output/signature.jpeg)
153 | 
154 | 
155 | ### Judger
156 | 
157 | The judger reads the cropped mask and identifies if it's a signature or not.
158 | 
159 | #### Attributes
160 | 
161 | Suppose `(h, w) = cropped_mask.shape`.
162 | 
163 | - size_ratio: [low, high]
164 | 
165 |     low < max(h, w) / min(h, w) < high.
166 | 
167 | - max_pixel_ratio: [low, high]
168 | 
169 |     low < the number of 0 / the number of 255 < high.
170 | 
171 |     The mask should only have 2 values, 0 and 255.
172 | 
173 | By default: 
174 | 
175 | - size_ratio = [1, 4]
176 | 
177 | - max_pixel_ratio = [0.01, 1]
178 | 
179 | #### Result
180 | 
181 | - `max(h, w) / min(h, w)` = 3.48
182 | 
183 | - number of `0` / number of `255` = 0.44
184 | 
185 | So, this image is signed.


--------------------------------------------------------------------------------
/conda.txt:
--------------------------------------------------------------------------------
  1 | # This file may be used to create an environment using:
  2 | # $ conda create --name <env> --file <this file>
  3 | # platform: osx-64
  4 | anyio=2.2.0=py38h50d1736_0
  5 | appdirs=1.4.4=pyh9f0ad1d_0
  6 | appnope=0.1.2=py38h50d1736_1
  7 | argon2-cffi=20.1.0=py38h5406a74_2
  8 | async_generator=1.10=py_0
  9 | atk-1.0=2.36.0=he69c4ee_4
 10 | attrs=20.3.0=pyhd3deb0d_0
 11 | babel=2.9.0=pyhd3deb0d_0
 12 | backcall=0.2.0=pyh9f0ad1d_0
 13 | backports=1.0=py_2
 14 | backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
 15 | black=20.8b1=py_1
 16 | bleach=3.3.0=pyh44b312d_0
 17 | blosc=1.21.0=he49afe7_0
 18 | brotli=1.0.9=h046ec9c_4
 19 | brotlipy=0.7.0=py38h5406a74_1001
 20 | brunsli=0.1=h046ec9c_0
 21 | bzip2=1.0.8=hc929b4f_4
 22 | c-ares=1.17.1=h0d85af4_1
 23 | ca-certificates=2020.12.5=h033912b_0
 24 | cairo=1.16.0=he43a7df_1008
 25 | certifi=2020.12.5=py38h50d1736_1
 26 | cffi=1.14.5=py38ha97d567_0
 27 | chardet=4.0.0=py38h50d1736_1
 28 | charls=2.2.0=h046ec9c_0
 29 | click=7.1.2=pyh9f0ad1d_0
 30 | cloudpickle=1.6.0=py_0
 31 | coverage=5.5=py38h96a0964_0
 32 | cryptography=3.4.7=py38h1fa4640_0
 33 | cycler=0.10.0=py_2
 34 | cytoolz=0.11.0=py38h5406a74_3
 35 | dask-core=2021.4.0=pyhd8ed1ab_0
 36 | dataclasses=0.8=pyhc8e2a94_1
 37 | decorator=5.0.6=pyhd8ed1ab_0
 38 | defusedxml=0.7.1=pyhd8ed1ab_0
 39 | entrypoints=0.3=pyhd8ed1ab_1003
 40 | expat=2.3.0=he49afe7_0
 41 | ffmpeg=4.3.1=hb787a4c_2
 42 | fftw=3.3.9=nompi_h02cd531_101
 43 | font-ttf-dejavu-sans-mono=2.37=hab24e00_0
 44 | font-ttf-inconsolata=2.001=hab24e00_0
 45 | font-ttf-source-code-pro=2.030=hab24e00_0
 46 | font-ttf-ubuntu=0.83=hab24e00_0
 47 | fontconfig=2.13.1=h10f422b_1005
 48 | fonts-conda-ecosystem=1=0
 49 | fonts-conda-forge=1=0
 50 | freetype=2.10.4=h4cff582_1
 51 | fribidi=1.0.10=hbcb3906_0
 52 | fsspec=0.9.0=pyhd8ed1ab_0
 53 | gdk-pixbuf=2.42.6=h2e6141f_0
 54 | gettext=0.19.8.1=h7937167_1005
 55 | ghostscript=9.53.3=h2e338ed_2
 56 | giflib=5.2.1=hbcb3906_2
 57 | gmp=6.2.1=h2e338ed_0
 58 | gnutls=3.6.13=h756fd2b_1
 59 | graphite2=1.3.13=h2e338ed_1001
 60 | graphviz=2.47.0=hdb475ea_1
 61 | gtk2=2.24.33=h55c0dba_0
 62 | gts=0.7.6=hccb3bdf_2
 63 | harfbuzz=2.8.0=h159f659_1
 64 | hdf5=1.10.6=nompi_hc5d9132_1114
 65 | icu=68.1=h74dc148_0
 66 | idna=2.10=pyh9f0ad1d_0
 67 | imagecodecs=2021.3.31=py38hc7b77fe_0
 68 | imageio=2.9.0=py_0
 69 | imagemagick=7.0.11_7=pl5320h46a8306_0
 70 | importlib-metadata=3.10.1=py38h50d1736_0
 71 | ipykernel=5.3.4=py38h5ca1d4c_0
 72 | ipython=7.22.0=py38h6c79ece_0
 73 | ipython_genutils=0.2.0=py_1
 74 | jasper=1.900.1=h636a363_1006
 75 | jbig=2.1=h0d85af4_2002
 76 | jedi=0.18.0=py38h50d1736_2
 77 | jinja2=2.11.3=pyh44b312d_0
 78 | joblib=1.0.1=pyhd8ed1ab_0
 79 | jpeg=9d=hbcb3906_0
 80 | json5=0.9.5=pyh9f0ad1d_0
 81 | jsonschema=3.2.0=pyhd8ed1ab_3
 82 | jupyter-packaging=0.7.12=pyhd8ed1ab_0
 83 | jupyter_client=6.1.12=pyhd8ed1ab_0
 84 | jupyter_core=4.7.1=py38h50d1736_0
 85 | jupyter_server=1.6.1=py38h50d1736_0
 86 | jupyterlab=3.0.12=pyhd8ed1ab_0
 87 | jupyterlab_pygments=0.1.2=pyh9f0ad1d_0
 88 | jupyterlab_server=2.4.0=pyhd8ed1ab_0
 89 | jxrlib=1.1=h35c211d_2
 90 | kiwisolver=1.3.1=py38hd9c93a9_1
 91 | krb5=1.17.2=h60d9502_0
 92 | lame=3.100=h35c211d_1001
 93 | lcms2=2.12=h577c468_0
 94 | lerc=2.2.1=h046ec9c_0
 95 | libaec=1.0.4=h046ec9c_1
 96 | libblas=3.9.0=8_openblas
 97 | libcblas=3.9.0=8_openblas
 98 | libcurl=7.76.0=h8ef9fac_0
 99 | libcxx=11.1.0=habf9029_0
100 | libdeflate=1.7=h35c211d_5
101 | libedit=3.1.20191231=h0678c8f_2
102 | libev=4.33=haf1e3a3_1
103 | libffi=3.3=h046ec9c_2
104 | libgd=2.3.2=h4e7a7ea_0
105 | libgfortran=5.0.0=9_3_0_h6c81a4c_22
106 | libgfortran5=9.3.0=h6c81a4c_22
107 | libglib=2.68.1=hd556434_0
108 | libiconv=1.16=haf1e3a3_0
109 | liblapack=3.9.0=8_openblas
110 | liblapacke=3.9.0=8_openblas
111 | libnghttp2=1.43.0=h07e645a_0
112 | libopenblas=0.3.12=openmp_h54245bb_1
113 | libopencv=4.5.1=py38h229d5d6_1
114 | libpng=1.6.37=h7cec526_2
115 | libprotobuf=3.15.8=hcf210ce_0
116 | librsvg=2.50.3=hd4300c0_1
117 | libsodium=1.0.18=hbcb3906_1
118 | libssh2=1.9.0=h52ee1ee_6
119 | libtiff=4.2.0=h355d032_0
120 | libtool=2.4.6=h2e338ed_1007
121 | libwebp=1.2.0=h1648767_0
122 | libwebp-base=1.2.0=h0d85af4_2
123 | libxcb=1.13=h35c211d_1003
124 | libxml2=2.9.10=h93ec3fd_4
125 | libzopfli=1.0.3=h046ec9c_0
126 | llvm-openmp=11.1.0=hda6cdc1_1
127 | locket=0.2.0=py_2
128 | lz4-c=1.9.3=h046ec9c_0
129 | markupsafe=1.1.1=py38h5406a74_3
130 | matplotlib=3.4.1=py38h50d1736_0
131 | matplotlib-base=3.4.1=py38h6152e83_0
132 | mistune=0.8.4=py38h5406a74_1003
133 | mypy_extensions=0.4.3=py38h50d1736_3
134 | nbclassic=0.2.7=pyhd8ed1ab_0
135 | nbclient=0.5.3=pyhd8ed1ab_0
136 | nbconvert=6.0.7=py38h50d1736_3
137 | nbformat=5.1.3=pyhd8ed1ab_0
138 | ncurses=6.2=h2e338ed_4
139 | nest-asyncio=1.5.1=pyhd8ed1ab_0
140 | nettle=3.6=hedd7734_0
141 | networkx=2.5=py_0
142 | notebook=6.3.0=py38h50d1736_0
143 | numpy=1.20.2=py38had91d27_0
144 | olefile=0.46=pyh9f0ad1d_1
145 | opencv=4.5.1=py38h50d1736_1
146 | openh264=2.1.1=hfd3ada9_0
147 | openjpeg=2.4.0=h6cbf5cd_0
148 | openssl=1.1.1k=h0d85af4_0
149 | packaging=20.9=pyh44b312d_0
150 | pandoc=2.13=h0d85af4_0
151 | pandocfilters=1.4.2=py_1
152 | pango=1.42.4=ha86e081_5
153 | parso=0.8.2=pyhd8ed1ab_0
154 | partd=1.2.0=pyhd8ed1ab_0
155 | pathspec=0.8.1=pyhd3deb0d_0
156 | pcre=8.44=hb1e8313_0
157 | perl=5.32.0=hbcb3906_0
158 | pexpect=4.8.0=pyh9f0ad1d_2
159 | pickleshare=0.7.5=py_1003
160 | pillow=8.1.2=py38h83525de_1
161 | pip=21.0.1=pyhd8ed1ab_0
162 | pixman=0.40.0=hbcb3906_0
163 | pkg-config=0.29.2=h31203cd_1008
164 | prometheus_client=0.10.1=pyhd8ed1ab_0
165 | prompt-toolkit=3.0.18=pyha770c72_0
166 | pthread-stubs=0.4=hc929b4f_1001
167 | ptyprocess=0.7.0=pyhd3deb0d_0
168 | py-opencv=4.5.1=py38h6c79ece_1
169 | pycparser=2.20=pyh9f0ad1d_2
170 | pygments=2.8.1=pyhd8ed1ab_0
171 | pyopenssl=20.0.1=pyhd8ed1ab_0
172 | pyparsing=2.4.7=pyh9f0ad1d_0
173 | pyrsistent=0.17.3=py38h5406a74_2
174 | pysocks=1.7.1=py38h50d1736_3
175 | python=3.8.8=h4e93d89_0_cpython
176 | python-dateutil=2.8.1=py_0
177 | python_abi=3.8=1_cp38
178 | pytz=2021.1=pyhd8ed1ab_0
179 | pywavelets=1.1.1=py38hc7193ba_3
180 | pyyaml=5.4.1=py38h5406a74_0
181 | pyzmq=22.0.3=py38hd3b92b6_1
182 | readline=8.1=h05e3726_0
183 | regex=2021.4.4=py38h96a0964_0
184 | requests=2.25.1=pyhd3deb0d_0
185 | scikit-image=0.17.2=py38h81aa140_0
186 | scikit-learn=0.23.2=py38h959d312_0
187 | scipy=1.6.2=py38h431c0a8_0
188 | send2trash=1.5.0=py_0
189 | setuptools=49.6.0=py38h50d1736_3
190 | six=1.15.0=pyh9f0ad1d_0
191 | snappy=1.1.8=hb1e8313_3
192 | sniffio=1.2.0=py38h50d1736_1
193 | sqlite=3.35.4=h44b9ce1_0
194 | terminado=0.9.4=py38h50d1736_0
195 | testpath=0.4.4=py_0
196 | threadpoolctl=2.1.0=pyh5ca1d4c_0
197 | tifffile=2021.4.8=pyhd8ed1ab_0
198 | tk=8.6.10=h0419947_1
199 | toml=0.10.2=pyhd8ed1ab_0
200 | toolz=0.11.1=py_0
201 | tornado=6.1=py38h5406a74_1
202 | traitlets=5.0.5=py_0
203 | typed-ast=1.4.3=py38h96a0964_0
204 | typing_extensions=3.7.4.3=py_0
205 | urllib3=1.26.4=pyhd8ed1ab_0
206 | wand=0.6.5=pyhd3deb0d_0
207 | wcwidth=0.2.5=pyh9f0ad1d_2
208 | webencodings=0.5.1=py_1
209 | wheel=0.36.2=pyhd3deb0d_0
210 | x264=1!161.3030=h35c211d_0
211 | xorg-kbproto=1.0.7=h35c211d_1002
212 | xorg-libice=1.0.10=h0d85af4_0
213 | xorg-libsm=1.2.3=h0d85af4_1000
214 | xorg-libx11=1.7.0=h35c211d_0
215 | xorg-libxau=1.0.9=h35c211d_0
216 | xorg-libxdmcp=1.1.3=h35c211d_0
217 | xorg-libxext=1.3.4=h0d85af4_1
218 | xorg-libxrender=0.9.10=h0d85af4_1003
219 | xorg-libxt=1.2.1=h0d85af4_2
220 | xorg-renderproto=0.11.1=h0d85af4_1002
221 | xorg-xextproto=7.3.0=h35c211d_1002
222 | xorg-xproto=7.0.31=h35c211d_1007
223 | xz=5.2.5=haf1e3a3_1
224 | yaml=0.2.5=haf1e3a3_0
225 | zeromq=4.3.4=h1c7c35f_0
226 | zfp=0.5.5=he49afe7_5
227 | zipp=3.4.1=pyhd8ed1ab_0
228 | zlib=1.2.11=h7795811_1010
229 | zstd=1.4.9=h582d3a0_0
230 | 


--------------------------------------------------------------------------------
/data/input/in1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/input/in1.jpeg


--------------------------------------------------------------------------------
/data/input/in2.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/input/in2.jpeg


--------------------------------------------------------------------------------
/data/input/in3.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/input/in3.jpeg


--------------------------------------------------------------------------------
/data/input/signed_file.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/input/signed_file.pdf


--------------------------------------------------------------------------------
/data/input/signed_image.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/input/signed_image.jpeg


--------------------------------------------------------------------------------
/data/input/signed_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/input/signed_image.png


--------------------------------------------------------------------------------
/data/output/labeled_mask.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/output/labeled_mask.jpeg


--------------------------------------------------------------------------------
/data/output/labeled_mask_0.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/output/labeled_mask_0.jpeg


--------------------------------------------------------------------------------
/data/output/labeled_mask_1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/output/labeled_mask_1.jpeg


--------------------------------------------------------------------------------
/data/output/mask.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/output/mask.jpeg


--------------------------------------------------------------------------------
/data/output/mask_0.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/output/mask_0.jpeg


--------------------------------------------------------------------------------
/data/output/mask_1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/output/mask_1.jpeg


--------------------------------------------------------------------------------
/data/output/sign.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/output/sign.jpeg


--------------------------------------------------------------------------------
/data/output/signature.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/data/output/signature.jpeg


--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | from signature_detect.cropper import Cropper
 4 | from signature_detect.extractor import Extractor
 5 | from signature_detect.loader import Loader
 6 | from signature_detect.judger import Judger
 7 | 
 8 | 
 9 | def main(file_path: str) -> None:
10 |     loader = Loader()
11 |     extractor = Extractor(amplfier=15)
12 |     cropper = Cropper()
13 |     judger = Judger()
14 | 
15 |     try:
16 |         masks = loader.get_masks(file_path)
17 |         is_signed = False
18 |         for mask in masks:
19 |             labeled_mask = extractor.extract(mask)
20 |             results = cropper.run(labeled_mask)
21 |             for result in results.values():
22 |                 is_signed = judger.judge(result["cropped_mask"])
23 |                 if is_signed:
24 |                     break
25 |             if is_signed:
26 |                 break
27 |         print(is_signed)
28 |     except Exception as e:
29 |         print(e)
30 | 
31 | 
32 | if __name__ == "__main__":
33 |     file_path = None
34 |     for i in range(len(sys.argv)):
35 |         if sys.argv[i] == "--file":
36 |             file_path = sys.argv[i + 1]
37 |     if file_path is None:
38 |         print("Need input file")
39 |         print("python demo.py --file my-file.pdf")
40 |     else:
41 |         main(file_path)
42 | 


--------------------------------------------------------------------------------
/lab/data/input/signed_file.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/lab/data/input/signed_file.pdf


--------------------------------------------------------------------------------
/lab/data/input/signed_image.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/lab/data/input/signed_image.jpeg


--------------------------------------------------------------------------------
/lab/data/output/signed_image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/lab/data/output/signed_image.jpg


--------------------------------------------------------------------------------
/lab/data/output/signed_image_boxed.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/lab/data/output/signed_image_boxed.jpeg


--------------------------------------------------------------------------------
/lab/data/output/signed_image_region_0.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/lab/data/output/signed_image_region_0.jpeg


--------------------------------------------------------------------------------
/package.md:
--------------------------------------------------------------------------------
 1 | # Signature Detection
 2 | 
 3 | A simple tool to detect if there are **signatures** in **an image** or **a PDF file**.
 4 | 
 5 | The full documentation is available in the [GitHub repository](https://github.com/EnzoSeason/signature_detection).
 6 | 
 7 | ## Extra Dependencies
 8 | 
 9 | This tool uses `Wand` to convert a PDF file into images. 
10 | 
11 | `Wand` is a ctypes-based simple `ImageMagick` binding for Python. You may need to install `ImageMagick` on your local machine.
12 | 
13 | More details are available [here](https://docs.wand-py.org/en/0.6.6/).
14 | 
15 | ## Usage
16 | 
17 | Signature_detect contains 4 classes.
18 | 
19 | 1. Loader
20 | 2. Extractor
21 | 3. Cropper
22 | 4. Judger
23 | 
24 | ### Loader
25 | 
26 | Loader can read an image or a PDF file page by page.
27 | 
28 | It returns a list of masks. Each mask is a 2-dimensional numpy array whose elements are either `0` or `255`.
29 | 
30 | ```python
31 | from signature_detect.loader import Loader
32 | 
33 | loader = Loader(
34 |     low_threshold=(0, 0, 250), 
35 |     high_threshold=(255, 255, 255))
36 | 
37 | masks = loader.get_masks(file_path)
38 | ```
39 | 
40 | 
41 | ### Extractor
42 | 
43 | Extractor reads a mask, labels the regions in the mask, and removes both small and big regions. We consider that the signature is a region of middle size.
44 | 
45 | ```python
46 | from signature_detect.extractor import Extractor
47 | 
48 | extractor = Extractor(
49 |     outlier_weight=3, 
50 |     outlier_bias=100, 
51 |     amplfier=10, 
52 |     min_area_size=10)
53 | 
54 | labeled_mask = extractor.extract(mask)
55 | ```
56 | 
57 | ### Cropper
58 | 
59 | Cropper crops the regions in the labeled mask.
60 | 
61 | ```python
62 | from signature_detect.cropper import Cropper
63 | 
64 | cropper = Cropper(
65 |     min_region_size=10000, 
66 |     border_ratio=0.1)
67 | 
68 | results = cropper.run(labeled_mask)
69 | ```
70 | 
71 | ### Judger
72 | 
73 | Judger decides whether a region is a signature.
74 | 
75 | ```python
76 | from signature_detect.judger import Judger
77 | 
78 | judger = Judger(
79 |     size_ratio=[1, 4], 
80 |     pixel_ratio=[0.01, 1])
81 | 
82 | is_signed = judger.judge(result["cropped_mask"])
83 | ```
84 | 
85 | ## Dev version
86 | 
87 | If you would like to develop this package and run the tests, you can download the code and install dev environment locally.
88 | 
89 | ```bash
90 | pip install -e .[dev]
91 | ```


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | with open("package.md", "r") as md:
 4 |     long_description = md.read()
 5 | 
 6 | setup(
 7 |     name="signature-detect",
 8 |     version="0.1.4",
 9 |     url="https://github.com/EnzoSeason/signature_detection",
10 |     author="Jijie LIU",
11 |     author_email="liujijieseason@gmail.com",
12 |     description="A package for the signature detection",
13 |     long_description=long_description,
14 |     long_description_content_type="text/markdown",
15 |     py_modules=[
16 |         "signature_detect",
17 |     ],
18 |     package_dir={"": "src"},
19 |     install_requires=[
20 |         "numpy>=1.0",
21 |         "pillow>=8.0",
22 |         "scikit-image",
23 |         "Wand",
24 |         "opencv-python",
25 |     ],
26 |     extras_require={"dev": ["coverage>=5.5"]},
27 |     license = "MIT",
28 | )


--------------------------------------------------------------------------------
/src/signature_detect/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/src/signature_detect/__init__.py


--------------------------------------------------------------------------------
/src/signature_detect/cropper.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | from typing import Any
  3 | import cv2
  4 | import numpy as np
  5 | from PIL import Image
  6 | 
  7 | 
  8 | class Cropper:
  9 |     """
 10 |     read the mask extracted by Extractor, and crop it.
 11 | 
 12 |     Attributes:
 13 |     -----------
 14 |       - min_region_size
 15 | 
 16 |         the min area size of the signature.
 17 | 
 18 |       - border_ratio: float
 19 | 
 20 |           border = min(h, w) * border_ratio
 21 | 
 22 |           h, w are the height and width of the input mask.
 23 |           The border will be removed by the function _remove_borders.
 24 | 
 25 |     Methods:
 26 |     --------
 27 |       - find_contours(img: numpy array) -> sorted_boxes: numpy array
 28 | 
 29 |         find the contours and sort them by area size
 30 | 
 31 |       - is_intersected(box_a: [x, y, w, h], box_b: [x, y, w, h]) -> bool
 32 | 
 33 |         check whether box_a and box_b intersect
 34 | 
 35 |       - merge_boxes(box_a: [x, y, w, h], box_b: [x, y, w, h]) -> [x, y, w, h]:
 36 | 
 37 |         merge the intersected boxes into one
 38 | 
 39 |       - boxes2regions(sorted_boxes) -> dict:
 40 | 
 41 |         transform all the sorted_boxes into regions (merged boxes)
 42 | 
 43 |       - crop_regions(img: numpy array, regions: dict) -> list:
 44 | 
 45 |         return a list of cropped images (np.array)
 46 | 
 47 |       - run(img_path) -> list
 48 | 
 49 |         main function, crop the signatures,
 50 |         return a list of cropped images (np.array)
 51 |     """
 52 | 
 53 |     def __init__(self, min_region_size=10000, border_ratio=0.1):
 54 |         self.min_region_size = min_region_size
 55 |         self.border_ratio = border_ratio
 56 | 
 57 |     def __str__(self) -> str:
 58 |         s = "\nCropper\n==========\n"
 59 |         s += "min_region_size = {}\n".format(self.min_region_size)
 60 |         s += "border_ratio = {}\n".format(self.border_ratio)
 61 |         return s
 62 | 
 63 |     def find_contours(self, img):
 64 |         """
 65 |         find contours limited by min_region_size
 66 |         in the binary image.
 67 | 
 68 |         The contours are sorted by area size, from large to small.
 69 | 
 70 |         Params:
 71 |           img: numpy array
 72 |         Return:
 73 |           boxes: A numpy array of contours.
 74 |           each items in the array is a contour (x, y, w, h)
 75 |         """
 76 |         cnts = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
 77 |         cnt = cnts[0] if len(cnts) == 2 else cnts[1]
 78 | 
 79 |         boxes = []
 80 |         copy_img = img.copy()
 81 |         for c in cnt:
 82 |             (x, y, w, h) = cv2.boundingRect(c)
 83 | 
 84 |             if (
 85 |                 h * w > self.min_region_size
 86 |                 and h < copy_img.shape[0]
 87 |                 and w < copy_img.shape[1]
 88 |             ):
 89 | 
 90 |                 # cv2.rectangle(copy_img, (x, y), (x + w, y + h), (155, 155, 0), 1)
 91 |                 boxes.append([x, y, w, h])
 92 | 
 93 |         np_boxes = np.array(boxes)
 94 |         # sort the boxes by area size
 95 |         area_size = list(map(lambda box: box[2] * box[3], np_boxes))
 96 |         area_size = np.array(area_size)
 97 |         area_dec_order = area_size.argsort()[::-1]
 98 |         sorted_boxes = np_boxes[area_dec_order]
 99 | 
100 |         return sorted_boxes
101 | 
102 |     def is_intersected(self, new_box, orignal_box) -> bool:
103 |         [x_a, y_a, w_a, h_a] = new_box
104 |         [x_b, y_b, w_b, h_b] = orignal_box
105 | 
106 |         if y_a > y_b + h_b:
107 |             return False
108 |         if y_a + h_a < y_b:
109 |             return False
110 |         if x_a > x_b + w_b:
111 |             return False
112 |         if x_a + w_a < x_b:
113 |             return False
114 |         return True
115 | 
116 |     def merge_boxes(self, box_a, box_b) -> list:
117 |         """
118 |         merge 2 intersected box into one
119 |         """
120 |         [x_a, y_a, w_a, h_a] = box_a
121 |         [x_b, y_b, w_b, h_b] = box_b
122 | 
123 |         min_x = min(x_a, x_b)
124 |         min_y = min(y_a, y_b)
125 |         max_w = max(w_a, w_b, (x_b + w_b - x_a), (x_a + w_a - x_b))
126 |         max_h = max(h_a, h_b, (y_b + h_b - y_a), (y_a + h_a - y_b))
127 | 
128 |         return [min_x, min_y, max_w, max_h]
129 | 
130 |     def _remove_borders(self, box) -> Any:
131 |         """
132 |         remove the borders around the box
133 |         """
134 |         [x, y, w, h] = box
135 |         border = math.floor(min(w, h) * self.border_ratio)
136 |         return [x + border, y + border, w - border, h - border]
137 | 
138 |     def boxes2regions(self, sorted_boxes) -> dict:
139 |         regions = {}
140 | 
141 |         for box in sorted_boxes:
142 |             if len(regions) == 0:
143 |                 regions[0] = box
144 |             else:
145 |                 is_merged = False
146 |                 for key, region in regions.items():
147 |                     if self.is_intersected(box, region) == True:
148 |                         new_region = self.merge_boxes(region, box)
149 |                         regions[key] = self._remove_borders(new_region)
150 |                         is_merged = True
151 |                         break
152 |                 if is_merged == False:
153 |                     key = len(regions)
154 |                     regions[key] = self._remove_borders(box)
155 | 
156 |         return regions
157 | 
158 |     def get_cropped_masks(self, mask, regions) -> dict:
159 |         """
160 |         return cropped masks
161 |         """
162 | 
163 |         results = {}
164 |         for key, region in regions.items():
165 |             [x, y, w, h] = region
166 |             image = Image.fromarray(mask)
167 |             cropped_image = image.crop((x, y, x + w, y + h))
168 |             cropped_mask = np.array(cropped_image)
169 | 
170 |             results[key] = cropped_mask
171 |         return results
172 | 
173 |     def merge_regions_and_masks(self, mask, regions) -> dict:
174 |         """
175 |         helper function: put regions and masks in a dict, and return it.
176 |         """
177 | 
178 |         cropped_image = self.get_cropped_masks(mask, regions)
179 |         results = {}
180 | 
181 |         for key in regions.keys():
182 |             results[key] = {
183 |                 "cropped_region": regions[key],
184 |                 "cropped_mask": cropped_image[key],
185 |             }
186 | 
187 |         return results
188 | 
189 |     def run(self, np_image):
190 |         """
191 |         read the signature extracted by Extractor, and crop it.
192 |         """
193 | 
194 |         # find contours
195 |         sorted_boxes = self.find_contours(np_image)
196 | 
197 |         # get regions
198 |         regions = self.boxes2regions(sorted_boxes)
199 | 
200 |         # crop regions
201 |         return self.merge_regions_and_masks(np_image, regions)
202 | 


--------------------------------------------------------------------------------
/src/signature_detect/extractor.py:
--------------------------------------------------------------------------------
  1 | from typing import Any
  2 | from skimage import measure, morphology
  3 | from skimage.measure import regionprops
  4 | import numpy as np
  5 | 
  6 | 
  7 | class Extractor:
  8 |     """
  9 |     Extract the signature from a mask. The process is as followed.
 10 | 
 11 |     1. It finds the regions in an image mask. Each region has a label (unique number).
 12 |     2. It removes the small regions. The small region is defined by attributes.
 13 |     3. It remove the big regions. The big region is defined by attributes.
 14 |     4. It returns a labeled image. The numbers in the image are the region labels, NOT pixels.
 15 | 
 16 |     Attributes
 17 |     ----------
 18 |     outlier_weight: int
 19 |         The weight of small outlier size
 20 |     outlier_bias: int
 21 |         The bias of small outlier size
 22 |     amplfier: int
 23 |         The amplfier calculates the big outlier size from the small one
 24 |     min_area_size: int
 25 |         The min region area size in the labeled image.
 26 | 
 27 |     Methods
 28 |     -------
 29 |     extract(mask: numpy.array):
 30 |         extract the signature
 31 |     """
 32 | 
 33 |     def __init__(
 34 |         self, outlier_weight=3, outlier_bias=100, amplfier=10, min_area_size=10
 35 |     ):
 36 |         # the parameters are used to remove small size connected pixels outlier
 37 |         self.outlier_weight = outlier_weight
 38 |         self.outlier_bias = outlier_bias
 39 |         # the parameter is used to remove big size connected pixels outlier
 40 |         self.amplfier = amplfier
 41 |         self.min_area_size = min_area_size
 42 | 
 43 |     def __str__(self) -> str:
 44 |         s = "\nExtractor\n==========\n"
 45 |         s += "outlier_weight = {}\n".format(self.outlier_weight)
 46 |         s += "outlier_bias = {}\n".format(self.outlier_bias)
 47 |         s += "> small_outlier_size = outlier_weight * average_region_size + outlier_bias\n"
 48 |         s += "amplfier = {}\n".format(self.amplfier)
 49 |         s += "> large_outlier_size = amplfier * small_outlier_size\n"
 50 |         s += "min_area_size = {} (pixels)\n".format(self.min_area_size)
 51 |         s += "> min_area_size is used to calculate average_region_size.\n"
 52 |         return s
 53 | 
 54 |     def extract(self, mask) -> Any:
 55 |         """
 56 |         params
 57 |         ------
 58 |         mask: numpy array
 59 |             The mask of the image. It's calculated by Loader.
 60 | 
 61 |         return
 62 |         ------
 63 |         labeled_image: numpy array
 64 |             The labeled image.
 65 |             The numbers in the array are the region labels.
 66 |         """
 67 |         condition = mask > mask.mean()
 68 |         labels = measure.label(condition, background=1)
 69 | 
 70 |         total_pixels = 0
 71 |         nb_region = 0
 72 |         average = 0.0
 73 |         for region in regionprops(labels):
 74 |             if region.area > self.min_area_size:
 75 |                 total_pixels += region.area
 76 |                 nb_region += 1
 77 |         
 78 |         if nb_region > 1:
 79 |             average = total_pixels / nb_region
 80 |             # small_size_outlier is used as a threshold value to remove pixels
 81 |             # are smaller than small_size_outlier
 82 |             small_size_outlier = average * self.outlier_weight + self.outlier_bias
 83 | 
 84 |             # big_size_outlier is used as a threshold value to remove pixels
 85 |             # are bigger than big_size_outlier
 86 |             big_size_outlier = small_size_outlier * self.amplfier
 87 | 
 88 |             # remove small pixels
 89 |             labeled_image = morphology.remove_small_objects(labels, small_size_outlier)
 90 |             # remove the big pixels
 91 |             component_sizes = np.bincount(labeled_image.ravel())
 92 |             too_small = component_sizes > (big_size_outlier)
 93 |             too_small_mask = too_small[labeled_image]
 94 |             labeled_image[too_small_mask] = 0
 95 | 
 96 |             labeled_mask = np.full(labeled_image.shape, 255, dtype="uint8")
 97 |             labeled_mask = labeled_mask * (labeled_image == 0)
 98 |         else:
 99 |             labeled_mask = mask
100 | 
101 |         return labeled_mask
102 | 


--------------------------------------------------------------------------------
/src/signature_detect/judger.py:
--------------------------------------------------------------------------------
 1 | from typing import Any
 2 | import numpy as np
 3 | 
 4 | 
 5 | class Judger:
 6 |     """
 7 |     read the cropped mask and identify if it's a signature
 8 | 
 9 |     Attributes:
10 |     -----------
11 |     - size_ratio: [low, high]
12 | 
13 |         low < max(h, w) / min(h, w) < high.
14 | 
15 |         h, w are the heigth and width of the input mask.
16 | 
17 |     - max_pixel_ratio: [low, high]
18 | 
19 |        low < the number of 0 / the number of 255 < high.
20 | 
21 |        The mask should only have 2 value, 0 and 255.
22 | 
23 |     Methods:
24 |     --------
25 |     - judge(mask: numpy array) -> bool
26 | 
27 |        identify if the mask is a signature
28 |     """
29 | 
30 |     def __init__(self, size_ratio=[1, 4], pixel_ratio=[0.01, 1]) -> None:
31 |         self.size_ratio = size_ratio
32 |         self.pixel_ratio = pixel_ratio
33 | 
34 |     def __str__(self) -> str:
35 |         s = "\nJudger\n==========\n"
36 |         s += "size_ratio = {}\n".format(str(self.size_ratio))
37 |         s += "pixel_ratio = {}\n".format(str(self.pixel_ratio))
38 |         return s
39 | 
40 |     def _is_valid_mask(self, mask: Any) -> bool:
41 |         values = np.unique(mask)
42 |         if len(values) != 2:
43 |             return False
44 |         if values[0] != 0 or values[1] != 255:
45 |             return False
46 |         return True
47 | 
48 |     def judge(self, mask: Any) -> bool:
49 |         if self._is_valid_mask(mask):
50 |             size_ratio = max(mask.shape) / min(mask.shape)
51 |             if size_ratio < self.size_ratio[0] or size_ratio > self.size_ratio[1]:
52 |                 return False
53 | 
54 |             bincounts = np.bincount(mask.ravel())
55 |             pixel_ratio = bincounts[0] / bincounts[255]
56 |             if pixel_ratio < self.pixel_ratio[0] or pixel_ratio > self.pixel_ratio[1]:
57 |                 return False
58 | 
59 |             return True
60 |         else:
61 |             return False
62 | 
63 |     def run(self, results: dict) -> list:
64 |         regions = []
65 |         for idx, result in results.items():
66 |             is_signed = self.judge(result["cropped_mask"])
67 |             regions.append({"id": idx, "signed": is_signed, "box": result["cropped_region"]})
68 |         return regions
69 | 


--------------------------------------------------------------------------------
/src/signature_detect/loader.py:
--------------------------------------------------------------------------------
  1 | from typing import Any
  2 | 
  3 | import cv2
  4 | import numpy as np
  5 | import os
  6 | from wand.image import Image
  7 | 
  8 | 
  9 | class Loader:
 10 |     """
 11 |     Load an image or a pdf file.
 12 | 
 13 |     Attributes
 14 |     ----------
 15 |     low_threshold: tuple
 16 |         The low threshold of cv2.inRange
 17 |     high_threshold: tuple
 18 |         The high threshold of cv2.inRange
 19 | 
 20 |     Methods
 21 |     -------
 22 |     get_masks(path: str) -> list
 23 |         It read an image or a pdf file page by page.
 24 |         It returns the masks that the bright parts are marked as 255, the rest as 0.
 25 |     """
 26 | 
 27 |     def __init__(self, low_threshold=(0, 0, 250), high_threshold=(255, 255, 255)):
 28 |         if self._is_valid(low_threshold):
 29 |             self.low_threshold = low_threshold
 30 |         if self._is_valid(high_threshold):
 31 |             self.high_threshold = high_threshold
 32 |     
 33 |     def __str__(self) -> str:
 34 |         s = "\nLoader\n==========\n"
 35 |         s += "low_threshold = {}\n".format(self.low_threshold)
 36 |         s += "high_threshold = {}\n".format(self.high_threshold)
 37 |         return s
 38 | 
 39 |     def _is_valid(self, threshold: tuple) -> bool:
 40 |         if type(threshold) is not tuple:
 41 |             raise Exception("The threshold must be a tuple.")
 42 |         if len(threshold) != 3:
 43 |             raise Exception("The threshold must have 3 item (h, s, v).")
 44 |         for item in threshold:
 45 |             if item not in range(0, 256):
 46 |                 raise Exception("The threshold item must be in the range [0, 255].")
 47 |         return True
 48 | 
 49 |     def get_masks(self, path) -> list:
 50 |         basename = os.path.basename(path)
 51 |         dn, dext = os.path.splitext(basename)
 52 |         ext = dext[1:].lower()
 53 |         if ext == "pdf":
 54 |             self.document_type = "PDF"
 55 |         elif ext == "jpg" or ext == "jpeg" or ext == "png" or ext == "tif":
 56 |             self.document_type = "IMAGE"
 57 |         else:
 58 |             raise Exception("Document should be jpg/jpeg, png, tif or pdf.")
 59 | 
 60 |         if self.document_type == "IMAGE":
 61 |             loader = _ImageWorker(self.low_threshold, self.high_threshold)
 62 |             return [loader.get_image_mask(path)]
 63 | 
 64 |         if self.document_type == "PDF":
 65 |             loader = _PdfWorker(self.low_threshold, self.high_threshold)
 66 |             return loader.get_pdf_masks(path)
 67 | 
 68 | 
 69 | class _ImageWorker:
 70 |     def __init__(self, low_threshold: tuple, high_threshold: tuple) -> None:
 71 |         self.low_threshold = low_threshold
 72 |         self.high_threshold = high_threshold
 73 | 
 74 |     def make_mask(self, image) -> Any:
 75 |         """
 76 |         create a mask that the bright parts are marked as 255, the rest as 0.
 77 | 
 78 |         params
 79 |         ------
 80 |         image: numpy array
 81 | 
 82 |         return
 83 |         ------
 84 |         frame_threshold: numpy array
 85 |         """
 86 |         frame_HSV = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
 87 |         frame_threshold = cv2.inRange(
 88 |             frame_HSV, self.low_threshold, self.high_threshold
 89 |         )
 90 |         return frame_threshold
 91 | 
 92 |     def get_image_mask(self, path: str) -> Any:
 93 |         image = cv2.imread(path)
 94 |         return self.make_mask(image)
 95 | 
 96 | 
 97 | class _PdfWorker(_ImageWorker):
 98 |     def __init__(self, low_threshold, high_threshold):
 99 |         super().__init__(low_threshold, high_threshold)
100 |     
101 |     def get_pdf_images(self, path: str) -> list:
102 |         imgs = []
103 |         with(Image(filename=path,resolution=200)) as source:
104 |             images=source.sequence
105 |             pages=len(images)
106 |             for i in range(pages):
107 |                 imgs.append(images[i])
108 |         return imgs
109 | 
110 |     def get_pdf_masks(self, path: str) -> list:
111 |         """
112 |         create the mask that the bright parts are marked as 255, the rest as 0,
113 |         page by page
114 |         """
115 |         images = self.get_pdf_images(path)
116 | 
117 |         masks = []
118 |         for image in images:
119 |             np_image = np.array(image)
120 |             mask = self.make_mask(np_image)
121 |             masks.append(mask)
122 |         return masks
123 | 


--------------------------------------------------------------------------------
/tests/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit = *tests*
3 | 
4 | [report]
5 | omit = *tests*


--------------------------------------------------------------------------------
/tests/coverage.sh:
--------------------------------------------------------------------------------
1 | coverage run -m unittest
2 | coverage report -m
3 | 


--------------------------------------------------------------------------------
/tests/data/dummy.py:
--------------------------------------------------------------------------------
# Fixture paths imported by the test modules.
# They start with "./", so they are resolved against the current working directory.
TEST_IMAGE_PATH = "./data/signed_image.jpeg"
TEST_PDF_PATH = "./data/signed_file.pdf"
TEST_TIF_PATH = "./data/signed_image.tif"


--------------------------------------------------------------------------------
/tests/data/signed_file.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/tests/data/signed_file.pdf


--------------------------------------------------------------------------------
/tests/data/signed_image.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/tests/data/signed_image.jpeg


--------------------------------------------------------------------------------
/tests/data/signed_image.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/tests/data/signed_image.tif


--------------------------------------------------------------------------------
/tests/data/test.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnzoSeason/signature_detection/7fdfb687729f73792f56fd35696fd5abb06eedb8/tests/data/test.txt


--------------------------------------------------------------------------------
/tests/test_cropper.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import unittest
 3 | 
 4 | import numpy as np
 5 | 
 6 | sys.path.append("..")
 7 | 
 8 | from signature_detect.cropper import Cropper
 9 | from signature_detect.extractor import Extractor
10 | from signature_detect.loader import Loader
11 | 
12 | from tests.data.dummy import TEST_IMAGE_PATH
13 | 
14 | 
class TestCropper(unittest.TestCase):
    """Unit tests for Cropper."""

    def test_init(self):
        cropper = Cropper()
        self.assertEqual(cropper.min_region_size, 10000)

    def test_str(self):
        cropper = Cropper()
        s = "\nCropper\n==========\n"
        s += "min_region_size = {}\n".format(cropper.min_region_size)
        s += "border_ratio = {}\n".format(cropper.border_ratio)
        self.assertEqual(str(cropper), s)

    def test_run(self):
        # full pipeline on the sample image: Loader -> Extractor -> Cropper
        path = TEST_IMAGE_PATH

        loader = Loader()
        mask = loader.get_masks(path)[0]

        extractor = Extractor()
        labeled_mask = extractor.extract(mask)

        cropper = Cropper()
        results = cropper.run(labeled_mask)
        self.assertEqual(len(results[0]["cropped_region"]), 4)

        # the cropped mask must still be binary (0 and 255)
        mask_list = list(np.unique(results[0]["cropped_mask"]))
        self.assertEqual(mask_list[0], 0)
        self.assertEqual(mask_list[1], 255)

    def test_boxes2regions(self):
        # border_ratio=0 disables border removal, so the regions are exact
        cropper = Cropper(border_ratio=0)
        boxes = [[0, 0, 10, 10], [9, 9, 5, 5], [20, 20, 1, 1]]
        regions = cropper.boxes2regions(boxes)
        self.assertEqual(len(regions), 2)

        # the first two boxes overlap and are merged into one region
        self.assertEqual(regions[0][0], 0)
        self.assertEqual(regions[0][1], 0)
        self.assertEqual(regions[0][2], 14)
        self.assertEqual(regions[0][3], 14)

        # the third box is isolated and keeps its own x, y, w and h
        self.assertEqual(regions[1][0], 20)
        self.assertEqual(regions[1][1], 20)
        self.assertEqual(regions[1][2], 1)
        self.assertEqual(regions[1][3], 1)

    def test_is_intersected(self):
        cropper = Cropper()
        box_b = [10, 10, 1, 1]

        # y_a > y_b + h_b
        box_a = [0, 20, 1, 1]
        self.assertFalse(cropper.is_intersected(box_a, box_b))

        # y_a + h_a < y_b
        box_a = [0, 0, 1, 1]
        self.assertFalse(cropper.is_intersected(box_a, box_b))

        # x_a > x_b + w_b
        box_a = [20, 10, 1, 1]
        self.assertFalse(cropper.is_intersected(box_a, box_b))

        # x_a + w_a < x_b
        box_a = [0, 10, 1, 1]
        self.assertFalse(cropper.is_intersected(box_a, box_b))

        # box_a fully covers box_b
        box_a = [9, 9, 5, 5]
        self.assertTrue(cropper.is_intersected(box_a, box_b))
79 | 


--------------------------------------------------------------------------------
/tests/test_extractor.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import unittest
 3 | 
 4 | import numpy as np
 5 | 
 6 | sys.path.append("..")
 7 | 
 8 | from signature_detect.extractor import Extractor
 9 | from signature_detect.loader import Loader
10 | 
11 | from tests.data.dummy import TEST_IMAGE_PATH
12 | 
13 | 
class TestExtractor(unittest.TestCase):
    """Unit tests for Extractor."""

    def test_init(self):
        extractor = Extractor()
        actual = (
            extractor.outlier_weight,
            extractor.outlier_bias,
            extractor.amplfier,
            extractor.min_area_size,
        )
        for value, expected in zip(actual, (3, 100, 10, 10)):
            self.assertEqual(value, expected)

    def test_str(self):
        expected = "".join(
            [
                "\nExtractor\n==========\n",
                "outlier_weight = 3\n",
                "outlier_bias = 100\n",
                "> small_outlier_size = outlier_weight * average_region_size + outlier_bias\n",
                "amplfier = 10\n",
                "> large_outlier_size = amplfier * small_outlier_size\n",
                "min_area_size = 10 (pixels)\n",
                "> min_area_size is used to calculate average_region_size.\n",
            ]
        )
        self.assertEqual(str(Extractor()), expected)

    def test_extract(self):
        extractor = Extractor()

        # the sample image must produce a binary (0 / 255) mask
        sample_mask = Loader().get_masks(TEST_IMAGE_PATH)[0]
        values = list(np.unique(extractor.extract(sample_mask)))
        self.assertEqual(values[0], 0)
        self.assertEqual(values[1], 255)

        # a tiny mask must come back with the same 0 and 255 pixel counts
        tiny_mask = np.array([[0, 255, 0], [0, 255, 0]], dtype="uint8")
        tiny_result = extractor.extract(tiny_mask)

        counts_before = list(np.bincount(tiny_mask.ravel()))
        counts_after = list(np.bincount(tiny_result.ravel()))
        for pixel_value in (0, 255):
            self.assertEqual(counts_before[pixel_value], counts_after[pixel_value])
52 | 


--------------------------------------------------------------------------------
/tests/test_judger.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import unittest
 3 | 
 4 | import numpy as np
 5 | 
 6 | sys.path.append("..")
 7 | 
 8 | from signature_detect.cropper import Cropper
 9 | from signature_detect.extractor import Extractor
10 | from signature_detect.loader import Loader
11 | from signature_detect.judger import Judger
12 | 
13 | from tests.data.dummy import TEST_IMAGE_PATH
14 | 
class TestJudger(unittest.TestCase):
    """Unit tests for Judger."""

    def test_init(self):
        judger = Judger()
        self.assertEqual(judger.size_ratio[0], 1)
        self.assertEqual(judger.size_ratio[1], 4)
        self.assertEqual(judger.pixel_ratio[0], 0.01)
        self.assertEqual(judger.pixel_ratio[1], 1)

    def test_str(self):
        judger = Judger()
        s = "\nJudger\n==========\n"
        s += "size_ratio = {}\n".format(judger.size_ratio)
        s += "pixel_ratio = {}\n".format(judger.pixel_ratio)
        self.assertEqual(str(judger), s)

    def test_is_valid_mask(self):
        judger = Judger()

        # a mask with a single value is rejected by judge
        mask = np.array([[0, 0, 0, 0]])
        res = judger.judge(mask)
        self.assertFalse(res)

        mask = np.array([0])
        self.assertFalse(judger._is_valid_mask(mask))

        # two values, but not 0 and 255
        mask = np.array([0, 1])
        self.assertFalse(judger._is_valid_mask(mask))

        mask = np.array([0, 255])
        res = judger._is_valid_mask(mask)
        self.assertTrue(res)

    def test_judge(self):
        judger = Judger()

        # size ratio 5 is above the default upper bound 4
        mask = np.array([[255, 0, 0, 0, 0]])
        res = judger.judge(mask)
        self.assertFalse(res)

        # pixel ratio 3 is above the default upper bound 1
        mask = np.array([[255, 0], [0, 0]])
        res = judger.judge(mask)
        self.assertFalse(res)

        mask = np.array([[255, 255, 255], [0, 255, 255]])
        res = judger.judge(mask)
        self.assertTrue(res)

    def test_run(self):
        # full pipeline on the sample image: Loader -> Extractor -> Cropper -> Judger
        path = TEST_IMAGE_PATH

        loader = Loader()
        mask = loader.get_masks(path)[0]

        extractor = Extractor()
        labeled_mask = extractor.extract(mask)

        cropper = Cropper()
        results = cropper.run(labeled_mask)

        judger = Judger()
        regions = judger.run(results)

        # assert
        region = regions[0]
        self.assertEqual(region["id"], 0)
        self.assertEqual(region["signed"], True)
        # array_equal accepts both a list box and an ndarray box, whereas
        # `(a == b).all()` fails when the comparison yields a plain bool
        self.assertTrue(
            np.array_equal(region["box"], results[0]["cropped_region"])
        )
83 | 


--------------------------------------------------------------------------------
/tests/test_loader.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import unittest
 3 | 
 4 | import numpy as np
 5 | 
 6 | sys.path.append("..")
 7 | 
 8 | from signature_detect.loader import Loader
 9 | 
10 | from tests.data.dummy import TEST_IMAGE_PATH, TEST_PDF_PATH, TEST_TIF_PATH
11 | 
12 | 
class TestLoader(unittest.TestCase):
    """Unit tests for Loader."""

    def _assert_binary_mask(self, mask):
        # helper: the two smallest distinct values of the mask are 0 and 255
        values = list(np.unique(mask))
        self.assertEqual(values[0], 0)
        self.assertEqual(values[1], 255)

    def test_loader_init(self):
        self.assertEqual(Loader().low_threshold, (0, 0, 250))

    def test_str(self):
        loader = Loader()
        expected = "\nLoader\n==========\nlow_threshold = {}\nhigh_threshold = {}\n".format(
            loader.low_threshold, loader.high_threshold
        )
        self.assertEqual(str(loader), expected)

    def test_is_valid(self):
        # (invalid low threshold, expected error message)
        cases = [
            ("", "The threshold must be a tuple."),
            ((0, 1), "The threshold must have 3 item (h, s, v)."),
            ((0, 0, 256), "The threshold item must be in the range [0, 255]."),
            ((0, 0, "0"), "The threshold item must be in the range [0, 255]."),
        ]
        for low_threshold, message in cases:
            with self.assertRaises(Exception) as cm:
                Loader(low_threshold)
            self.assertEqual(cm.exception.__str__(), message)

    def test_get_masks(self):
        loader = Loader()

        # unsupported extension
        with self.assertRaises(Exception) as cm:
            loader.get_masks("./data/test.txt")
        self.assertEqual(
            cm.exception.__str__(), "Document should be jpg/jpeg, png, tif or pdf."
        )

        # jpeg test
        jpeg_masks = loader.get_masks(TEST_IMAGE_PATH)
        self.assertEqual(len(jpeg_masks), 1)
        self._assert_binary_mask(jpeg_masks[0])

        # tif test
        tif_masks = loader.get_masks(TEST_TIF_PATH)
        self.assertEqual(len(tif_masks), 1)
        self._assert_binary_mask(tif_masks[0])

        # pdf test: two pages, the second one is checked
        pdf_masks = loader.get_masks(TEST_PDF_PATH)
        self.assertEqual(len(pdf_masks), 2)
        self._assert_binary_mask(pdf_masks[1])
87 | 


--------------------------------------------------------------------------------
/update_package.md:
--------------------------------------------------------------------------------
 1 | # Update PyPI
 2 | 
 3 | ## Update `setup.py`
 4 | 
 5 | `version` needs to be updated.
 6 | 
 7 | ## Build
 8 | 
 9 | ```bash
10 | python setup.py bdist_wheel sdist
11 | ```
12 | 
13 | ## Upload
14 | 
15 | ```bash
16 | twine upload dist/*
17 | ```


--------------------------------------------------------------------------------