├── test
    ├── Logfile_crass.log
    ├── out
    │   ├── deskew
    │   │   ├── testimg_deskewangle.txt
    │   │   └── testimg_deskew.jpg
    │   ├── splice
    │   │   ├── 0001_testimg1_SegInfo.txt
    │   │   └── 0001_testimg1.jpg
    │   ├── masks
    │   │   └── testimg_masked.jpg
    │   ├── segments
    │   │   ├── testimg_0_h.jpg
    │   │   ├── testimg_1_c.jpg
    │   │   ├── testimg_2_a.jpg
    │   │   ├── testimg_2_b.jpg
    │   │   └── testimg_3_f.jpg
    │   └── coords
    │   │   └── testimg_coords.txt
    ├── testimg.jpg
    ├── testimg_horizontal_bottom.jpg
    └── testimg_horizontal_bottom_skew.jpg
├── docs
    ├── img
    │   ├── crass_logo.png
    │   ├── basic_concept.png
    │   ├── basic_concept_crop.png
    │   ├── basic_concept_deskew.png
    │   ├── basic_concept_splice.png
    │   └── basic_concept_line_analyse.png
    ├── basic-concepts.md
    └── image-processing.md
├── requirements.txt
├── .gitignore
├── setup.py
├── Dockerfile
├── argparse_config
├── .travis.yml
├── README.md
├── LICENSE
└── crass.py


/test/Logfile_crass.log:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/test/out/deskew/testimg_deskewangle.txt:
--------------------------------------------------------------------------------
1 | Deskewangle:	0.000000


--------------------------------------------------------------------------------
/test/testimg.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/test/testimg.jpg


--------------------------------------------------------------------------------
/docs/img/crass_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/docs/img/crass_logo.png


--------------------------------------------------------------------------------
/test/out/splice/0001_testimg1_SegInfo.txt:
--------------------------------------------------------------------------------
1 | testimg_1_c.jpg
2 | testimg_2_a.jpg
3 | testimg_2_b.jpg
4 | 


--------------------------------------------------------------------------------
/docs/img/basic_concept.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/docs/img/basic_concept.png


--------------------------------------------------------------------------------
/docs/img/basic_concept_crop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/docs/img/basic_concept_crop.png


--------------------------------------------------------------------------------
/docs/img/basic_concept_deskew.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/docs/img/basic_concept_deskew.png


--------------------------------------------------------------------------------
/docs/img/basic_concept_splice.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/docs/img/basic_concept_splice.png


--------------------------------------------------------------------------------
/test/out/deskew/testimg_deskew.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/test/out/deskew/testimg_deskew.jpg


--------------------------------------------------------------------------------
/test/out/masks/testimg_masked.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/test/out/masks/testimg_masked.jpg


--------------------------------------------------------------------------------
/test/out/segments/testimg_0_h.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/test/out/segments/testimg_0_h.jpg


--------------------------------------------------------------------------------
/test/out/segments/testimg_1_c.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/test/out/segments/testimg_1_c.jpg


--------------------------------------------------------------------------------
/test/out/segments/testimg_2_a.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/test/out/segments/testimg_2_a.jpg


--------------------------------------------------------------------------------
/test/out/segments/testimg_2_b.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/test/out/segments/testimg_2_b.jpg


--------------------------------------------------------------------------------
/test/out/segments/testimg_3_f.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/test/out/segments/testimg_3_f.jpg


--------------------------------------------------------------------------------
/test/out/splice/0001_testimg1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/test/out/splice/0001_testimg1.jpg


--------------------------------------------------------------------------------
/test/testimg_horizontal_bottom.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/test/testimg_horizontal_bottom.jpg


--------------------------------------------------------------------------------
/docs/img/basic_concept_line_analyse.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/docs/img/basic_concept_line_analyse.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.11.3
2 | scipy>=0.19
3 | matplotlib>=1.4.3
4 | Pillow>=2.7.0
5 | lxml>=3.5.0
6 | scikit-image>=0.14.0
7 | 


--------------------------------------------------------------------------------
/test/testimg_horizontal_bottom_skew.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UB-Mannheim/crass/master/test/testimg_horizontal_bottom_skew.jpg


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | test/*.jpg
 2 | !test/testimg*.jpg
 3 | test/out/**/*.jpg
 4 | !test/out/**/*testimg*.jpg
 5 | test/out/**/*.txt
 6 | !test/out/**/*testimg*.txt
 7 | *.pyc
 8 | .idea/
 9 | crass_env/
10 | 


--------------------------------------------------------------------------------
/test/out/coords/testimg_coords.txt:
--------------------------------------------------------------------------------
1 | Image resolution:	764	1166
2 | Header:  	0	101	0	1166
3 | C-Seg: 	108	197	74	948
4 | A-Seg:	192	543	74	512
5 | B-Seg:	192	543	510	949
6 | Footer:  	537	764	0	1166
7 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2.7
 2 | import sys
 3 | 
 4 | assert sys.version_info[0]==2 and sys.version_info[1]>=7,\
 5 |     "you must install and use crass with Python version 2.7 or later, but not Python 3.x"
 6 | 
 7 | from distutils.core import setup
 8 | 
 9 | setup(
10 |     name='crass',
11 |     version='0.8',
12 |     author='jkamlah',
13 |     description='crass - crop and splice segments',
14 |     packages=[''],
15 | )
16 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # USAGE
 2 | #
 3 | #   1. Build the docker container:
 4 | # $ docker build -t crass-app .
 5 | #   2.a. Run the test
 6 | # $ docker run -it --rm crass-app
 7 | #   2.b. Run the container with a bash
 8 | # $ docker run -it --rm -v "$PWD":/usr/src/app crass-app bash
 9 | 
10 | FROM python:2
11 | 
12 | WORKDIR /usr/src/app
13 | 
14 | COPY requirements.txt ./
15 | RUN pip install --no-cache-dir -r requirements.txt
16 | 
17 | COPY . .
18 | 
19 | RUN python setup.py install
20 | 
21 | CMD python ./crass.py --input "./test/testimg.jpg" --extension  "jpg"
22 | 


--------------------------------------------------------------------------------
/argparse_config:
--------------------------------------------------------------------------------
 1 | --input="U:\\Eigene Dokumente\\Literatur\\Aufgaben\\crass\\1957\\jpg\\"
 2 | --extension="jpg"
 3 | --addstartheightab=0.01
 4 | --addstopheightab=0.011
 5 | --addstartheightc=-0.005
 6 | --addstopheightc=0.0
 7 | --bgcolor=0
 8 | #--crop
 9 | #--deskew
10 | --deskewlinesize=0.8
11 | --minwidthmask=0.06
12 | --minwidthhor=0.3
13 | --maxwidthhor=0.95
14 | --minheighthor=0.00
15 | --maxheighthor=0.03
16 | --minheighthormask=0.04
17 | --maxheighthormaskt=0.3
18 | --minheightver=0.0375
19 | --maxheightvert=0.95
20 | --minwidthver=0.00
21 | --maxwidthver=0.022
22 | --minwidthvermask=0.35
23 | --maxwidthvermask=0.75
24 | --maxgradientver=0.05,
25 | --minsizeblank=0.016
26 | --minsizeblankobolustop=0.014
27 | --parallel=3
28 | #--plot
29 | --ramp=None
30 | --adaptingmasksoff
31 | #--showmasks
32 | #--splice
33 | --splicetypes=['a', 'b', 'c']
34 | --splicemaintype='c'
35 | #--splicemaintypestop
36 | --threshwindow=31
37 | --threshweight=0.2
38 | #--quiet


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 | # This is not actually used. Because it would take an overly long time
 4 | # to build scipy we cannot use the virtual env of travis. Instead, we
 5 | # use miniconda.
 6 |   - "2.7"
 7 | 
 8 | sudo: false
 9 | 
10 | cache:
11 |   directories:
12 |     - $HOME/.cache/matplotlib
13 | 
14 | install:
15 |   # Install miniconda
16 |   # -----------------
17 |   - if [[ "$TRAVIS_PYTHON_VERSION" == 2* ]]; then
18 |       wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh;
19 |     else
20 |       wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
21 |     fi
22 |   - bash miniconda.sh -b -p $HOME/miniconda
23 |   - export PATH="$HOME/miniconda/bin:$PATH"
24 | 
25 |   # Create the basic testing environment
26 |   # ------------------------------------
27 |   - conda config --set always_yes yes --set changeps1 no --set show_channel_urls yes
28 |   - conda update conda
29 |   - conda create -n test-environment python=$TRAVIS_PYTHON_VERSION
30 |   - source activate test-environment
31 | 
32 |   # Customise the testing environment
33 |   # ---------------------------------
34 |   - conda install --file requirements.txt
35 |   - pip install --upgrade scikit-image
36 | 
37 |   # Conda debug
38 |   #-----------
39 |   - conda list
40 | 
41 |   # Install crass
42 |   # --------------
43 |   - python setup.py install
44 | 
45 | 
46 | script:
47 | - python ./crass.py --input "./test/testimg.jpg" --extension  "jpg"


--------------------------------------------------------------------------------
/docs/basic-concepts.md:
--------------------------------------------------------------------------------
 1 | Basic Concepts
 2 | ==============
 3 | 
 4 | The very basic object `crass` operates on is a **page** with **separator
 5 | lines**. First the **page** will be cropped into **segments** based on the
 6 | **separator lines** and afterwards spliced together to a new image.
 7 | There must be at least one vertical and one horizontal line for the code to
 8 | run correctly. In an additional preprocessing step, `crass` can detect
 9 | the rotation of the page and rotate it to the correct angle.
10 | This process is called **"deskewing"**.
11 | 
12 | ![Sheets and Pages](img/basic_concept.png)
13 | 
14 | ## Input and Output Image Files
15 | 
16 | `crass` can process either one **page** or a **folder** containing several
17 | pages with the same extension. The output files will have the same extension
18 | as the input ones.
19 | 
20 | By default, `crass` places the single segments and the debug information and images
21 | into the directory "out/..", e.g.
22 | the subdirectory "out/splice/.." will contain the final spliced images.
23 | 
24 | ### File Formats
25 | 
26 | The *image-file* format accepted by `crass` is *jpg*.
27 | 
28 | ## Single Steps to Victory
29 | 
30 | ### Deskew
31 | 1.  Find the top or bottom horizontal line.
32 | 2.  Compute the deskew angle.
33 | 3.  Rotate to the correct angle.
34 | 
35 | ![Deskew](img/basic_concept_deskew.png)
36 | 
37 | ### Line Analyse
38 | 1. Find the top or bottom horizontal line.
39 | 2. Find all vertical lines in the middle (by default) of the page.
40 | 
41 | ![Line Analyse](img/basic_concept_line_analyse.png)
42 | 
43 | ### Crop
44 | 1. Compute the clipping masks.
45 |     * There are 5 types of segments:
46 |         - h = header
47 |         - a = left side separated by a vertical line
48 |         - b = right side separated by a vertical line
49 |         - c = space between header and vertical line or vertical line and another vertical line
50 |         - f = footer
51 | 2. Crop the single segments
52 |     (by default: the footer and header information will also be stored)
53 |     
54 | ![Crop](img/basic_concept_crop.png)
55 | 
56 | ### Splice
57 | 1. Splice the single segments according to a certain order
58 |     (by default, a then b then a etc. until a c segment forms the end).
59 |     
60 | ![Splice](img/basic_concept_splice.png)
61 | 
62 | ## Further Information
63 | 
64 | You can find more detailed information about the single steps and setting
65 | options in the [image processing][1] documentation.
66 | 
67 | [1]: image-processing.md


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ![crass logo](docs/img/crass_logo.png)  
 2 | crop and splice segments  
 3 | ========================  
 4 | [![Build Status](https://travis-ci.org/UB-Mannheim/crass.svg?branch=master)](https://travis-ci.org/UB-Mannheim/crass)
 5 | ![Python 2.7](https://img.shields.io/badge/python-2.7-yellow.svg)
 6 | ![license](https://img.shields.io/badge/license-Apache%20License%202.0-blue.svg)
 7 | 
 8 | 
 9 | Overview
10 | --------
11 | 
12 | **crass** is a command line driven post-processing tool for scanned sheets of paper.
13 | The main purpose is to crop segments based on separator lines and then splice them
14 | together in reading order.
15 | 
16 | ![example-workflow](docs/img/basic_concept.png)
17 | 
18 | It is part of the [Aktienführer-Datenarchiv work process][akf-link],
19 | but can also be used independently. 
20 | 
21 | 
22 | Building instructions
23 | ---------------------
24 | 
25 | Dependencies can be installed into a Python Virtual Environment:
26 | 
27 |     $ virtualenv crass_venv/  
28 |     $ source crass_venv/bin/activate  
29 |     $ pip install -r requirements.txt  
30 |     $ python setup.py install  
31 | 
32 | An alternative method using Conda is also possible:
33 | 
34 |     $ conda create -n crass_env python=2.7  
35 |     $ source activate crass_env  
36 |     $ conda install --file requirements.txt 
37 |     $ python setup.py install  
38 | 
39 | The third alternative is to use the [Dockerfile](Dockerfile):
40 | 
41 |     $ docker build -t crass-app .
42 |     $ docker run -it --rm -v "$PWD":/usr/src/app crass-app bash
43 | 
44 | 
45 | Running
46 | -------
47 | 
48 | Here is an example for a page:
49 | 
50 |     # perform deskewing, crop and splice of a page
51 |     $ python ./crass.py "./test/testimg.jpg" "jpg" 
52 |     
53 |     # perform deskewing, crop and splice of a page 
54 |     # the horizontal line is in the bottom area and is bound to the footer
55 |     $ python ./crass.py "./test/testimg_horizontal_bottom_skew.jpg" "jpg" --horlinepos 2 --horlinetype 1
56 | 
57 | The resulting files will be saved in the `out/` subdirectory (relative to the image file(s)) in several subsubdirectories.
58 | 
59 | 
60 | Further Information
61 | -------------------
62 | 
63 | You can find more information on the [basic concepts][basic-link] and the
64 | [image processing][img-link] in the available documentation.
65 | 
66 | 
67 | Copyright and License
68 | ----
69 | 
70 | Copyright (c) 2017 Universitätsbibliothek Mannheim
71 | 
72 | Author: [Jan Kamlah](https://github.com/jkamlah)
73 | 
74 | **crass** is Free Software. You may use it under the terms of the Apache 2.0 License.
75 | See [LICENSE](./LICENSE) for details.
76 | 
77 | 
78 | [akf-link]:  https://github.com/UB-Mannheim/Aktienfuehrer-Datenarchiv-Tools
79 | [basic-link]: docs/basic-concepts.md
80 | [img-link]: docs/image-processing.md
81 | 


--------------------------------------------------------------------------------
/docs/image-processing.md:
--------------------------------------------------------------------------------
  1 | Image Processing Features
  2 | =========================
  3 | 
  4 | The automatic processing will sometimes fail with the standard parameter settings. It is always a
  5 | good idea to manually control the results of `crass` and adjust the
  6 | parameter settings according to the requirements of the input.
  7 | 
  8 | ## Processing Order
  9 | 
 10 | Processing of the filters and auto-corrections is performed in a fixed
 11 | order according to the following sequence:
 12 | 
 13 |  1. **load** image file(s)
 14 |  2. perform **deskewing** (optional)
 15 |     1. **save** deskewed image file
 16 |     2. **load** deskewed image file
 17 |  3. **find** the line coordinate information
 18 |  4. compute **masks**
 19 |  5. **crop** the areas under the **masks**
 20 |  6. **save** single segments and debug information (optional)
 21 |  7. **load** segments
 22 |  8. splice **segments** to a new image
 23 |  9. **save** output image file
 24 | 
 25 | ![Processing Order](img/processing-order.png)
 26 | 
 27 | ## Disabling Processing Steps
 28 | 
 29 | Each processing step can be disabled individually by a corresponding
 30 | `--xxx` option (where `xxx` stands for the feature to disable). 
 31 | 
 32 |     ./crass (...options...) --deskew
 33 | 
 34 | This will disable the *deskew*.
 35 | 
 36 | ## Multiprocessing
 37 | You can fully leverage multiple processors on a given machine with 
 38 | the "parallel" option and the number of processes. The recommended value is the
 39 | number of available processors minus 1 (one is left for the OS).
 40 | 
 41 |     ./crass (...options...) --parallel 3  # for 4 processors
 42 | 
 43 | ## Threshold - Sauvola
 44 | [Sauvola threshold][1] is a local thresholding technique that is useful   
 45 | for images where the background is not uniform, especially for text   
 46 | recognition. Instead of calculating a single global threshold   
 47 | for the entire image, several thresholds are calculated for every pixel by   
 48 | using specific formulae that take into account the mean and standard  
 49 | deviation of the local neighborhood (defined by a window centered around   
 50 | the pixel).
 51 | 
 52 | In the original method a threshold T is calculated for every pixel in the   
 53 | image using the following formula:
 54 | 
 55 | [T = m(x,y) * (1 + k * ((s(x,y) / R) - 1))] [2]
 56 | 
 57 | where m(x,y) and s(x,y) are the mean and standard deviation of pixel (x,y)   
 58 | neighborhood defined by a rectangular window with size w times w centered   
 59 | around the pixel. k is a configurable parameter that weights the effect of   
 60 | standard deviation. R is the maximum standard deviation of a greyscale image.
 61 | 
 62 | ## Linecoord Analyse
 63 | ### The different linetypes to look for
 64 | 
 65 | hor -> Horizontal line 
 66 | ver -> Vertical line
 67 | 
 68 |     xxxver
 69 |     xxxhor
 70 | 
 71 | (where `xxx` stands for the feature to set)
 72 | 
 73 | ### The line coordinate analysis can be influenced with the following parameters:
 74 |     
 75 | minwidth of the linetype + factor of the image width  
 76 | maxwidth of the linetype + factor of the image width  
 77 | minheight of the linetype + factor of the image height  
 78 | maxheight of the linetype + factor of the image height  
 79 | 
 80 |     ./crass (...options...) --minwidthver 0.5 
 81 | 
 82 | This will set the *minwidth of the vertical lines to find* to 50% of the
 83 | image width.
 84 |         
 85 | ### Mask options (search area) for the different linetypes:
 86 | 
 87 | #### Horizontal:   
 88 | minheighthormask + factor of the image height  
 89 | maxheighthormask + factor of the image height
 90 | 
 91 | 0.0 -> 0% height -> top of the image  
 92 | 1.0 -> 100% height -> bottom of the image  
 93 | 
 94 |     ./crass (...options...) --minheighthormask 0.0 --maxheighthormask 0.3 
 95 | 
 96 | This will set the mask (search area) between 0% (top of the image) and 
 97 | 30% of the image's height.
 98 |         
 99 | #### Vertical:
100 | minwidthvermask + factor of the image width  
101 | maxwidthvermask + factor of the image width  
102 | 
103 | 0.0 -> 0% width -> left side of the image  
104 | 1.0 -> 100% width -> right side of the image  
105 |  
106 |     ./crass (...options...) --minwidthvermask 0.3 --maxwidthvermask 0.7 
107 | 
108 | This will set the mask (search area) between 30% and 70% of the image.
109 | 
110 | 
111 | ## Setting Clipping Masks
112 | Clipping masks mark the area to be cropped out. This area is computed
113 | automatically, but the user can set some extra parameters.
114 | ### Add startheight
115 | *Addstartheight* expands the mask of either a&b or c to the top of the image.
116 | 
117 |      ./crass (...options...) --addstartheightab 50
118 |      ./crass (...options...) --addstartheightc 50
119 | 
120 | This will expand the area 50 pixels to the top.
121 | 
122 | ### Add stopheight
123 | *Addstopheight* expands the mask of either a&b or c to the bottom of the image.     
124 |      
125 |      ./crass (...options...) --addstopheightab 50
126 |      ./crass (...options...) --addstopheightc 50
127 |      
128 | This will expand the area 50 pixels to the bottom.     
129 | 
130 | ## Splice Pattern
131 | 
132 | ### Splicetypes
133 | There are 5 types of segments:
134 |     
135 |    - h = header
136 |    - a = left side separated by a vertical line
137 |    - b = right side separated by a vertical line
138 |    - c = space between header and vertical line or vertical line and another vertical line
139 |    - f = footer
140 |   
141 |     ./crass (...options...) --splicetypes a,b,f
142 |     
143 | Only the segments of the types a, b and f will be considered in the splicing
144 | process. (Default: a, b and c)
145 | 
146 | ### Splicemaintype
147 | The splicemaintype starts or ends (depending on the *splicemaintypestop* option) the splice pattern.
148 | 
149 | #### Splicemaintypestop
150 | The splicemaintypestop option moves the maintype to the end of each segment rather than the start (the default).
151 | 
152 |     ./crass (...options...) --splicetypes a,b,h
153 |     ./crass (...options...) --splicemaintype h
154 | 
155 | Only the segments of the types a, b and h will be considered in the splicing
156 | process. The pattern will start with an h-segment. There can be several a and b segments
157 | in between.
158 | 
159 | [1]: http://scikit-image.org/docs/dev/auto_examples/segmentation/plot_niblack_sauvola.html
160 | [2]: http://scikit-image.org/docs/dev/api/skimage.filters.html#skimage.filters.threshold_sauvola
161 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2017 Universitätsbibliothek Mannheim
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/crass.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | ###################### INFORMATION ##############################
  3 | #          crass-Addin to crop and splice segments of an image
  4 | #          optional preprocess: deskew
  5 | #Program:  **crass**
  6 | #Info:     **Python 2.7**
  7 | #Author:   **Jan Kamlah**
  8 | #Date:     **13.06.2017**
  9 | ####################### IMPORT ##################################
 10 | import argparse
 11 | import copy
 12 | import glob
 13 | import logging
 14 | import multiprocessing
 15 | import os
 16 | import warnings
 17 | 
 18 | import numpy as np
 19 | import scipy.misc as misc
 20 | import skimage as ski
 21 | import skimage.color as color
 22 | import skimage.filters.thresholding as th
 23 | import skimage.morphology as morph
 24 | import skimage.transform as transform
 25 | from scipy.ndimage import measurements
 26 | from skimage.io import imread, imsave
 27 | 
 28 | 
 29 | ####################### CMD-PARSER-SETTINGS ########################
 30 | def get_parser():
 31 |     parser = argparse.ArgumentParser(description="Crop And Splice Segments (CRASS) of an image based on black (separator-)lines")
 32 |     parser.add_argument("--input", type=str, default="",
 33 |                         help='Input file or folder')
 34 | 
 35 |     parser.add_argument("--extension", type=str, choices=["bmp","jpg","png","tif"], default="jpg", help='Extension of the files, default: %(default)s')
 36 | 
 37 |     parser.add_argument('-A', '--addstartheightab', type=float, default=0.01, choices=np.arange(-1.0, 1.0), help='Add some pixel for the clipping mask of segments a&b (startheight), default: %(default)s')
 38 |     parser.add_argument('-a', '--addstopheightab', type=float, default=0.011, choices=np.arange(-1.0, 1.0),help='Add some pixel for the clipping mask of segments a&b (stopheight), default: %(default)s')
 39 |     parser.add_argument('-C', '--addstartheightc', type=float, default=-0.005, choices=np.arange(-1.0, 1.0),help='Add some pixel for the clipping mask of segment c (startheight), default: %(default)s')
 40 |     parser.add_argument('-c', '--addstopheightc', type=float, default=0.0, choices=np.arange(-1.0, 1.0),help='Add some pixel for the clipping mask of segment c (stopheight), default: %(default)s')
 41 |     parser.add_argument('--bgcolor', type=int, default=1,help='Backgroundcolor of the splice image (for "uint8": 0=black,...255=white): %(default)s')
 42 |     parser.add_argument('--crop', action="store_false", help='cropping paper into segments')
 43 |     parser.add_argument("--croptypes", type=str, nargs='+', choices=['a', 'b', 'c', 'f', 'h'],
 44 |                         default=['a', 'b', 'c', 'f', 'h'],
 45 |                         help='Types to be cropped out, default: %(default)s')
 46 |     parser.add_argument('--cutwhite', action="store_true",
 47 |                         help='Cut a white area on the left side of the image')
 48 |     parser.add_argument('--deskew', action="store_false", help='preprocessing: deskewing the paper')
 49 |     parser.add_argument('--deskewlinesize', type=float, default=0.8, choices=np.arange(0.1, 1.0),
 50 |                         help='Percantage of the horizontal line to compute the deskewangle: %(default)s')
 51 |     parser.add_argument('--deskewonly', action="store_true",
 52 |                         help='Only deskew the image')
 53 |     parser.add_argument('--tablesplit', action="store_true",
 54 |                         help='Split a table with coordinates')
 55 |     parser.add_argument('--tablesplice', action="store_true",
 56 |                         help='Split a table with coordinates and merge among themselve')
 57 |     parser.add_argument("--tablewidth", type=int, default=2700,
 58 |                         help='Tablesplit/splice parameter, size of the table in pixel.')
 59 |     parser.add_argument("--tablemaxdiff", type=int, default=200,
 60 |                         help='Tablesplit/splice parameter, max range from tablestart to the right.')
 61 |     parser.add_argument("--tablecolumns", type=int, default=3,
 62 |                         help='Tablesplit/splice parameter, number of columns.')
 63 |     parser.add_argument("--tableoffset", type=int, default=10,
 64 |                         help='Tablesplit/splice parameter, offset of the spliced parts.')
 65 |     parser.add_argument("--binary_dilation", type=int, choices=[0, 1, 2, 3], default=0,
 66 |                         help='Dilate x-times the binarized areas.')
 67 |     parser.add_argument("--horlinepos", type=int, choices=[0, 1, 2, 3], default=0,
 68 |                         help='Position of the horizontal line(0:top, 1:right,2:bottom,3:left), default: %(default)s')
 69 |     parser.add_argument("--horlinetype", type=int, choices=[0, 1], default=0,
 70 |                         help='Type of the horizontal line (0:header, 1:footer), default: %(default)s')
 71 |     parser.add_argument("--imgmask", type=float, nargs=4, default=[0.0,1.0,0.0,1.0], help='Set a mask that only a specific part of the image will be computed, arguments =  Heightstart, Heightend, Widthstart, Widthend')
 72 |     parser.add_argument('--minwidthmask', type=float, default=0.06, choices=np.arange(0, 0.5),
 73 |                         help='min widthdistance of all masks, default: %(default)s')
 74 |     parser.add_argument('--minwidthhor', type=float, default=0.3, choices=np.arange(0, 1.0), help='minwidth of the horizontal lines, default: %(default)s')
 75 |     parser.add_argument('--maxwidthhor', type=float, default=0.95,choices=np.arange(-1.0, 1.0), help='maxwidth of the horizontal lines, default: %(default)s')
 76 |     parser.add_argument('--minheighthor', type=float, default=0.00, choices=np.arange(0, 1.0), help='minheight of the horizontal lines, default: %(default)s')
 77 |     parser.add_argument('--maxheighthor', type=float, default=0.95, choices=np.arange(0, 1.0), help='maxheight of the horizontal lines, default: %(default)s')
 78 |     parser.add_argument('--minheighthormask', type=float, default=0.04, choices=np.arange(0, 1.0), help='minheight of the horizontal lines mask (search area), default: %(default)s')
 79 |     parser.add_argument('--maxheighthormask', type=float, default=0.95, choices=np.arange(0, 1.0), help='maxheight of the horizontal lines mask (search area), default: %(default)s')
 80 |     parser.add_argument('--minheightver', type=float, default=0.0375, choices=np.arange(0, 1.0), help='minheight of the vertical lines, default: %(default)s')  # Value of 0.035 is tested (before 0.05)
 81 |     parser.add_argument('--maxheightver', type=float, default=0.95, choices=np.arange(0, 1.0), help='maxheightof the vertical lines, default: %(default)s')
 82 |     parser.add_argument('--minwidthver', type=float, default=0.00, choices=np.arange(0, 1.0), help='minwidth of the vertical lines, default: %(default)s')
 83 |     parser.add_argument('--maxwidthver', type=float, default=0.022, choices=np.arange(0, 1.0), help='maxwidth of the vertical lines, default: %(default)s')
 84 |     parser.add_argument('--minwidthvermask', type=float, default=0.35, choices=np.arange(0, 1.0), help='minwidth of the vertical lines mask (search area), default: %(default)s')
 85 |     parser.add_argument('--maxwidthvermask', type=float, default=0.75, choices=np.arange(0, 1.0), help='maxwidth of the vertical lines mask (search area), default: %(default)s')
 86 |     parser.add_argument('--maxgradientver', type=float, default=0.05, choices=np.arange(0, 1.0), help='max gradient of the vertical lines: %(default)s')
 87 |     # 0.016
 88 |     parser.add_argument('--minsizeblank', type=float, default=0.015, choices=np.arange(0, 1.0), help='min size of the blank area between to vertical lines, default: %(default)s')
 89 |     parser.add_argument('--minsizeblankobolustop', type=float, default=0.014, choices=np.arange(0, 1.0),help='min size of the blank area between to vertical lines, default: %(default)s')
 90 |     parser.add_argument('--nomnumber', type=int, default=4,help='Sets the quantity of numbers in the nomenclature (for "4": 000x_imagename): %(default)s')
 91 |     parser.add_argument('--parallel', type=int, default=1, help="number of CPUs to use, default: %(default)s")
 92 |     parser.add_argument('--ramp', default=None, help='activates the function whiteout')
 93 |     parser.add_argument('--adaptingmasksoff', action="store_true", help='deactivates adapting maskalgorithm')
 94 |     parser.add_argument('--showmasks', action="store_false", help='output an image with colored masks')
 95 |     parser.add_argument('--specialnomoff', action="store_false", help='Disable the special nomenclature for the AKF-Project!')
 96 |     parser.add_argument('--splice', action="store_false", help='splice the cropped segments')
 97 |     parser.add_argument("--splicetypes", type=str, nargs='+', choices=['a', 'b', 'c', 'f', 'h'],
 98 |                         default=['a', 'b', 'c'],
 99 |                         help='Segmenttypes to be spliced, default: %(default)s')
100 |     parser.add_argument("--splicemaintype", type=str, choices=['a', 'b', 'c', 'f', 'h'], default='c',
101 |                         help='Segmenttype that indicates a new splice process, default: %(default)s')
102 | 
103 |     parser.add_argument('--splicemaintypestop', action="store_true",
104 |                         help='The maintype of splicetyps will be placed on the end')
105 |     parser.add_argument('--threshwindow', type=int, default=31, help='Size of the window (binarization): %(default)s')
106 |     parser.add_argument('--threshweight', type=float, default=0.2, choices=np.arange(0, 1.0), help='Weight the effect of the standard deviation (binarization): %(default)s')
107 |     parser.add_argument('--woblankstop', action="store_true",
108 |                         help='Deactivates the whiteout of the blank parts for the a & b parts, this will lead to less memory usage.')
109 |     parser.add_argument('-q', '--quiet', action='store_true', help='be less verbose, default: %(default)s')
110 | 
111 |     args = parser.parse_args()
112 |     return args
113 | 
114 | ####################### LOGGER-FILE-SETTINGS ########################
# The log file is written into the directory part of --input.  os.path.join
# is used so that an input without a directory component (dirname == "")
# yields "Logfile_crass.log" in the current working directory instead of
# "//Logfile_crass.log" in the filesystem root.
# NOTE: this parses the command line at import time as a side effect.
logging.basicConfig(filename=os.path.join(os.path.dirname(get_parser().input), 'Logfile_crass.log'), level=logging.DEBUG,
                    format='%(asctime)s %(message)s',
                    datefmt='%m/%d/%Y %I:%M:%S %p')
118 | 
119 | ####################### CLASSES & METHODS ###########################
class Clippingmask():
    """Rectangular clipping area that initially spans the whole image.

    The start coordinates are 0 and the stop coordinates are taken from
    ``image.shape``.  ``rgb`` (third shape entry) is only set for images
    with more than two dimensions.
    """
    def __init__(self, image):
        shape = image.shape
        if len(shape) <= 2:
            self.height_stop, self.width_stop = shape
        else:
            self.height_stop, self.width_stop, self.rgb = shape
        self.height_start = 0
        self.width_start = 0
        self.user = None
128 | 
class ImageParam():
    """Size and path information of one input image.

    height/width (and rgb for images with more than two dimensions) come
    from ``image.shape``.  ``path`` is the directory of ``input``,
    ``pathout`` the normalized "<path>/out" directory, ``name`` the file
    name without extension.  ``deskewpath`` starts as None.
    """
    def __init__(self, image, input):
        shape = image.shape
        if len(shape) <= 2:
            self.height, self.width = shape
        else:
            self.height, self.width, self.rgb = shape
        folder = os.path.dirname(input)
        filename = os.path.basename(input)
        self.path = folder
        self.pathout = os.path.normpath(folder+"/out/")
        self.deskewpath = None
        self.name = os.path.splitext(filename)[0]
139 | 
class Linecoords():
    """Bounding box and pixel data of one labeled object.

    ``object`` is indexed as a pair of slices (rows, columns); their
    start/stop values become the height/width coordinates.  ``value`` is
    stored as ``object_value`` and a deep copy of ``binary[object]`` as
    ``object_matrix``.  ``middle`` and ``segmenttype`` start as None.
    """
    def __init__(self, binary, value ,object):
        row_slice = object[0]
        col_slice = object[1]
        self.height_start, self.height_stop = row_slice.start, row_slice.stop
        self.width_start, self.width_stop = col_slice.start, col_slice.stop
        self.middle = None
        self.object = object
        self.object_value = value
        # Deep copy so later changes to ``binary`` do not alter the stored region
        self.object_matrix = copy.deepcopy(binary[object])
        self.segmenttype = None
151 | 
class SpliceParam():
    """Name, segment and segment type of one file to be spliced.

    ``name`` is ``input`` without its extension; ``segment`` and
    ``segmenttype`` are the second-to-last and last entry of ``parts``.
    """
    def __init__(self, input, parts):
        count = len(parts)
        stem, _extension = os.path.splitext(input)
        self.name = stem
        # Indexing via the length is kept on purpose: for a single-element
        # ``parts`` both attributes then refer to that one element.
        self.segment = parts[count - 2]
        self.segmenttype = parts[count - 1]
157 | 
158 | ####################### FUNCTIONS ##################################
def create_dir(newdir):
    """Create the directory ``newdir`` if it does not exist yet.

    Only the last path component is created (``os.mkdir``).  On success the
    path is printed; on failure a message is printed and no exception is
    raised.
    """
    if os.path.isdir(newdir):
        return
    try:
        os.mkdir(newdir)
    except (IOError, OSError):
        # os.mkdir raises OSError, which "except IOError" does not catch on
        # Python 2.  A parallel worker may also have created the directory
        # between the isdir() check and mkdir(); that case is not an error.
        if not os.path.isdir(newdir):
            print("cannot create %s directoy" % newdir)
    else:
        print(newdir)
166 | 
def crop(args, image, image_param, labels,list_linecoords, clippingmask):
    """Crop the segments described by ``list_linecoords`` out of ``image``.

    For every entry the matching region is cut out, rotated back by
    ``4 - args.horlinepos`` quarter turns and saved below
    ``<pathout>/segments/``.  The coordinates of the saved regions are
    written to ``<pathout>/coords/<name>_coords.txt``.  With
    ``args.showmasks`` an image with colored masks is saved below
    ``<pathout>/masks/``.

    Segment handling:
      * first entry: everything above the line is the header
        (footer if ``args.horlinetype == 1``)
      * segmenttype 'B': c-segment
      * segmenttype 'L': left and right column (a- and b-segment, swapped
        if ``args.horlinetype == 1``)
      * last entry: everything below the line is the footer
        (header if ``args.horlinetype == 1``)

    Only types listed in ``args.croptypes`` are saved.  ``clippingmask`` is
    modified in place unless ``args.adaptingmasksoff`` is set.

    Returns 0.
    """
    create_dir(image_param.pathout+os.path.normcase("/segments/"))
    filepath = image_param.pathout+os.path.normcase("/segments/")+image_param.name
    create_dir(image_param.pathout+os.path.normcase("/coords/"))
    # The with-statement closes the coords file even if cropping fails midway
    with open(image_param.pathout+os.path.normcase("/coords/")+image_param.name+"_coords.txt", "w") as coordstxt:
        coordstxt.write("Image resolution:\t%d\t%d\n" % (image_param.height, image_param.width))
        pixelheight = set_pixelground(image_param.height)
        image = np.rot90(image, args.horlinepos)
        if args.showmasks == True:
            debugimage = color.gray2rgb(copy.deepcopy(image))
        for idx, linecoords in enumerate(list_linecoords):
            # Header
            if idx == 0:
                if not args.quiet:
                    print("header")
                roi = image[0:linecoords.height_start - 2, 0:image_param.width]  # region of interest
                roi = np.rot90(roi, 4-args.horlinepos)
                with warnings.catch_warnings():
                    # Transform rotate convert the img to float and save convert it back
                    warnings.simplefilter("ignore")
                    if args.horlinetype == 1 and 'f' in args.croptypes:
                        imsave("%s_%d_f.%s" % (filepath, len(list_linecoords)+2, args.extension), roi)
                    elif 'h' in args.croptypes:
                        imsave("%s_%d_h.%s" % (filepath, idx, args.extension), roi)
                        coordstxt.write("Header:  \t%d\t%d\t%d\t%d\n" % (0,linecoords.height_start - 2, 0,image_param.width))
                if args.showmasks == True:
                    dim = 0
                    if args.horlinetype == 1:
                        dim = 1
                    set_colored_mask(debugimage, [[0, linecoords.height_start - 2],
                                                  [0, image_param.width]], dim, 100)
            # Crop middle segments
            if linecoords.segmenttype == 'B':
                if not args.quiet:
                    print("blank")
                if args.adaptingmasksoff != True:
                    # Make the segment symmetric around its middle, using the
                    # smaller distance to the clipping mask borders
                    if linecoords.middle - clippingmask.width_start > clippingmask.width_stop - linecoords.middle:
                        linecoords.width_start = linecoords.middle - (clippingmask.width_stop - linecoords.middle)
                        linecoords.width_stop = linecoords.middle + (clippingmask.width_stop - linecoords.middle)
                    else:
                        linecoords.width_start = linecoords.middle - (linecoords.middle - clippingmask.width_start)
                        linecoords.width_stop = linecoords.middle + (linecoords.middle - clippingmask.width_start)
                # Add some extra space to the coords
                roi = image[linecoords.height_start + 2 - pixelheight(args.addstartheightc):linecoords.height_stop - 2 +pixelheight(args.addstopheightc),
                      linecoords.width_start:linecoords.width_stop]  # region of interest
                roi = np.rot90(roi, 4 - args.horlinepos)
                with warnings.catch_warnings():
                    # Transform rotate convert the img to float and save convert it back
                    warnings.simplefilter("ignore")
                    if args.horlinetype == 1:
                        # Rebinding idx only affects the rest of this iteration
                        idx = len(list_linecoords) - idx
                    if 'c' in args.croptypes:
                        imsave("%s_%d_c.%s" % (filepath, idx+1, args.extension), roi)
                        coordstxt.write(
                            "C-Seg: \t%d\t%d\t%d\t%d\n" % (linecoords.height_start + 2 - pixelheight(args.addstartheightc),linecoords.height_stop - 2 +pixelheight(args.addstopheightc),
                      linecoords.width_start,linecoords.width_stop))
                if args.showmasks == True:
                    dim = 1
                    set_colored_mask(debugimage, [[linecoords.height_start + 2- pixelheight(args.addstartheightc), linecoords.height_stop - 2 +pixelheight(args.addstopheightc)],
                                                  [linecoords.width_start, linecoords.width_stop]], dim, 220)
            if linecoords.segmenttype == 'L':
                #Fixing column size
                if args.adaptingmasksoff != True:
                    if linecoords.width_stop - clippingmask.width_start > clippingmask.width_stop - linecoords.width_start:
                        clippingmask.width_start = linecoords.width_stop - (clippingmask.width_stop - linecoords.width_start)
                    else:
                        clippingmask.width_stop = linecoords.width_start + linecoords.width_stop - clippingmask.width_start
                if idx == 0:
                    # Respect --quiet like every other progress message
                    if not args.quiet:
                        print("line-first")
                    #linecoords.height_start = clippingmask.height_start + 17
                if not args.quiet:
                    print("line")
                if args.woblankstop == False:
                    whiteout_blank(image, labels, linecoords.height_start- pixelheight(args.addstartheightab),linecoords.height_stop + pixelheight(args.addstopheightab)-linecoords.height_start- pixelheight(args.addstartheightab))
                # Left column
                roi = image[linecoords.height_start - pixelheight(args.addstartheightab):linecoords.height_stop + pixelheight(args.addstopheightab),
                      clippingmask.width_start:linecoords.width_stop - 2]  # region of interest
                roi = np.rot90(roi, 4 - args.horlinepos)
                with warnings.catch_warnings():
                    # Transform rotate convert the img to float and save convert it back
                    warnings.simplefilter("ignore")
                    if args.horlinetype == 1 and 'b' in args.croptypes:
                        # Rebinding idx only affects the rest of this iteration
                        idx = len(list_linecoords) - idx
                        imsave("%s_%d_b.%s" % (filepath, idx, args.extension), roi)
                    elif 'a' in args.croptypes:
                        imsave("%s_%d_a.%s" % (filepath, idx+1, args.extension), roi)
                        coordstxt.write(
                            "A-Seg:\t%d\t%d\t%d\t%d\n" % (linecoords.height_start - pixelheight(args.addstartheightab),linecoords.height_stop + pixelheight(args.addstopheightab),
                      clippingmask.width_start,linecoords.width_stop - 2))
                if args.showmasks == True:
                    dim = 2
                    if args.horlinetype == 1:
                        dim = 0
                    set_colored_mask(debugimage, [[linecoords.height_start - pixelheight(args.addstartheightab),
                                                   linecoords.height_stop + pixelheight(args.addstopheightab)],
                                                  [clippingmask.width_start, linecoords.width_stop - 2]], dim, 180)
                # Right column
                roi = image[linecoords.height_start - pixelheight(args.addstartheightab):linecoords.height_stop + pixelheight(args.addstopheightab),
                      linecoords.width_start + 1:clippingmask.width_stop]
                roi = np.rot90(roi, 4 - args.horlinepos)
                with warnings.catch_warnings():
                    # Transform rotate convert the img to float and save convert it back
                    warnings.simplefilter("ignore")
                    if args.horlinetype == 1 and 'a' in args.croptypes:
                        imsave("%s_%d_a.%s" % (filepath, idx, args.extension), roi)
                    elif args.horlinetype != 1 and 'b' in args.croptypes:
                        # The b-segment is gated by 'b' (was 'a' by mistake)
                        imsave("%s_%d_b.%s" % (filepath, idx+1, args.extension), roi)
                        coordstxt.write(
                            "B-Seg:\t%d\t%d\t%d\t%d\n" % (linecoords.height_start - pixelheight(args.addstartheightab),linecoords.height_stop + pixelheight(args.addstopheightab),
                                                            linecoords.width_start + 1,clippingmask.width_stop))
                if args.showmasks == True:
                    dim = 0
                    if args.horlinetype == 1:
                        dim = 2
                    set_colored_mask(debugimage, [[linecoords.height_start - pixelheight(args.addstartheightab),
                                                   linecoords.height_stop + pixelheight(args.addstopheightab)],
                                                  [linecoords.width_start + 1, clippingmask.width_stop]], dim, 180)

            # Footer
            if idx == len(list_linecoords) - 1:
                if not args.quiet:
                    print("footer")
                roi = image[linecoords.height_stop + 2:image_param.height,
                      0:image_param.width]  # region of interest
                roi = np.rot90(roi, 4 - args.horlinepos)
                with warnings.catch_warnings():
                    # Transform rotate convert the img to float and save convert it back
                    warnings.simplefilter("ignore")
                    if args.horlinetype == 1 and 'h' in args.croptypes:
                        imsave("%s_%d_h.%s" % (filepath, 0, args.extension), roi)
                    elif args.horlinetype != 1 and 'f' in args.croptypes:
                        # The footer is gated by 'f' (was 'h' by mistake)
                        imsave("%s_%d_f.%s" % (filepath, idx+2, args.extension), roi)
                        coordstxt.write(
                            "Footer:  \t%d\t%d\t%d\t%d\n" % (linecoords.height_stop + 2,image_param.height, 0,image_param.width))
                if args.showmasks == True:
                    dim = 1
                    if args.horlinetype == 1:
                        dim = 0
                    set_colored_mask(debugimage,
                                     [[linecoords.height_stop + 2, image_param.height], [0, image_param.width]], dim,
                                     100)
    if args.showmasks == True:
        with warnings.catch_warnings():
            # Transform rotate convert the img to float and save convert it back
            create_dir(image_param.pathout+os.path.normcase("/masks/"))
            filename = (image_param.pathout+os.path.normcase("/masks/")+"%s_masked.%s" % (image_param.name, args.extension))
            warnings.simplefilter("ignore")
            debugimage = np.rot90(debugimage, 4 - args.horlinepos)
            imsave(filename, debugimage)
    return 0
314 | 
def cropping(input):
    """Run the whole pipeline (deskew, line analysis, crop) for one image file.

    The command line is parsed again via ``get_parser()`` so the function can
    be called with just a file name.

    Returns 1 if ``input`` cannot be read, None after the cutwhite or
    table split/splice mode and if the deskewed image cannot be read,
    otherwise 0.
    """
    # read image
    print(input)
    args = get_parser()
    try:
        image = imread("%s" % input)
        image_param = ImageParam(image, input)
        if args.imgmask != [0.0, 1.0, 0.0, 1.0]:
            # Restrict the image to the user-defined (relative) mask
            image = image[int(args.imgmask[0]*image_param.height):int(args.imgmask[1]*image_param.height),
                    int(args.imgmask[2]*image_param.width):int(args.imgmask[3]*image_param.width)]
            image_param = ImageParam(image, input)
    except IOError:
        print("cannot open %s" % input)
        logging.warning("cannot open %s" % input)
        return 1
    create_dir(image_param.pathout)
    ####################### DESKEW ####################################
    if args.cutwhite:
        if not args.quiet:
            print("start cutwhite")
        cut_white(args, image, image_param)
        return
    # Deskew the loaded image
    if args.deskew == True:
        if not args.quiet:
            print("start deskew")
        deskew(args, image, image_param)
        deskewpath = image_param.deskewpath
        try:
            image = imread("%s" % (deskewpath))
            image_param = ImageParam(image, input)
        except IOError:
            # Report the file that actually failed to load (the deskewed
            # image), not the original input
            print("cannot open %s" % deskewpath)
            logging.warning("cannot open %s" % deskewpath)
            return
    if args.deskewonly:
        print("Only Deskew mode finished!")
        return 0
    ####################### SIMPLE TABLE SPLIT AND SPLICE #######################
    if args.tablesplit or args.tablesplice:
        if not args.quiet:
            print("start table split and splice")
        table_split_and_splice(args, image, image_param)
        return
    ####################### ANALYSE - LINECOORDS #######################
    if not args.quiet:
        print("start linecoord-analyse")
    clippingmask = Clippingmask(image)
    border, labels, list_linecoords, topline_width_stop = linecoords_analyse(args, image, image_param, clippingmask)
    ####################### CROP #######################################
    if args.crop == True:
        if not args.quiet:
            print("start crop")
        crop(args, image, image_param, labels, list_linecoords, clippingmask)
    return 0
365 | 
def cut_white(args, image, image_param):
    """Save the part of ``image`` left of the first all-white column.

    A column counts as white if its sum equals ``65535 * image_param.height``
    (presumably get_uintimg yields a 16-bit image where 65535 is white;
    confirm against get_uintimg).  The last column is always treated as
    white, so a cut position always exists.  The result is written to
    ``<pathout>/cutwhite/<name>.<extension>``.
    """
    column_sums = np.array(get_uintimg(image)).sum(axis=0)
    white_arr = column_sums - 65535 * image_param.height
    # Force a match in the last column as fallback
    white_arr[-1] = 0
    first_white_col = np.where(white_arr == 0)[0].min()
    target_dir = image_param.pathout + os.path.normcase("/cutwhite/")
    create_dir(target_dir)
    out_path = "%s.%s" % (target_dir + image_param.name, args.extension)
    misc.imsave(out_path, image[:, :first_white_col])
    return
375 | 
def deskew(args,image, image_param):
    """Rotate the image so that its horizontal separator line becomes level.

    The image is binarised, dilated ``args.binary_dilation`` times and
    labelled.  The first labelled object that passes the size and position
    limits below is taken as the horizontal line: a straight line is fitted
    to the upper edge of its central part (``args.deskewlinesize`` of its
    width) and the image is rotated by the resulting angle.

    Side effects: sets ``args.ramp`` to True, sets ``image_param.deskewpath``
    and writes "<name>_deskew.<extension>" plus "<name>_deskewangle.txt" into
    the "deskew" directory below ``image_param.pathout``.

    Returns the path of the deskewed image, or None if no object qualified.
    """
    uintimage = get_uintimg(image)
    binary = get_binary(args, uintimage)
    for x in range(0,args.binary_dilation):
        binary = ski.morphology.binary_dilation(binary,selem=np.ones((3, 3)))
    labels, numl = measurements.label(binary)
    objects = measurements.find_objects(labels)
    deskew_path = None
    for i, b in enumerate(objects):
        linecoords = Linecoords(image, i, b)
        # Accept the object only if its width and height lie inside the
        # configured fractions of the image size, its vertical centre lies
        # inside the configured mask band and it does not touch the top border.
        if int(args.minwidthhor * image_param.width) < get_width(b) < int(args.maxwidthhor * image_param.width) \
                and int(image_param.height * args.minheighthor) < get_height(b) < int(image_param.height * args.maxheighthor) \
                and int(image_param.height * args.minheighthormask) < (linecoords.height_start+linecoords.height_stop)/2 < int(image_param.height * args.maxheighthormask) \
                and linecoords.height_start != 0:

            pixelwidth = set_pixelground(binary[b].shape[1])
            line_labels = labels[b]
            # Column offset that centres the analysed part on the line.
            first_column = pixelwidth((1.0-args.deskewlinesize)/2)
            mean_y = []
            old_start = None
            for idx in range(pixelwidth(args.deskewlinesize)):
                value_y = measurements.find_objects(line_labels[:, idx + first_column] == i + 1)[0]
                if old_start is None:
                    old_start = value_y[0].start
                # Skip columns whose upper edge jumps 5 or more pixels
                # relative to the last accepted column.
                if abs(value_y[0].start-old_start) < 5:
                    mean_y.append(value_y[0].start)
                    old_start = value_y[0].start
            # The slope of the fitted line gives the skew angle in degrees.
            polyfit_value = np.polyfit(range(0,len(mean_y)), mean_y, 1)
            deskewangle = np.arctan(polyfit_value[0]) * (360 / (2 * np.pi))
            args.ramp = True
            deskew_image = transform.rotate(image, deskewangle, mode="edge")
            deskew_dir = image_param.pathout+os.path.normcase("/deskew/")
            create_dir(deskew_dir)
            deskew_path = "%s_deskew.%s" % (deskew_dir+image_param.name, args.extension)
            # The context manager closes the file even if the write fails.
            with open(deskew_dir+image_param.name + "_deskewangle.txt", "w") as deskewinfo:
                deskewinfo.write("Deskewangle:\t%f" % deskewangle)
            image_param.deskewpath = deskew_path
            with warnings.catch_warnings():
                # Silence the warnings raised while saving the rotated image.
                warnings.simplefilter("ignore")
                misc.imsave(deskew_path, deskew_image)
            break
    return deskew_path
425 | 
def table_split_and_splice(args,image, image_param):
    """Cut a table image into column strips and optionally stack them.

    The first labelled object that passes the horizontal-line limits defines
    the horizontal extent of the table.  The image is cut into
    ``args.tablecolumns`` vertical strips, each widened by
    ``args.tableoffset`` pixels on both sides.  With ``args.tablesplit`` every
    strip is saved into the "tablesplit" directory; with ``args.tablesplice``
    the strips are placed below each other and saved into "tablesplice".
    """
    uintimage = get_uintimg(image)
    binary = get_binary(args, uintimage)
    for x in range(0,args.binary_dilation):
        binary = ski.morphology.binary_dilation(binary,selem=np.ones((3, 3)))
    labels, numl = measurements.label(binary)
    objects = measurements.find_objects(labels)
    for i, b in enumerate(objects):
        linecoords = Linecoords(image, i, b)
        if int(args.minwidthhor * image_param.width) < get_width(b) < int(
                args.maxwidthhor * image_param.width) \
                and int(image_param.height * args.minheighthor) < get_height(b) < int(
            image_param.height * args.maxheighthor) \
                and int(image_param.height * args.minheighthormask) < (
                linecoords.height_start + linecoords.height_stop) / 2 < int(
            image_param.height * args.maxheighthormask) \
                and linecoords.height_start != 0:
            new_linecoords = objects[i]
            linecoords.width_start = new_linecoords[1].start
            # Fixed attribute name: "widthstop" only created a stray attribute
            # while width_stop (read below) was never refreshed.
            linecoords.width_stop = new_linecoords[1].stop
            table_width = args.tablewidth
            max_table_diff = args.tablemaxdiff
            columns = args.tablecolumns
            spliceoffset = args.tableoffset
            col_width = (get_width(b) /columns )
            splitpoint = linecoords.width_start + (col_width)
            # If the measured column width deviates too much from the
            # configured value, fall back to the configured table width.
            if abs((get_width(b) / columns) - table_width) > max_table_diff:
                col_width = (table_width / columns)
                if linecoords.width_start < image_param.width * 0.2:
                    splitpoint = linecoords.width_start + col_width
                else:
                    splitpoint = linecoords.width_stop - (col_width * (columns - 1))
            # Dynamic resizing: the canvas must be wide enough for the first
            # strip as well as for the last one.
            img_width = splitpoint+spliceoffset
            if columns > 1:
                last_splitpoint = splitpoint + (col_width * (columns - 2))-spliceoffset
                if splitpoint < (image_param.width - (last_splitpoint-spliceoffset)):
                    img_width = image_param.width - (last_splitpoint-spliceoffset)
            # NOTE(review): the canvas is created with three channels, so a
            # three-channel input image is presumably expected - confirm.
            spliced_image = np.ones((image_param.height * columns, img_width, 3)) * (255*args.bgcolor)
            startpoint = spliceoffset
            for part in range(1, columns + 1):
                if part == columns:
                    # The last strip always runs to the right image border.
                    splitpoint = image_param.width - spliceoffset
                fragment = image[:, (startpoint - spliceoffset):( splitpoint + spliceoffset)]
                if args.tablesplit:
                    create_dir(image_param.pathout + os.path.normcase("/tablesplit/"))
                    misc.imsave("%s_deskew_%d.%s" % (
                        image_param.pathout + os.path.normcase("/tablesplit/") + image_param.name,part,args.extension),
                                fragment)
                if args.tablesplice:
                    spliced_image[image_param.height * (part - 1):image_param.height * part,
                    :(splitpoint + spliceoffset) - (startpoint - spliceoffset)] = fragment
                startpoint = splitpoint - spliceoffset
                splitpoint += col_width
            if args.tablesplice:
                create_dir(image_param.pathout + os.path.normcase("/tablesplice/"))
                misc.imsave("%s_deskew_merge.%s" % (
                image_param.pathout + os.path.normcase("/tablesplice/") + image_param.name, args.extension),
                        spliced_image)
            break
    return
488 | 
def get_binary(args, image):
    """Return the inverted Sauvola binarisation of the image.

    The threshold uses ``args.threshwindow`` and ``args.threshweight``; the
    result is rotated by ``args.horlinepos`` quarter turns.
    """
    threshold = th.threshold_sauvola(image, args.threshwindow, args.threshweight)
    # Pixels above the local threshold become 0, all others 1.
    inverted = 1 - (image > threshold)
    return np.rot90(inverted, args.horlinepos)
495 | 
def get_inputfiles(args):
    """Return the list of image paths described by ``args.input``.

    If ``args.input`` is a file, the list holds just that path.  Otherwise it
    is treated as a directory: the working directory is changed to it and all
    files matching "*.<args.extension>" are returned in sorted order, each
    prefixed with the new working directory.
    """
    source = args.input
    if os.path.isfile(source):
        return [source]
    # The change of working directory is kept on purpose; it persists after
    # this function returns.
    os.chdir(source)
    prefix = os.getcwd() + os.path.normcase("/")
    pattern = "*.%s" % (args.extension)
    return [prefix + name for name in sorted(glob.glob(pattern))]
507 | 
def get_height(s):
    """Return the extent (stop minus start) of the first slice in ``s``."""
    rows = s[0]
    return rows.stop - rows.start
510 | 
def get_linecoords(s):
    """Return ``[[start, stop], [start, stop]]`` for the first two slices in ``s``."""
    first, second = s[0], s[1]
    return [[first.start, first.stop], [second.start, second.stop]]
513 | 
def get_mindist(s,length):
    """Return the smaller distance of ``s[1]`` to either end of ``length``, cut in half.

    The distances are ``s[1].start`` and ``length - s[1].stop``; half of the
    smaller one (truncated to an integer) is subtracted from it.
    """
    distance_to_start = s[1].start
    distance_to_end = length - s[1].stop
    nearest = min(distance_to_start, distance_to_end)
    return nearest - int(nearest * 0.5)
522 | 
def get_uintimg(image):
    """Return a copy of the image prepared for thresholding.

    Images with more than two dimensions are converted with
    ``color.rgb2gray``; a result of dtype float64 is then converted with
    ``ski.img_as_uint``.  The input image itself is left untouched.
    """
    is_multichannel = len(image.shape) > 2
    duplicate = copy.deepcopy(image)
    gray = color.rgb2gray(duplicate) if is_multichannel else duplicate
    if gray.dtype != "float64":
        return gray
    with warnings.catch_warnings():
        # Suppress the warnings emitted during the dtype conversion.
        warnings.simplefilter("ignore")
        return ski.img_as_uint(gray, force_copy=True)
534 | 
def get_width(s):
    """Return the extent (stop minus start) of the second slice in ``s``."""
    columns = s[1]
    return columns.stop - columns.start
537 | 
def linecoords_analyse(args,origimg, image_param, clippingmask):
    """Locate the horizontal header line and the vertical separator lines.

    The image is binarised and labelled.  Every labelled object is checked
    against two sets of limits taken from ``args``:

    * the first object matching the horizontal-line limits defines
      ``border`` and ``topline_width_stop`` and, unless ``clippingmask.user``
      is set, the clipping mask;
    * objects matching the vertical-line limits are collected as 'L'
      segments; gaps between them larger than the configured minimum are
      recorded as blank 'B' segments.

    Matched lines are removed from ``labels`` in most branches and, when
    ``args.ramp`` is not None, whited out in the rotated original image.

    Returns the tuple ``(border, labels, list_linecoords,
    topline_width_stop)`` where ``list_linecoords`` holds Linecoords objects.

    NOTE(review): ``border`` and ``topline_width_stop`` are only assigned
    once a horizontal line has been matched; without such a match the first
    use raises UnboundLocalError.  Confirm whether a default is wanted.
    """
    image = get_uintimg(origimg)
    origimg = np.rot90(origimg, args.horlinepos)
    binary = get_binary(args, image)
    labels, numl = measurements.label(binary)
    objects = measurements.find_objects(labels)
    count_height = 0
    count_width = 0
    pixelheight = set_pixelground(image_param.height)
    pixelwidth = set_pixelground(image_param.width)
    # Collected Linecoords objects; segmenttype is 'B' for a blank area and
    # 'L' for a vertical line.
    list_linecoords = []
    for i, b in enumerate(objects):
        linecoords = Linecoords(labels, i, b)
        # Horizontal line: size and position must lie inside the configured
        # limits, only the first match counts (count_width == 0) and the
        # object must not touch the top border of the image.
        if pixelwidth(args.minwidthhor) <  get_width(b) < pixelwidth(args.maxwidthhor) \
                and pixelheight(args.minheighthor) < get_height(b) < pixelheight(args.maxheighthor) \
                and pixelheight(args.minheighthormask) <  linecoords.height_stop < pixelheight(args.maxheighthormask) \
                and count_width == 0 \
                and linecoords.height_start != 0:
            # Distance calculation - defines the clipping mask
            border = get_mindist(b, image_param.width)
            topline_width_stop = b[0].stop + 2 # Lowest point of the object + 2 pixels
            if clippingmask.user is None:
                clippingmask.width_start = border
                clippingmask.width_stop = image_param.width - border
                clippingmask.height_start = copy.deepcopy(topline_width_stop)
                clippingmask.height_stop = 0
            # Remove the line from the label image
            labels[b][labels[b] == i + 1] = 0
            count_width += 1
        # Vertical line: size, horizontal centre and width/height ratio must
        # lie inside the configured limits.
        if pixelheight(args.minheightver) < get_height(b) < pixelheight(args.maxheightver) \
                and pixelwidth(args.minwidthver) < get_width(b) < pixelwidth(args.maxwidthver) \
                and pixelwidth(args.minwidthvermask) < (linecoords.width_start+linecoords.width_stop)/2 < pixelwidth(args.maxwidthvermask) \
                and float(get_width(b))/float(get_height(b)) < args.maxgradientver:
            linecoords.segmenttype = 'L'
            if count_height == 0:
                # First vertical line: check the gap to the header line.
                if b[0].start - topline_width_stop > pixelheight(args.minsizeblank+args.minsizeblankobolustop):
                    blankline = Linecoords(labels,i,b)
                    blankline.segmenttype = 'B'
                    blankline.height_start = topline_width_stop
                    blankline.height_stop = linecoords.height_start
                    blankline.width_start = border
                    blankline.width_stop = image_param.width - border
                    blankline.middle = int(((linecoords.width_start+linecoords.width_stop)-1)/2)
                    list_linecoords.append(copy.deepcopy(blankline))
                    count_height += 1
                    if args.ramp is not None:
                        whiteout_ramp(origimg, linecoords)
                    list_linecoords.append(copy.deepcopy(linecoords))
                    count_height += 1
                else:
                    # Stretch a too short first vertical line up to the header
                    # line when no blank part precedes it.
                    if topline_width_stop > 0:
                        linecoords.height_start = topline_width_stop + pixelheight(args.addstartheightab)
                    list_linecoords.append(copy.deepcopy(linecoords))
                    count_height += 1
                    if args.ramp is not None:
                        whiteout_ramp(origimg, linecoords)
            elif list_linecoords[count_height - 1].height_stop < b[0].stop:
                # Gap test, also used to filter braces
                if b[0].start - list_linecoords[count_height - 1].height_stop > pixelheight(args.minsizeblank):
                    blankline = Linecoords(labels,i,b)
                    blankline.segmenttype = 'B'
                    blankline.height_start = list_linecoords[count_height - 1].height_stop
                    blankline.height_stop = linecoords.height_start
                    blankline.width_start = border
                    blankline.width_stop = image_param.width - border
                    blankline.middle = int(((linecoords.width_start+linecoords.width_stop)-1)/2)
                    list_linecoords.append(copy.deepcopy(blankline))
                    count_height += 1
                    list_linecoords.append(copy.deepcopy(linecoords))
                    if args.ramp is not None:
                        whiteout_ramp(origimg, linecoords)
                    count_height += 1
                    labels[b][labels[b] == i + 1] = 0
                else:
                    # The gap is too small: extend the previous segment.
                    if args.ramp is not None:
                        whiteout_ramp(origimg, linecoords)
                    # Progress output now honours the quiet flag.
                    if not args.quiet: print(b[0].stop)
                    list_linecoords[count_height - 1].height_stop = b[0].stop
                    labels[b][labels[b] == i + 1] = 0
    return border, labels, list_linecoords, topline_width_stop
625 | 
def set_colored_mask(image, borders, color, intensity):
    """Draw a black frame along ``borders`` and tint the enclosed area in place.

    ``borders`` is ``[[top, bottom], [left, right]]``.  Inside that rectangle
    ``intensity`` is added to channel ``color``; pixels whose channel value
    was 55 or less before the addition end up as 0 in that channel.
    Always returns 0.
    """
    top, bottom = borders[0][0], borders[0][1]
    left, right = borders[1][0], borders[1][1]
    # Black frame: upper, lower, left and right edge of the rectangle.
    image[top:top + 5, left:right] = 0
    image[bottom - 6:bottom - 1, left:right] = 0
    image[top:bottom, left:left + 5] = 0
    image[top:bottom, right - 6:right - 1] = 0
    region = (slice(top, bottom), slice(left, right), color)
    # Remember which pixels were brighter than 55 before the tint is added.
    keep = np.ma.greater(image[region], 55)
    image[region] += intensity
    image[region] = image[region] * keep
    return 0
637 | 
def set_pixelground(image_length):
    """Return a function that turns a fraction of ``image_length`` into pixels.

    The returned function multiplies its argument by ``image_length`` and
    truncates the product to an integer.
    """
    return lambda prc: int(image_length * prc)
643 | 
def _save_spliced_segments(args, inputdir, outputdir, list_splice, spliceinfo, entry_label):
    """Stack the listed segment images vertically and write image and SegInfo file."""
    segments = [misc.imread(img, mode='RGB') for img in list_splice]
    img_height = sum(segment.shape[0] for segment in segments)
    img_width = max(segment.shape[1] for segment in segments)
    # Canvas filled with the background colour; narrower segments leave the
    # background visible on their right side.
    spliced_image = np.ones((img_height, img_width, 3), dtype=segments[0].dtype) * args.bgcolor
    y = 0
    for segment in segments:
        h, w = segment.shape[:2]
        spliced_image[y:y + h, 0:w] = segment
        y += h
    basename, extension = os.path.splitext(spliceinfo[0])
    with warnings.catch_warnings():
        # Silence the warnings raised while saving the image.
        warnings.simplefilter("ignore")
        if args.specialnomoff:
            firstitem = "_".join(basename.split("_")[:-2])
            # Third-last component of the input path
            year = os.path.splitext(os.path.normpath(inputdir))[0].split(os.sep)[-3:-2][0]
            imsave("%s" % (outputdir + entry_label + "_" + year + "_" + firstitem + extension), spliced_image)
            infopath = outputdir + entry_label + "_" + firstitem + "_SegInfo" + ".txt"
        else:
            imsave("%s" % (outputdir + basename + "_spliced" + extension), spliced_image)
            infopath = outputdir + basename + "_SegInfo" + ".txt"
        # The context manager closes the file even if writing fails.
        with open(infopath, "w") as spliceinfofile:
            spliceinfofile.writelines([x + "\n" for x in spliceinfo])


def splice(args,inputdir):
    """Splice the segment images below ``inputdir`` into combined images.

    All "*.<args.extension>" files in "<inputdir>/segments/" whose last
    "_"-separated name part is listed in ``args.splicetypes`` are processed
    in sorted order.  Segments are collected until one of type
    ``args.splicemaintype`` appears; the collected group is then stacked
    vertically and written to "<inputdir>/splice/" together with a
    "_SegInfo.txt" file listing its members.  ``args.splicemaintypestop``
    decides whether the main-type segment ends the current group (True) or
    starts the next one (False).  Segments left over after the last
    main-type segment form a final group.

    The working directory is changed to the segments directory.
    Always returns 0.
    """
    os.chdir(inputdir+os.path.normcase("/segments/"))
    outputdir = inputdir + os.path.normcase("/splice/")
    create_dir(outputdir)
    spliceinfo = []
    list_splice = []
    entry_count = 1
    image = "Nothing!"
    nomnumber = '{0:0>%d}' % args.nomnumber
    for image in sorted(glob.glob("*.%s" % args.extension)):
        name_parts = os.path.splitext(image)[0].split("_")
        if name_parts[-1] not in args.splicetypes:
            continue
        splice_param = SpliceParam(inputdir, name_parts)
        if splice_param.segmenttype != args.splicemaintype:
            list_splice.append(image)
            spliceinfo.append(image)
            continue
        if not args.quiet: print("splice %s" % image)
        if args.splicemaintypestop:
            list_splice.append(image)
            spliceinfo.append(image)
        if len(list_splice) != 0:
            _save_spliced_segments(args, inputdir, outputdir, list_splice, spliceinfo,
                                   nomnumber.format(entry_count))
            # The running number is only used for the special naming scheme.
            if args.specialnomoff:
                entry_count += 1
            spliceinfo = []
            list_splice = []
        if not args.splicemaintypestop:
            list_splice.append(image)
            spliceinfo.append(image)
    if len(list_splice) != 0:
        if not args.quiet: print("splice %s" % image)
        _save_spliced_segments(args, inputdir, outputdir, list_splice, spliceinfo,
                               nomnumber.format(entry_count))
    return 0
731 | 
def whiteout_ramp(image, linecoords):
    """White out the (dilated) object described by ``linecoords`` in the image.

    ``linecoords.object_matrix`` is dilated with a 10x10 square.  For every
    row of the dilated matrix the horizontal extent of the entries equal to
    ``linecoords.object_value + 1`` is set to 255 in the matching row of
    ``image[linecoords.object]``.  Always returns 0.

    The row index now comes from ``enumerate``.  The previous counter only
    advanced on rows where the value was found, so a single row without a
    match shifted every later whiteout one row up.
    """
    # presumably a view, so the writes below end up in ``image`` - TODO
    # confirm that linecoords.object is a tuple of slices
    imagesection = image[linecoords.object]
    dilated = morph.dilation(linecoords.object_matrix, morph.square(10))
    for row_index, row in enumerate(dilated):
        whitevalue = measurements.find_objects(row == linecoords.object_value + 1)
        if whitevalue:
            span = whitevalue[0][0]
            imagesection[row_index, span.start:span.stop] = 255
    return 0
743 | 
def whiteout_blank(image, labels, height, fullheight):
    """White out every labelled object that starts at or above ``height``.

    An object is passed to ``whiteout_ramp`` when its first row is not below
    ``height``, its vertical extent is at most 20% of ``fullheight`` and its
    stop row is not 0.  Always returns 0.
    """
    for index, bounds in enumerate(measurements.find_objects(labels)):
        if bounds is None:
            continue
        rows = bounds[0]
        if rows.start > height or fullheight*0.2 < rows.stop - rows.start or rows.stop == 0:
            continue
        whiteout_ramp(image, Linecoords(labels, index, bounds))
    return 0
755 | 
756 | ####################### MAIN-FUNCTIONS ############################################
def crass():
    """Run the complete workflow: parse arguments, crop all inputs, splice.

    Every input file is passed to ``cropping``, either one after another or,
    when ``args.parallel`` is 2 or more, through a multiprocessing pool.
    Afterwards the segments in the "out" directory next to the input are
    spliced, provided ``args.splicemaintype`` is one of ``args.splicetypes``.

    NOTE(review): splicing runs unless ``args.splice`` equals True - confirm
    against the parser that this inverted-looking flag is intended.
    """
    args = get_parser()
    args.input = os.path.abspath(args.input)
    # Read inputfiles
    inputfiles = get_inputfiles(args)
    ####################### CROP  #######################################
    if args.parallel < 2:
        # Serial processing
        for inputfile in inputfiles:
            cropping(inputfile)
    else:
        # Multiprocessing
        pool = multiprocessing.Pool(processes=args.parallel)
        try:
            # chunksize=1: every worker takes exactly the next free image
            # (sorted order)
            pool.map(cropping, inputfiles, chunksize=1)
        finally:
            # Release the worker processes; map() has already returned all
            # results (or raised) at this point.
            pool.terminate()
            pool.join()
    ####################### SPLICE #######################################
    if not args.splice == True:
        if args.splicemaintype not in args.splicetypes:
            print("%s is not part of the pattern %s" % (args.splicemaintype,args.splicetypes))
            logging.warning("Input error by user!")
        else:
            if not args.quiet: print("start splice")
            path = args.input + os.path.normcase("/out/")
            if not os.path.isdir(args.input):
                # A single input file: use the "out" directory next to it.
                path = os.path.dirname(args.input)+os.path.normcase("/out/")
            splice(args, os.path.normpath(path))
784 | 
785 | ####################### MAIN ############################################
# Start the workflow only when this file is executed as a script.
if __name__=="__main__":
    crass()
788 | 


--------------------------------------------------------------------------------