├── .travis.yml
├── CMakeLists.txt
├── LICENSE
├── README.md
├── demo.py
├── include
├── conversion.h
├── delaunay.h
├── imgwarp_mls.h
├── imgwarp_mls_rigid.h
├── imgwarp_mls_similarity.h
└── imgwarp_piecewiseaffine.h
├── pic
├── demo.png
├── gif_Distort.gif
├── gif_Perspective.gif
└── gif_Stretch.gif
└── src
├── Augment.cpp
├── conversion.cpp
├── delaunay.cpp
├── imgwarp_mls.cpp
├── imgwarp_mls_rigid.cpp
├── imgwarp_mls_similarity.cpp
└── imgwarp_piecewiseaffine.cpp
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: required
2 |
3 | language: cpp
4 | compiler: g++
5 |
6 | before_install:
7 | - wget https://repo.anaconda.com/miniconda/Miniconda2-latest-Linux-x86_64.sh -O conda.sh
8 | - chmod +x conda.sh
9 | - "./conda.sh -b"
10 | - export PATH=/home/travis/miniconda2/bin:$PATH
11 | - conda update --yes conda
12 |
13 | install:
14 | - conda install pip --yes
15 | - sudo apt-get install python-numpy
16 | - pip install numpy
17 | - conda install cmake --yes
18 | - conda install boost --yes
19 | - conda install --channel https://conda.anaconda.org/menpo opencv --yes
20 |
21 | script:
22 | - echo ${PATH}
23 | - mkdir build
24 | - cd build
25 | - pwd
26 | - cmake -D CUDA_USE_STATIC_CUDA_RUNTIME=OFF ..
27 | - make
28 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required (VERSION 2.6.0)
2 |
3 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -Wall -std=c++11")
4 |
5 | # Head and source
6 | include_directories(include)
7 | AUX_SOURCE_DIRECTORY(src DIR_SRCS)
8 |
9 | # Find necessary packages
10 | find_package( PythonLibs 2.7 REQUIRED )
11 | include_directories( ${PYTHON_INCLUDE_DIRS} )
12 |
13 | find_package( Boost COMPONENTS python REQUIRED )
14 | include_directories( ${Boost_INCLUDE_DIR} )
15 |
16 | find_package( OpenCV REQUIRED )
17 | include_directories( ${OpenCV_INCLUDE_DIRS} )
18 |
19 | # Define the wrapper library
20 | add_library(Augment SHARED ${DIR_SRCS})
21 | target_link_libraries(Augment ${Boost_LIBRARIES} ${PYTHON_LIBRARIES} ${OpenCV_LIBRARIES})
22 |
23 | # Don't prepend wrapper library name with lib
24 | set_target_properties(Augment PROPERTIES PREFIX "" )
25 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Canjie Luo
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Text Image Augmentation
2 |
3 | [![Build Status](https://travis-ci.org/Canjie-Luo/Text-Image-Augmentation.svg?branch=master)](https://travis-ci.org/Canjie-Luo/Text-Image-Augmentation)
4 |
5 | A general geometric augmentation tool for text images in the CVPR 2020 paper "[Learn to Augment: Joint Data Augmentation and Network Optimization for Text Recognition](https://arxiv.org/abs/2003.06606)". We provide the tool to avoid overfitting and gain robustness of text recognizers.
6 |
7 | ***Note that this is a general toolkit. Please customize for your specific task. If the repo benefits your work, please [cite the papers](https://github.com/Canjie-Luo/Text-Image-Augmentation#citation).***
8 |
9 | ## News
10 | - 2020-02 The paper "Learn to Augment: Joint Data Augmentation and Network Optimization for Text Recognition" was accepted to ***CVPR 2020***. It is a preliminary attempt for smart augmentation.
11 |
12 | - 2019-11 The paper "Decoupled Attention Network for Text Recognition" ([Paper](https://arxiv.org/abs/1912.10205) [Code](https://github.com/Wang-Tianwei/Decoupled-attention-network)) was accepted to ***AAAI 2020***. This augmentation tool was used in the experiments of handwritten text recognition.
13 |
14 | - 2019-04 We applied this tool in the ReCTS competition of ***ICDAR 2019***. Our ensemble model won the championship.
15 |
16 | - 2019-01 The similarity transformation was specifically customized for geometric augmentation of text images.
17 |
18 | ## Requirements
19 |
20 | - [GCC](https://gcc.gnu.org/gcc-4.8/) 4.8.*
21 | - [Python](https://www.python.org/) 2.7.*
22 | - [Boost](https://www.boost.org/) 1.67
23 | - [OpenCV](https://opencv.org/) 2.4.*
24 |
25 | We recommend [Anaconda](https://www.anaconda.com/) to manage the version of your dependencies. For example:
26 |
27 | ```bash
28 | conda install boost=1.67.0
29 | ```
30 |
31 | ## Installation
32 | Build library:
33 |
34 | ```bash
35 | mkdir build
36 | cd build
37 | cmake -D CUDA_USE_STATIC_CUDA_RUNTIME=OFF ..
38 | make
39 | ```
40 |
41 | Copy the **Augment.so** to the target folder and follow **demo.py** to use the tool.
42 |
43 | ```bash
44 | cp Augment.so ..
45 | cd ..
46 | python demo.py
47 | ```
48 |
49 | ## Demo
50 |
51 | - Distortion
52 |
53 | ![](pic/gif_Distort.gif)
54 |
55 | - Stretch
56 |
57 | ![](pic/gif_Stretch.gif)
58 |
59 | - Perspective
60 |
61 | ![](pic/gif_Perspective.gif)
62 |
63 | ## Speed
64 |
65 | To transform an image with size (H:64, W:200), it takes less than 3ms using a 2.0GHz CPU. It is possible to accelerate the process by calling multi-process batch samplers in an on-the-fly manner, such as setting [**\"num_workers\"**](https://pytorch.org/docs/0.3.1/data.html?highlight=dataset#torch.utils.data.DataLoader) in [PyTorch](https://pytorch.org/docs/0.3.1/data.html?highlight=dataset#torch.utils.data.DataLoader).
66 |
67 | ## Improvement for Recognition
68 |
69 | We compare the accuracies of [CRNN](https://github.com/meijieru/crnn.pytorch) trained using only the corresponding small training set.
70 |
71 | | Dataset | IIIT5K | IC13 | IC15 |
72 | | :---: | :---: | :---: | :---:|
73 | | Without Data Augmentation | 40.8% | 6.8% | 8.7% |
74 | | With Data Augmentation | 53.4% | 9.6% | 24.9% |
75 |
76 |
77 | ## Citation
78 |
79 | ```
80 | @inproceedings{luo2020learn,
81 | author = {Canjie Luo, Yuanzhi Zhu, Lianwen Jin, Yongpan Wang},
82 | title = {Learn to Augment: Joint Data Augmentation and Network Optimization for Text Recognition},
83 | booktitle = {CVPR},
84 | Year = {2020}
85 | }
86 |
87 | @InProceedings{wang2020decoupled,
88 | author = {Tianwei Wang and Yuanzhi Zhu and Lianwen Jin and Canjie Luo and Xiaoxue Chen and Yaqiang Wu and Qianying Wang and Mingxiang Cai},
89 | title = {Decoupled attention network for text recognition},
90 | booktitle ={AAAI},
91 | year = {2020}
92 | }
93 |
94 | @article{schaefer2006image,
95 | title={Image deformation using moving least squares},
96 | author={Schaefer, Scott and McPhail, Travis and Warren, Joe},
97 | journal={ACM Transactions on Graphics (TOG)},
98 | volume={25},
99 | number={3},
100 | pages={533--540},
101 | year={2006},
102 | publisher={ACM New York, NY, USA}
103 | }
104 | ```
105 |
106 | ## Acknowledgment
107 |
108 | Thanks for the contribution of the following developers.
109 |
110 | [@keeofkoo](https://github.com/keeofkoo)
111 |
112 | [@cxcxcxcx](https://github.com/cxcxcxcx)
113 |
114 | [@Yati Sagade](https://github.com/yati-sagade)
115 | ## Attention
116 | The tool is only free for academic research purposes.
117 |
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
1 | import Augment
2 | import cv2
3 | import numpy as np
4 |
5 |
6 | '''
7 | The code is for OpenCV format.
8 | If your data format is PIL.Image, please convert the format by:
9 |
10 | import numpy as np
11 | import cv2
12 | from PIL import Image
13 |
14 | img = Image.open("The Path to the image")
15 | img = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)
16 | '''
17 |
18 | im = cv2.imread("pic/demo.png")
19 | im = cv2.resize(im, (200, 64))
20 | cv2.imshow("im_CV", im)
21 | for i in range(5000):
22 | im_Distort = Augment.GenerateDistort(im, 4)
23 | cv2.imshow("im_Distort", im_Distort)
24 | im_Stretch = Augment.GenerateStretch(im, 4)
25 | cv2.imshow("im_Stretch", im_Stretch)
26 | im_Perspective = Augment.GeneratePerspective(im)
27 | cv2.imshow("im_Perspective", im_Perspective)
28 | cv2.waitKey(1)
29 |
--------------------------------------------------------------------------------
/include/conversion.h:
--------------------------------------------------------------------------------
1 | # ifndef __CONVERSION_OPENCV_H__
2 | # define __CONVERSION_OPENCV_H__
3 |
4 | #include <Python.h>
5 | #include <opencv2/core/core.hpp>
6 | #include <opencv2/imgproc/imgproc.hpp>
7 | #include <opencv2/highgui/highgui.hpp>
8 | #include "numpy/ndarrayobject.h"
9 |
10 | #define NUMPY_IMPORT_ARRAY_RETVAL
11 |
12 |
13 | // static PyObject* opencv_error = 0;
14 |
15 | // static int failmsg(const char *fmt, ...);
16 |
17 | class PyAllowThreads;
18 |
19 | class PyEnsureGIL;
20 |
21 | #define ERRWRAP2(expr) \
22 | try \
23 | { \
24 | PyAllowThreads allowThreads; \
25 | expr; \
26 | } \
27 | catch (const cv::Exception &e) \
28 | { \
29 | PyErr_SetString(opencv_error, e.what()); \
30 | return 0; \
31 | }
32 |
33 | // static PyObject* failmsgp(const char *fmt, ...);
34 |
35 | static size_t REFCOUNT_OFFSET = (size_t)&(((PyObject*)0)->ob_refcnt) +
36 | (0x12345678 != *(const size_t*)"\x78\x56\x34\x12\0\0\0\0\0")*sizeof(int);
37 |
38 | static inline PyObject* pyObjectFromRefcount(const int* refcount)
39 | {
40 | return (PyObject*)((size_t)refcount - REFCOUNT_OFFSET);
41 | }
42 |
43 | static inline int* refcountFromPyObject(const PyObject* obj)
44 | {
45 | return (int*)((size_t)obj + REFCOUNT_OFFSET);
46 | }
47 |
48 |
49 | class NumpyAllocator;
50 |
51 | // enum { ARG_NONE = 0, ARG_MAT = 1, ARG_SCALAR = 2 };
52 |
53 | class NDArrayConverter
54 | {
55 | private:
56 | void init();
57 | public:
58 | NDArrayConverter();
59 | cv::Mat toMat(const PyObject* o);
60 | PyObject* toNDArray(const cv::Mat& mat);
61 | };
62 |
63 | # endif
64 |
--------------------------------------------------------------------------------
/include/delaunay.h:
--------------------------------------------------------------------------------
1 | /*
2 | * delaunay.h
3 | * aamlib-opencv
4 | *
5 | * Created by Chen Xing on 10-2-12.
6 | * Copyright 2010 __MyCompanyName__. All rights reserved.
7 | *
8 | */
9 |
10 | #include "opencv/cv.h"
11 | #include "opencv2/legacy/legacy.hpp"
12 | #include <vector>
13 | #include <map>
14 | #include <set>