├── .travis.yml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── demo.py ├── include ├── conversion.h ├── delaunay.h ├── imgwarp_mls.h ├── imgwarp_mls_rigid.h ├── imgwarp_mls_similarity.h └── imgwarp_piecewiseaffine.h ├── pic ├── demo.png ├── gif_Distort.gif ├── gif_Perspective.gif └── gif_Stretch.gif └── src ├── Augment.cpp ├── conversion.cpp ├── delaunay.cpp ├── imgwarp_mls.cpp ├── imgwarp_mls_rigid.cpp ├── imgwarp_mls_similarity.cpp └── imgwarp_piecewiseaffine.cpp /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | # CI recipe: install Miniconda2, pull cmake/boost/opencv through conda, then build with cmake + make. 3 | language: cpp 4 | compiler: g++ 5 | 6 | before_install: 7 | - wget https://repo.anaconda.com/miniconda/Miniconda2-latest-Linux-x86_64.sh -O conda.sh 8 | - chmod +x conda.sh 9 | - "./conda.sh -b" 10 | - export PATH=/home/travis/miniconda2/bin:$PATH 11 | - conda update --yes conda 12 | 13 | install: 14 | - conda install pip --yes 15 | - sudo apt-get install -y python-numpy 16 | - pip install numpy 17 | - conda install cmake --yes 18 | - conda install boost --yes 19 | - conda install --channel https://conda.anaconda.org/menpo opencv --yes 20 | 21 | script: 22 | - echo ${PATH} 23 | - mkdir build 24 | - cd build 25 | - pwd 26 | - cmake -D CUDA_USE_STATIC_CUDA_RUNTIME=OFF ..
27 | - make 28 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 2.6.0) 2 | # Builds the Augment Python extension as a shared library named Augment (no "lib" prefix). 3 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -Wall -std=c++11") 4 | 5 | # Head and source 6 | include_directories(include) 7 | AUX_SOURCE_DIRECTORY(src DIR_SRCS) 8 | 9 | # Find necessary packages 10 | find_package( PythonLibs 2.7 REQUIRED ) 11 | include_directories( ${PYTHON_INCLUDE_DIRS} ) 12 | 13 | find_package( Boost COMPONENTS python REQUIRED ) 14 | include_directories( ${Boost_INCLUDE_DIRS} ) 15 | 16 | find_package( OpenCV REQUIRED ) 17 | include_directories( ${OpenCV_INCLUDE_DIRS} ) 18 | 19 | # Define the wrapper library (a target must not appear in its own link list) 20 | add_library(Augment SHARED ${DIR_SRCS}) 21 | target_link_libraries(Augment ${Boost_LIBRARIES} ${PYTHON_LIBRARIES} ${OpenCV_LIBS}) 22 | 23 | # Don't prepend wrapper library name with lib 24 | set_target_properties(Augment PROPERTIES PREFIX "" ) 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Canjie Luo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software.
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Text Image Augmentation 2 | 3 | [![Build Status](https://travis-ci.org/Canjie-Luo/Text-Image-Augmentation.svg?branch=master)](https://travis-ci.org/Canjie-Luo/Text-Image-Augmentation) 4 | 5 | A general geometric augmentation tool for text images in the CVPR 2020 paper "[Learn to Augment: Joint Data Augmentation and Network Optimization for Text Recognition](https://arxiv.org/abs/2003.06606)". We provide the tool to avoid overfitting and gain robustness of text recognizers. 6 | 7 | ***Note that this is a general toolkit. Please customize for your specific task. If the repo benefits your work, please [cite the papers](https://github.com/Canjie-Luo/Text-Image-Augmentation#citation).*** 8 | 9 | ## News 10 | - 2020-02 The paper "Learn to Augment: Joint Data Augmentation and Network Optimization for Text Recognition" was accepted to ***CVPR 2020***. It is a preliminary attempt for smart augmentation. 11 | 12 | - 2019-11 The paper "Decoupled Attention Network for Text Recognition" ([Paper](https://arxiv.org/abs/1912.10205) [Code](https://github.com/Wang-Tianwei/Decoupled-attention-network)) was accepted to ***AAAI 2020***. This augmentation tool was used in the experiments of handwritten text recognition. 13 | 14 | - 2019-04 We applied this tool in the ReCTS competition of ***ICDAR 2019***. 
Our ensemble model won the championship. 15 | 16 | - 2019-01 The similarity transformation was specifically customized for geometric augmentation of text images. 17 | 18 | ## Requirements 19 | 20 | - [GCC](https://gcc.gnu.org/gcc-4.8/) 4.8.* 21 | - [Python](https://www.python.org/) 2.7.* 22 | - [Boost](https://www.boost.org/) 1.67 23 | - [OpenCV](https://opencv.org/) 2.4.* 24 | 25 | We recommend [Anaconda](https://www.anaconda.com/) to manage the versions of your dependencies. For example: 26 | 27 | ```bash 28 | conda install boost=1.67.0 29 | ``` 30 | 31 | ## Installation 32 | Build library: 33 | 34 | ```bash 35 | mkdir build 36 | cd build 37 | cmake -D CUDA_USE_STATIC_CUDA_RUNTIME=OFF .. 38 | make 39 | ``` 40 | 41 | Copy the **Augment.so** to the target folder and follow **demo.py** to use the tool. 42 | 43 | ```bash 44 | cp Augment.so .. 45 | cd .. 46 | python demo.py 47 | ``` 48 | 49 | ## Demo 50 | 51 | - Distortion 52 | 53 | ![](pic/gif_Distort.gif) 54 | 55 | - Stretch 56 | 57 | ![](pic/gif_Stretch.gif) 58 | 59 | - Perspective 60 | 61 | ![](pic/gif_Perspective.gif) 62 | 63 | ## Speed 64 | 65 | To transform an image with size (H:64, W:200), it takes less than 3ms using a 2.0GHz CPU. It is possible to accelerate the process by calling multi-process batch samplers in an on-the-fly manner, such as setting [**\"num_workers\"**](https://pytorch.org/docs/0.3.1/data.html?highlight=dataset#torch.utils.data.DataLoader) in [PyTorch](https://pytorch.org/docs/0.3.1/data.html?highlight=dataset#torch.utils.data.DataLoader). 66 | 67 | ## Improvement for Recognition 68 | 69 | We compare the accuracies of [CRNN](https://github.com/meijieru/crnn.pytorch) trained using only the corresponding small training set. 70 | 71 | |
Dataset
|
IIIT5K
|
IC13
|
IC15
| 72 | | :---: | :---: | :---: | :---:| 73 | | Without Data Augmentation |
40.8%
|
6.8%
|
8.7%
| 74 | |
With Data Augmentation
|
53.4%
|
9.6%
|
24.9%
| 75 | 76 | 77 | ## Citation 78 | 79 | ``` 80 | @inproceedings{luo2020learn, 81 | author = {Canjie Luo and Yuanzhi Zhu and Lianwen Jin and Yongpan Wang}, 82 | title = {Learn to Augment: Joint Data Augmentation and Network Optimization for Text Recognition}, 83 | booktitle = {CVPR}, 84 | Year = {2020} 85 | } 86 | 87 | @InProceedings{wang2020decoupled, 88 | author = {Tianwei Wang and Yuanzhi Zhu and Lianwen Jin and Canjie Luo and Xiaoxue Chen and Yaqiang Wu and Qianying Wang and Mingxiang Cai}, 89 | title = {Decoupled attention network for text recognition}, 90 | booktitle ={AAAI}, 91 | year = {2020} 92 | } 93 | 94 | @article{schaefer2006image, 95 | title={Image deformation using moving least squares}, 96 | author={Schaefer, Scott and McPhail, Travis and Warren, Joe}, 97 | journal={ACM Transactions on Graphics (TOG)}, 98 | volume={25}, 99 | number={3}, 100 | pages={533--540}, 101 | year={2006}, 102 | publisher={ACM New York, NY, USA} 103 | } 104 | ``` 105 | 106 | ## Acknowledgment 107 | 108 | Thanks for the contribution of the following developers. 109 | 110 | [@keeofkoo](https://github.com/keeofkoo) 111 | 112 | [@cxcxcxcx](https://github.com/cxcxcxcx) 113 | 114 | [@Yati Sagade](https://github.com/yati-sagade) 115 | ## Attention 116 | The tool is only free for academic research purposes. 117 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import Augment 2 | import cv2 3 | import numpy as np 4 | 5 | 6 | ''' 7 | The code is for OpenCV format.
8 | If your data format is PIL.Image, please convert the format by: 9 | 10 | import numpy as np 11 | import cv2 12 | from PIL import Image 13 | 14 | img = Image.open("The Path to the image") 15 | img = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) 16 | ''' 17 | im = cv2.imread("pic/demo.png") 18 | if im is None: raise IOError("pic/demo.png could not be read") # imread returns None instead of raising 19 | im = cv2.resize(im, (200, 64)) 20 | cv2.imshow("im_CV", im) 21 | for i in range(5000): 22 | im_Distort = Augment.GenerateDistort(im, 4) 23 | cv2.imshow("im_Distort", im_Distort) 24 | im_Stretch = Augment.GenerateStretch(im, 4) 25 | cv2.imshow("im_Stretch", im_Stretch) 26 | im_Perspective = Augment.GeneratePerspective(im) 27 | cv2.imshow("im_Perspective", im_Perspective) 28 | cv2.waitKey(1) 29 | -------------------------------------------------------------------------------- /include/conversion.h: -------------------------------------------------------------------------------- 1 | # ifndef CONVERSION_OPENCV_H 2 | # define CONVERSION_OPENCV_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "numpy/ndarrayobject.h" 9 | 10 | #define NUMPY_IMPORT_ARRAY_RETVAL 11 | 12 | 13 | // static PyObject* opencv_error = 0; 14 | 15 | // static int failmsg(const char *fmt, ...); 16 | 17 | class PyAllowThreads; 18 | 19 | class PyEnsureGIL; 20 | // NOTE(review): ERRWRAP2 expands to a use of opencv_error, whose declaration above is commented out. 21 | #define ERRWRAP2(expr) \ 22 | try \ 23 | { \ 24 | PyAllowThreads allowThreads; \ 25 | expr; \ 26 | } \ 27 | catch (const cv::Exception &e) \ 28 | { \ 29 | PyErr_SetString(opencv_error, e.what()); \ 30 | return 0; \ 31 | } 32 | 33 | // static PyObject* failmsgp(const char *fmt, ...); 34 | // Byte offset of ob_refcnt inside PyObject; sizeof(int) is added when the byte probe below does not read back as 0x12345678. 35 | static size_t REFCOUNT_OFFSET = (size_t)&(((PyObject*)0)->ob_refcnt) + 36 | (0x12345678 != *(const size_t*)"\x78\x56\x34\x12\0\0\0\0\0")*sizeof(int); 37 | // Inverse of refcountFromPyObject(): recover the PyObject that owns the given counter. 38 | static inline PyObject* pyObjectFromRefcount(const int* refcount) 39 | { 40 | return (PyObject*)((size_t)refcount - REFCOUNT_OFFSET); 41 | } 42 | // Address of the object's reference counter viewed as int*, the form stored in cv::Mat::refcount. 43 | static inline int* refcountFromPyObject(const PyObject* obj) 44 | { 45 | return (int*)((size_t)obj +
REFCOUNT_OFFSET); 46 | } 47 | 48 | 49 | class NumpyAllocator; 50 | 51 | // enum { ARG_NONE = 0, ARG_MAT = 1, ARG_SCALAR = 2 }; 52 | // Converts between numpy ndarrays (PyObject*) and cv::Mat in both directions. 53 | class NDArrayConverter 54 | { 55 | private: 56 | void init(); 57 | public: 58 | NDArrayConverter(); 59 | cv::Mat toMat(const PyObject* o); 60 | PyObject* toNDArray(const cv::Mat& mat); 61 | }; 62 | 63 | # endif 64 | -------------------------------------------------------------------------------- /include/delaunay.h: -------------------------------------------------------------------------------- 1 | /* 2 | * delaunay.h 3 | * aamlib-opencv 4 | * 5 | * Created by Chen Xing on 10-2-12. 6 | * Copyright 2010 __MyCompanyName__. All rights reserved. 7 | * 8 | */ 9 | #pragma once 10 | #include "opencv/cv.h" 11 | #include "opencv2/legacy/legacy.hpp" 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | using cv::Point_; 18 | using cv::Mat_; 19 | using std::vector; 20 | using std::set; 21 | using std::map; 22 | using std::sort; 23 | 24 | class Delaunay; 25 | // Triangle stored as three vertex coordinates. 26 | struct Triangle { 27 | Point_ v[3]; 28 | }; 29 | // Triangle stored as three integer vertex ids. 30 | struct TriangleInID { 31 | int v[3]; 32 | }; 33 | 34 | int pComp(const void *p1, const void *p2); 35 | 36 | bool FindTriangleFromEdge(CvSubdiv2DEdge e, set &V); 37 | 38 | //! Find the Delaunay division for given points(Return in int coordinates).
39 | template 40 | vector delaunayDiv(const vector > &vP, cv::Rect boundRect) { 41 | CvSubdiv2D *subdiv; 42 | // subdiv is created on this storage, which is released before returning. 43 | CvMemStorage *storage; 44 | storage = cvCreateMemStorage(0); 45 | subdiv = cvCreateSubdivDelaunay2D(boundRect, storage); 46 | for (size_t e = 0; e < vP.size(); e++) { 47 | cvSubdivDelaunay2DInsert(subdiv, vP[e]); 48 | } 49 | // Walk every element of the subdivision's edge sequence. 50 | CvSeqReader reader; 51 | int i, total = subdiv->edges->total; 52 | int elem_size = subdiv->edges->elem_size; 53 | 54 | cvStartReadSeq((CvSeq *)(subdiv->edges), &reader, 0); 55 | // Results are gathered in a set, so duplicates are dropped. 56 | set V; 57 | 58 | for (i = 0; i < total; i++) { 59 | CvQuadEdge2D *edge = (CvQuadEdge2D *)(reader.ptr); 60 | 61 | if (CV_IS_SET_ELEM(edge)) { 62 | CvSubdiv2DEdge e = (CvSubdiv2DEdge)edge; 63 | FindTriangleFromEdge(e, V); 64 | 65 | CvSubdiv2DEdge e1 = (CvSubdiv2DEdge)edge + 2; //=next[2] 66 | FindTriangleFromEdge(e1, V); 67 | } 68 | CV_NEXT_SEQ_ELEM(elem_size, reader); 69 | } 70 | cvReleaseMemStorage(&storage); 71 | vector ans; 72 | ans.resize(V.size()); 73 | std::copy(V.begin(), V.end(), ans.begin()); 74 | return ans; 75 | } 76 | //! Orders points by x, then by y; used as the comparator of the point -> id map below. 77 | template 78 | struct PointLess { 79 | bool operator()(const Point_ &pa, const Point_ &pb) const { 80 | return (pa.x < pb.x) || (pa.x == pb.x && pa.y < pb.y); 81 | } 82 | }; 83 | // Ordering for TriangleInID (only declared here); needed to keep TriangleInID values in a set. 84 | bool operator<(const TriangleInID &a, const TriangleInID &b); 85 | //! Walks up to three edge origins from e (CV_NEXT_AROUND_LEFT); inserts the sorted id triple into V and returns true only if all three origins are found in pMap. 86 | template 87 | bool FindTriangleIDFromEdge(CvSubdiv2DEdge e, set &V, 88 | map, int, PointLess > &pMap) { 89 | CvSubdiv2DEdge t = e; 90 | TriangleInID triT; 91 | int iPointNum = 3; 92 | int j; 93 | 94 | for (j = 0; j < iPointNum; j++) { 95 | CvSubdiv2DPoint *pt = cvSubdiv2DEdgeOrg(t); 96 | if (!pt) break; 97 | if (pMap.find(Point_(pt->pt.x, pt->pt.y)) != pMap.end()) 98 | triT.v[j] = pMap.find(Point_(pt->pt.x, pt->pt.y))->second; 99 | else 100 | return false; 101 | t = cvSubdiv2DGetEdge(t, CV_NEXT_AROUND_LEFT); 102 | } 103 | if (j == iPointNum) { 104 | sort(triT.v, triT.v + 3); 105 | V.insert(triT); 106 | return true; 107 | } 108 | 109 | return false; 110 | } 111 | 112 |
//! Find the Delaunay division for given points(Return in point id). 113 | template 114 | vector delaunayDivInID(const vector > &vP, 115 | cv::Rect boundRect) { 116 | map, int, PointLess > pMap; 117 | // pMap records each point's index in vP so that triangles can be reported as ids. 118 | CvSubdiv2D *subdiv; 119 | 120 | CvMemStorage *storage; 121 | storage = cvCreateMemStorage(0); 122 | subdiv = cvCreateSubdivDelaunay2D(boundRect, storage); 123 | for (size_t e = 0; e < vP.size(); e++) { 124 | pMap[vP[e]] = e; 125 | cvSubdivDelaunay2DInsert(subdiv, vP[e]); 126 | } 127 | // Walk every element of the subdivision's edge sequence. 128 | CvSeqReader reader; 129 | int i, total = subdiv->edges->total; 130 | int elem_size = subdiv->edges->elem_size; 131 | 132 | cvStartReadSeq((CvSeq *)(subdiv->edges), &reader, 0); 133 | // Results are gathered in a set, so duplicates are dropped. 134 | set V; 135 | 136 | for (i = 0; i < total; i++) { 137 | CvQuadEdge2D *edge = (CvQuadEdge2D *)(reader.ptr); 138 | 139 | if (CV_IS_SET_ELEM(edge)) { 140 | CvSubdiv2DEdge e = (CvSubdiv2DEdge)edge; 141 | FindTriangleIDFromEdge(e, V, pMap); 142 | 143 | CvSubdiv2DEdge e1 = (CvSubdiv2DEdge)edge + 2; 144 | FindTriangleIDFromEdge(e1, V, pMap); 145 | } 146 | CV_NEXT_SEQ_ELEM(elem_size, reader); 147 | } 148 | cvReleaseMemStorage(&storage); 149 | vector ans; 150 | ans.resize(V.size()); 151 | std::copy(V.begin(), V.end(), ans.begin()); 152 | return ans; 153 | } 154 | //! Fills mapMat (labelSize) so each pixel holds the index in triList of the triangle drawn over it; pixels not covered by any triangle keep triList.size(). 155 | template 156 | void labelMatByTriInID(const vector > &vP, 157 | vector &triList, Mat_ &mapMat, 158 | cv::Size labelSize) { 159 | mapMat.create(labelSize); 160 | mapMat.setTo(triList.size()); 161 | 162 | vector::iterator it; 163 | Point_ v[3]; 164 | for (it = triList.begin(); it != triList.end(); it++) { 165 | // Not interested in points outside the region.
166 | v[0] = vP[it->v[0]]; 167 | v[1] = vP[it->v[1]]; 168 | v[2] = vP[it->v[2]]; 169 | // The fill value is the triangle's position in triList. 170 | cv::fillConvexPoly(mapMat, v, 3, it - triList.begin()); 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /include/imgwarp_mls.h: -------------------------------------------------------------------------------- 1 | #ifndef IMGTRANS_MLS_H 2 | #define IMGTRANS_MLS_H 3 | 4 | #include "opencv/cv.h" 5 | #include 6 | using std::vector; 7 | 8 | using cv::Mat; 9 | using cv::Mat_; 10 | using cv::Point_; 11 | using cv::Point; 12 | 13 | //! The base class for Moving Least Square image warping. 14 | /*! 15 | * Choose one of the subclasses, the easiest interface to generate 16 | * an output is to use setAllAndGenerate function. 17 | */ 18 | class ImgWarp_MLS { 19 | public: 20 | ImgWarp_MLS(); 21 | virtual ~ImgWarp_MLS() {} 22 | 23 | //! Set all and generate an output. 24 | /*! 25 | \param oriImg the image to be warped. 26 | \param qsrc A list of "from" points. 27 | \param qdst A list of "target" points. 28 | \param outW The width of the output image. 29 | \param outH The height of the output image. 30 | \param transRatio 1 means warp to target points, 0 means no warping 31 | 32 | This will do all the initialization and generate a warped image. 33 | After calling this, one can later call genNewImg with different 34 | transRatios to generate a warping animation. 35 | */ 36 | Mat setAllAndGenerate(const Mat &oriImg, const vector > &qsrc, 37 | const vector > &qdst, const int outW, 38 | const int outH, const double transRatio = 1); 39 | 40 | //! Generate the warped image. 41 | /*! This function generates a warped image using PRE-CALCULATED data. 42 | * DO NOT CALL THIS AT FIRST! Call this after at least one call of 43 | * setAllAndGenerate. 44 | */ 45 | Mat genNewImg(const Mat &oriImg, double transRatio); 46 | 47 | //! Calculate delta value which will be used for generating the warped 48 | //image.
49 | virtual void calcDelta() = 0; 50 | 51 | //! Parameter for MLS. 52 | double alpha; 53 | 54 | //! Parameter for MLS. 55 | int gridSize; 56 | 57 | //! Set the list of target points 58 | void setDstPoints(const vector > &qdst); 59 | 60 | //! Set the list of source points 61 | void setSrcPoints(const vector > &qsrc); 62 | 63 | //! The size of the original image. For precalculation. 64 | void setSize(int w, int h) { srcW = w, srcH = h; } 65 | 66 | //! The size of output image 67 | void setTargetSize(const int outW, const int outH) { 68 | tarW = outW; 69 | tarH = outH; 70 | } 71 | 72 | protected: 73 | vector > oldDotL, newDotL; 74 | 75 | int nPoint; 76 | 77 | Mat_ /*! \brief delta_x */ rDx, /*! \brief delta_y */ rDy; 78 | 79 | int srcW, srcH; 80 | int tarW, tarH; 81 | }; 82 | 83 | #endif // IMGTRANS_MLS_H 84 | -------------------------------------------------------------------------------- /include/imgwarp_mls_rigid.h: -------------------------------------------------------------------------------- 1 | /* 2 | This library is free software; you can redistribute it and/or 3 | modify it under the terms of the GNU Library General Public 4 | License version 2 as published by the Free Software Foundation. 5 | 6 | This library is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 9 | Library General Public License for more details. 10 | 11 | You should have received a copy of the GNU Library General Public License 12 | along with this library; see the file COPYING.LIB. If not, write to 13 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 14 | Boston, MA 02110-1301, USA.
15 | */ 16 | #ifndef IMGTRANS_MLS_RIGID_H 17 | #define IMGTRANS_MLS_RIGID_H 18 | 19 | #include "imgwarp_mls.h" 20 | #include "opencv/cv.h" 21 | #include 22 | using std::vector; 23 | 24 | using cv::Mat; 25 | using cv::Mat_; 26 | using cv::Point_; 27 | 28 | //! The class for MLS Rigid transform. 29 | /*! 30 | * It will try to keep the image rigid. You can set preScale if you 31 | * can accept uniform transform. 32 | */ 33 | class ImgWarp_MLS_Rigid : public ImgWarp_MLS 34 | { 35 | public: 36 | //! Whether to uniformly scale the points before deformation 37 | bool preScale; 38 | 39 | ImgWarp_MLS_Rigid(); 40 | void calcDelta(); //!< Implements ImgWarp_MLS::calcDelta() for the rigid variant. 41 | }; 42 | 43 | #endif // IMGTRANS_MLS_RIGID_H 44 | -------------------------------------------------------------------------------- /include/imgwarp_mls_similarity.h: -------------------------------------------------------------------------------- 1 | /* 2 | This library is free software; you can redistribute it and/or 3 | modify it under the terms of the GNU Library General Public 4 | License version 2 as published by the Free Software Foundation. 5 | 6 | This library is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 9 | Library General Public License for more details. 10 | 11 | You should have received a copy of the GNU Library General Public License 12 | along with this library; see the file COPYING.LIB. If not, write to 13 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 14 | Boston, MA 02110-1301, USA. 15 | */ 16 | 17 | #ifndef IMGTRANS_MLS_SIMILARITY_H 18 | #define IMGTRANS_MLS_SIMILARITY_H 19 | #include "imgwarp_mls.h" 20 | #include "opencv/cv.h" 21 | #include 22 | using std::vector; 23 | 24 | using cv::Mat; 25 | using cv::Mat_; 26 | using cv::Point_; 27 | 28 | //! The class for MLS Similarity transform.
29 | class ImgWarp_MLS_Similarity : public ImgWarp_MLS { 30 | public: 31 | void calcDelta(); //!< Implements ImgWarp_MLS::calcDelta() for the similarity variant. 32 | }; 33 | 34 | #endif // IMGTRANS_MLS_SIMILARITY_H 35 | -------------------------------------------------------------------------------- /include/imgwarp_piecewiseaffine.h: -------------------------------------------------------------------------------- 1 | #ifndef IMGTRANSPIECEWISEAFFINE_H 2 | #define IMGTRANSPIECEWISEAFFINE_H 3 | 4 | #include "imgwarp_mls.h" 5 | //! Piece-wise affine warp exposed through the ImgWarp_MLS interface. 6 | class ImgWarp_PieceWiseAffine : 7 | public ImgWarp_MLS 8 | { 9 | public: 10 | //! How to deal with the background. 11 | /*! 12 | BGNone: No background is reserved. 13 | BGMLS: Use MLS to deal with the background. 14 | BGPieceWise: Use the same scheme for the background. 15 | */ 16 | enum BGFill { 17 | BGNone, //!< No background is reserved. 18 | BGMLS, //!< Use MLS to deal with the background. 19 | BGPieceWise}; //!< Use the same scheme for the background. 20 | 21 | ImgWarp_PieceWiseAffine(void); 22 | ~ImgWarp_PieceWiseAffine(void); 23 | //! Implements ImgWarp_MLS::calcDelta() for this warp. 24 | void calcDelta(); 25 | BGFill backGroundFillAlg; 26 | private: 27 | Point_ getMLSDelta(int x, int y); 28 | }; 29 | 30 | #endif //IMGTRANSPIECEWISEAFFINE_H -------------------------------------------------------------------------------- /pic/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HCIILAB/Text-Image-Augmentation/e1f85a2b2980987828d6faaf5faad47d4e9c765a/pic/demo.png -------------------------------------------------------------------------------- /pic/gif_Distort.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HCIILAB/Text-Image-Augmentation/e1f85a2b2980987828d6faaf5faad47d4e9c765a/pic/gif_Distort.gif -------------------------------------------------------------------------------- /pic/gif_Perspective.gif: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HCIILAB/Text-Image-Augmentation/e1f85a2b2980987828d6faaf5faad47d4e9c765a/pic/gif_Perspective.gif -------------------------------------------------------------------------------- /pic/gif_Stretch.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HCIILAB/Text-Image-Augmentation/e1f85a2b2980987828d6faaf5faad47d4e9c765a/pic/gif_Stretch.gif -------------------------------------------------------------------------------- /src/Augment.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "opencv2/opencv.hpp" 3 | #include "opencv2/core/core.hpp" 4 | #include 5 | #include 6 | 7 | #include "conversion.h" 8 | #include "imgwarp_mls_rigid.h" 9 | #include "imgwarp_mls_similarity.h" 10 | #include "imgwarp_piecewiseaffine.h" 11 | 12 | #include 13 | #include 14 | 15 | namespace py = boost::python; 16 | 17 | typedef unsigned char uchar_t; 18 | 19 | cv::Mat Distort(cv::Mat img_input, const int segment){ 20 | 21 | ImgWarp_MLS_Similarity trans1; 22 | // ImgWarp_MLS_Rigid trans2; 23 | // ImgWarp_PieceWiseAffine trans3; 24 | vector > qsrc; 25 | vector > qdst; 26 | qsrc.clear(); 27 | qdst.clear(); 28 | 29 | int cut = img_input.cols/segment; 30 | int threshold = cut*1./3; 31 | 32 | struct timeb timeSeed; 33 | ftime(&timeSeed); 34 | srand(timeSeed.time*1000+timeSeed.millitm); 35 | 36 | qsrc.push_back(Point(0, 0)); 37 | qsrc.push_back(Point(img_input.cols, 0)); 38 | qsrc.push_back(Point(img_input.cols, img_input.rows)); 39 | qsrc.push_back(Point(0, img_input.rows)); 40 | 41 | qdst.push_back(Point(rand()%threshold, rand()%threshold)); 42 | qdst.push_back(Point(img_input.cols-rand()%threshold, rand()%threshold)); 43 | qdst.push_back(Point(img_input.cols-rand()%threshold, img_input.rows-rand()%threshold)); 44 | qdst.push_back(Point(rand()%threshold, img_input.rows-rand()%threshold)); 45 | 46 | for (int i = 1; i < segment; i++){ 47 | 
qsrc.push_back(Point(cut*i, 0)); 48 | qsrc.push_back(Point(cut*i, img_input.rows)); 49 | qdst.push_back(Point(cut*i+rand()%threshold-0.5*threshold, rand()%threshold-0.5*threshold)); 50 | qdst.push_back(Point(cut*i+rand()%threshold-0.5*threshold, img_input.rows+rand()%threshold-0.5*threshold)); 51 | } 52 | 53 | cv::Mat result = trans1.setAllAndGenerate(img_input, qsrc, qdst, img_input.cols, img_input.rows); 54 | return result; 55 | } 56 | 57 | cv::Mat Stretch(cv::Mat img_input, const int segment){ 58 | 59 | ImgWarp_MLS_Similarity trans1; 60 | // ImgWarp_MLS_Rigid trans2; 61 | // ImgWarp_PieceWiseAffine trans3; 62 | vector > qsrc; 63 | vector > qdst; 64 | qsrc.clear(); 65 | qdst.clear(); 66 | 67 | qsrc.push_back(Point(0, 0)); 68 | qsrc.push_back(Point(img_input.cols, 0)); 69 | qsrc.push_back(Point(img_input.cols, img_input.rows)); 70 | qsrc.push_back(Point(0, img_input.rows)); 71 | 72 | qdst.push_back(Point(0, 0)); 73 | qdst.push_back(Point(img_input.cols, 0)); 74 | qdst.push_back(Point(img_input.cols, img_input.rows)); 75 | qdst.push_back(Point(0, img_input.rows)); 76 | 77 | int cut = img_input.cols/segment; 78 | int threshold = cut*4/5; 79 | 80 | struct timeb timeSeed; 81 | ftime(&timeSeed); 82 | srand(timeSeed.time*1000+timeSeed.millitm); 83 | int move = 0; 84 | for (int i = 1; i < segment; i++){ 85 | move = rand()%threshold-0.5*threshold; 86 | qsrc.push_back(Point(cut*i, 0)); 87 | qsrc.push_back(Point(cut*i, img_input.rows)); 88 | qdst.push_back(Point(cut*i+move, 0)); 89 | qdst.push_back(Point(cut*i+move, img_input.rows)); 90 | } 91 | 92 | cv::Mat result = trans1.setAllAndGenerate(img_input, qsrc, qdst, img_input.cols, img_input.rows); 93 | return result; 94 | } 95 | 96 | cv::Mat Perspective(cv::Mat img_input){ 97 | 98 | ImgWarp_MLS_Similarity trans1; 99 | // ImgWarp_MLS_Rigid trans2; 100 | // ImgWarp_PieceWiseAffine trans3; 101 | vector > qsrc; 102 | vector > qdst; 103 | qsrc.clear(); 104 | qdst.clear(); 105 | 106 | int threshold = img_input.rows*0.5; 107 | 108 
| struct timeb timeSeed; 109 | ftime(&timeSeed); 110 | srand(timeSeed.time*1000+timeSeed.millitm); 111 | 112 | qsrc.push_back(Point(0, 0)); 113 | qsrc.push_back(Point(img_input.cols, 0)); 114 | qsrc.push_back(Point(img_input.cols, img_input.rows)); 115 | qsrc.push_back(Point(0, img_input.rows)); 116 | 117 | qdst.push_back(Point(0, rand()%threshold)); 118 | qdst.push_back(Point(img_input.cols, rand()%threshold)); 119 | qdst.push_back(Point(img_input.cols, img_input.rows-rand()%threshold)); 120 | qdst.push_back(Point(0, img_input.rows-rand()%threshold)); 121 | 122 | cv::Mat result = trans1.setAllAndGenerate(img_input, qsrc, qdst, img_input.cols, img_input.rows); 123 | return result; 124 | } 125 | 126 | PyObject* 127 | GenerateDistort(PyObject *img, int segment) 128 | { 129 | NDArrayConverter cvt; 130 | cv::Mat img_input; 131 | img_input = cvt.toMat(img); 132 | cv::Mat result = Distort(img_input, segment); 133 | 134 | PyObject* ret = cvt.toNDArray(result); 135 | 136 | return ret; 137 | } 138 | 139 | PyObject* 140 | GenerateStretch(PyObject *img, int segment) 141 | { 142 | NDArrayConverter cvt; 143 | cv::Mat img_input; 144 | img_input = cvt.toMat(img); 145 | cv::Mat result = Stretch(img_input, segment); 146 | 147 | PyObject* ret = cvt.toNDArray(result); 148 | 149 | return ret; 150 | } 151 | 152 | PyObject* 153 | GeneratePerspective(PyObject *img) 154 | { 155 | NDArrayConverter cvt; 156 | cv::Mat img_input; 157 | img_input = cvt.toMat(img); 158 | cv::Mat result = Perspective(img_input); 159 | 160 | PyObject* ret = cvt.toNDArray(result); 161 | 162 | return ret; 163 | } 164 | 165 | static void init() 166 | { 167 | Py_Initialize(); 168 | import_array(); 169 | } 170 | 171 | BOOST_PYTHON_MODULE(Augment) 172 | { 173 | init(); 174 | py::def("GenerateDistort", GenerateDistort); 175 | py::def("GenerateStretch", GenerateStretch); 176 | py::def("GeneratePerspective", GeneratePerspective); 177 | } 178 | 
-------------------------------------------------------------------------------- /src/conversion.cpp: -------------------------------------------------------------------------------- 1 | # include "conversion.h" 2 | /* 3 | * The following conversion functions are taken/adapted from OpenCV's cv2.cpp file 4 | * inside modules/python/src2 folder. 5 | */ 6 | 7 | // static void init() 8 | // { 9 | // import_array(); 10 | // } 11 | // Formats a message, sets it as a Python TypeError and returns 0. NOTE(review): toMat below now throws via CV_Error_ instead of calling this; remove it if nothing else in the file uses it. 12 | static int failmsg(const char *fmt, ...) 13 | { 14 | char str[1000]; 15 | 16 | va_list ap; 17 | va_start(ap, fmt); 18 | vsnprintf(str, sizeof(str), fmt, ap); 19 | va_end(ap); 20 | 21 | PyErr_SetString(PyExc_TypeError, str); 22 | return 0; 23 | } 24 | // Scope guard: PyEval_SaveThread() on construction, PyEval_RestoreThread() on destruction. 25 | class PyAllowThreads 26 | { 27 | public: 28 | PyAllowThreads() : _state(PyEval_SaveThread()) {} 29 | ~PyAllowThreads() 30 | { 31 | PyEval_RestoreThread(_state); 32 | } 33 | private: 34 | PyThreadState* _state; 35 | }; 36 | // Scope guard: PyGILState_Ensure() on construction, PyGILState_Release() on destruction. 37 | class PyEnsureGIL 38 | { 39 | public: 40 | PyEnsureGIL() : _state(PyGILState_Ensure()) {} 41 | ~PyEnsureGIL() 42 | { 43 | PyGILState_Release(_state); 44 | } 45 | private: 46 | PyGILState_STATE _state; 47 | }; 48 | 49 | using namespace cv; 50 | 51 | // static PyObject* failmsgp(const char *fmt, ...) 52 | // { 53 | // char str[1000]; 54 | 55 | // va_list ap; 56 | // va_start(ap, fmt); 57 | // vsnprintf(str, sizeof(str), fmt, ap); 58 | // va_end(ap); 59 | 60 | // PyErr_SetString(PyExc_TypeError, str); 61 | // return 0; 62 | // } 63 | // cv::Mat allocator whose buffers are freshly created numpy arrays; the Mat's refcount pointer aliases the array's Python reference count. 64 | class NumpyAllocator : public MatAllocator 65 | { 66 | public: 67 | NumpyAllocator() {} 68 | ~NumpyAllocator() {} 69 | 70 | void allocate(int dims, const int* sizes, int type, int*& refcount, 71 | uchar*& datastart, uchar*& data, size_t* step) 72 | { 73 | PyEnsureGIL gil; 74 | 75 | int depth = CV_MAT_DEPTH(type); 76 | int cn = CV_MAT_CN(type); 77 | const int f = (int)(sizeof(size_t)/8); 78 | int typenum = depth == CV_8U ? NPY_UBYTE : depth == CV_8S ? NPY_BYTE : 79 | depth == CV_16U ? NPY_USHORT : depth == CV_16S ?
NPY_SHORT : 80 | depth == CV_32S ? NPY_INT : depth == CV_32F ? NPY_FLOAT : 81 | depth == CV_64F ? NPY_DOUBLE : f*NPY_ULONGLONG + (f^1)*NPY_UINT; 82 | int i; 83 | npy_intp _sizes[CV_MAX_DIM+1]; 84 | for( i = 0; i < dims; i++ ) 85 | { 86 | _sizes[i] = sizes[i]; 87 | } 88 | 89 | if( cn > 1 ) 90 | { 91 | _sizes[dims++] = cn; 92 | } 93 | 94 | PyObject* o = PyArray_SimpleNew(dims, _sizes, typenum); 95 | 96 | if(!o) 97 | { 98 | CV_Error_(CV_StsError, ("The numpy array of typenum=%d, ndims=%d can not be created", typenum, dims)); 99 | } 100 | refcount = refcountFromPyObject(o); 101 | 102 | npy_intp* _strides = PyArray_STRIDES(o); 103 | for( i = 0; i < dims - (cn > 1); i++ ) 104 | step[i] = (size_t)_strides[i]; 105 | datastart = data = (uchar*)PyArray_DATA(o); 106 | } 107 | // NOTE(review): the INCREF/DECREF pair presumably relies on the counter already being zero here, so that the DECREF frees the array; pattern inherited from cv2.cpp - confirm. 108 | void deallocate(int* refcount, uchar*, uchar*) 109 | { 110 | PyEnsureGIL gil; 111 | if( !refcount ) 112 | return; 113 | PyObject* o = pyObjectFromRefcount(refcount); 114 | Py_INCREF(o); 115 | Py_DECREF(o); 116 | } 117 | }; 118 | 119 | NumpyAllocator g_numpyAllocator; 120 | 121 | NDArrayConverter::NDArrayConverter() { init(); } 122 | 123 | void NDArrayConverter::init() 124 | { 125 | import_array(); 126 | } 127 | // Wraps a numpy array as a cv::Mat over the same memory (a transposed layout is copied). Throws cv::Exception for NULL/None, non-array objects, unsupported dtypes and unsupported dimensionality. 128 | cv::Mat NDArrayConverter::toMat(const PyObject *o) 129 | { 130 | cv::Mat m; 131 | 132 | if(!o || o == Py_None) 133 | { 134 | // Stop here: PyArray_Check below must not be handed NULL, and None carries no image. 135 | CV_Error_(CV_StsError, ("toMat: Object is None")); 136 | } 137 | 138 | if( !PyArray_Check(o) ) 139 | { 140 | CV_Error_(CV_StsError, ("toMat: Object is not a numpy array")); 141 | } 142 | 143 | int typenum = PyArray_TYPE(o); 144 | int type = typenum == NPY_UBYTE ? CV_8U : typenum == NPY_BYTE ? CV_8S : 145 | typenum == NPY_USHORT ? CV_16U : typenum == NPY_SHORT ? CV_16S : 146 | typenum == NPY_INT || typenum == NPY_LONG ? CV_32S : 147 | typenum == NPY_FLOAT ? CV_32F : 148 | typenum == NPY_DOUBLE ?
CV_64F : -1; 149 | 150 | if( type < 0 ) 151 | { 152 | CV_Error_(CV_StsError, ("toMat: Data type = %d is not supported", typenum)); 153 | } 154 | 155 | int ndims = PyArray_NDIM(o); 156 | 157 | if(ndims >= CV_MAX_DIM) 158 | { 159 | CV_Error_(CV_StsError, ("toMat: Dimensionality (=%d) is too high", ndims)); 160 | } 161 | 162 | int size[CV_MAX_DIM+1]; 163 | size_t step[CV_MAX_DIM+1], elemsize = CV_ELEM_SIZE1(type); 164 | const npy_intp* _sizes = PyArray_DIMS(o); 165 | const npy_intp* _strides = PyArray_STRIDES(o); 166 | bool transposed = false; 167 | 168 | for(int i = 0; i < ndims; i++) 169 | { 170 | size[i] = (int)_sizes[i]; 171 | step[i] = (size_t)_strides[i]; 172 | } 173 | 174 | if( ndims == 0 || step[ndims-1] > elemsize ) { 175 | size[ndims] = 1; 176 | step[ndims] = elemsize; 177 | ndims++; 178 | } 179 | 180 | if( ndims >= 2 && step[0] < step[1] ) 181 | { 182 | std::swap(size[0], size[1]); 183 | std::swap(step[0], step[1]); 184 | transposed = true; 185 | } 186 | 187 | if( ndims == 3 && size[2] <= CV_CN_MAX && step[1] == elemsize*size[2] ) 188 | { 189 | ndims--; 190 | type |= CV_MAKETYPE(0, size[2]); 191 | } 192 | 193 | if( ndims > 2) 194 | { 195 | CV_Error_(CV_StsError, ("toMat: Object has more than 2 dimensions")); 196 | } 197 | 198 | m = Mat(ndims, size, type, PyArray_DATA(o), step); 199 | 200 | if( m.data ) 201 | { 202 | m.refcount = refcountFromPyObject(o); 203 | m.addref(); // protect the original numpy array from deallocation 204 | // (since Mat destructor will decrement the reference counter) 205 | }; 206 | m.allocator = &g_numpyAllocator; 207 | 208 | if( transposed ) 209 | { 210 | Mat tmp; 211 | tmp.allocator = &g_numpyAllocator; 212 | transpose(m, tmp); 213 | m = tmp; 214 | } 215 | return m; 216 | } 217 | 218 | PyObject* NDArrayConverter::toNDArray(const cv::Mat& m) 219 | { 220 | if( !m.data ) 221 | Py_RETURN_NONE; 222 | Mat temp, *p = (Mat*)&m; 223 | if(!p->refcount || p->allocator != &g_numpyAllocator) 224 | { 225 | temp.allocator = &g_numpyAllocator; 226 | m.copyTo(temp); 227 | p = &temp; 228 | } 229 |
p->addref(); 230 | return pyObjectFromRefcount(p->refcount); 231 | } 232 | -------------------------------------------------------------------------------- /src/delaunay.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * delaunay.cpp 3 | * aamlib-opencv 4 | * 5 | * Created by Chen Xing on 10-2-12. 6 | * Copyright 2010 __MyCompanyName__. All rights reserved. 7 | * 8 | */ 9 | 10 | #include "delaunay.h" 11 | using cv::Point2i; 12 | 13 | bool operator<(const Triangle &t1, const Triangle &t2) { 14 | for (int i = 0; i < 3; i++) { 15 | if ((t1.v[i].x < t2.v[i].x) || 16 | (t1.v[i].x == t2.v[i].x && t1.v[i].y < t2.v[i].y)) 17 | return true; 18 | else if (t1.v[i].x > t2.v[i].x) 19 | return false; 20 | else if (t1.v[i].x == t2.v[i].x && t1.v[i].y > t2.v[i].y) 21 | return false; 22 | } 23 | return false; 24 | } 25 | 26 | bool operator<(const TriangleInID &a, const TriangleInID &b) { 27 | return (a.v[0] < b.v[0]) || (a.v[0] == b.v[0] && a.v[1] < b.v[1]) || 28 | (a.v[0] == b.v[0] && a.v[1] == b.v[1] && a.v[2] < b.v[2]); 29 | } 30 | 31 | int pComp(const void *p1, const void *p2) { 32 | if (((Point_ *)p1)->x < ((Point_ *)p2)->x || 33 | (((Point_ *)p1)->x == ((Point_ *)p2)->x && 34 | ((Point_ *)p1)->y < ((Point_ *)p2)->y)) 35 | return -1; 36 | else 37 | return 1; 38 | } 39 | 40 | bool FindTriangleFromEdge(CvSubdiv2DEdge e, set &V) { 41 | CvSubdiv2DEdge t = e; 42 | Triangle triT; 43 | int iPointNum = 3; 44 | int j; 45 | 46 | for (j = 0; j < iPointNum; j++) { 47 | CvSubdiv2DPoint *pt = cvSubdiv2DEdgeOrg(t); 48 | if (!pt) break; 49 | triT.v[j] = cvPoint(cvRound(pt->pt.x), cvRound(pt->pt.y)); 50 | t = cvSubdiv2DGetEdge(t, CV_NEXT_AROUND_LEFT); 51 | } 52 | if (j == iPointNum) { 53 | qsort(triT.v, 3, sizeof(Point_), pComp); 54 | V.insert(triT); 55 | return true; 56 | } 57 | 58 | return false; 59 | } 60 | -------------------------------------------------------------------------------- /src/imgwarp_mls.cpp: 
-------------------------------------------------------------------------------- 1 | #include "imgwarp_mls.h" 2 | #include 3 | 4 | using cv::Vec3b; 5 | 6 | ImgWarp_MLS::ImgWarp_MLS() { gridSize = 100; } 7 | 8 | inline double bilinear_interp(double x, double y, double v11, double v12, 9 | double v21, double v22) { 10 | return (v11 * (1 - y) + v12 * y) * (1 - x) + (v21 * (1 - y) + v22 * y) * x; 11 | } 12 | 13 | Mat ImgWarp_MLS::setAllAndGenerate(const Mat &oriImg, 14 | const vector > &qsrc, 15 | const vector > &qdst, 16 | const int outW, const int outH, 17 | const double transRatio) { 18 | setSize(oriImg.cols, oriImg.rows); 19 | setTargetSize(outW, outH); 20 | setSrcPoints(qsrc); 21 | setDstPoints(qdst); 22 | calcDelta(); 23 | return genNewImg(oriImg, transRatio); 24 | } 25 | 26 | Mat ImgWarp_MLS::genNewImg(const Mat &oriImg, double transRatio) { 27 | int i, j; 28 | double di, dj; 29 | double nx, ny; 30 | int nxi, nyi, nxi1, nyi1; 31 | double deltaX, deltaY; 32 | double w, h; 33 | int ni, nj; 34 | 35 | Mat newImg(tarH, tarW, oriImg.type()); 36 | for (i = 0; i < tarH; i += gridSize) 37 | for (j = 0; j < tarW; j += gridSize) { 38 | ni = i + gridSize, nj = j + gridSize; 39 | w = h = gridSize; 40 | if (ni >= tarH) ni = tarH - 1, h = ni - i + 1; 41 | if (nj >= tarW) nj = tarW - 1, w = nj - j + 1; 42 | for (di = 0; di < h; di++) 43 | for (dj = 0; dj < w; dj++) { 44 | deltaX = 45 | bilinear_interp(di / h, dj / w, rDx(i, j), rDx(i, nj), 46 | rDx(ni, j), rDx(ni, nj)); 47 | deltaY = 48 | bilinear_interp(di / h, dj / w, rDy(i, j), rDy(i, nj), 49 | rDy(ni, j), rDy(ni, nj)); 50 | nx = j + dj + deltaX * transRatio; 51 | ny = i + di + deltaY * transRatio; 52 | if (nx > srcW - 1) nx = srcW - 1; 53 | if (ny > srcH - 1) ny = srcH - 1; 54 | if (nx < 0) nx = 0; 55 | if (ny < 0) ny = 0; 56 | nxi = int(nx); 57 | nyi = int(ny); 58 | nxi1 = ceil(nx); 59 | nyi1 = ceil(ny); 60 | 61 | if (oriImg.channels() == 1) 62 | newImg.at(i + di, j + dj) = bilinear_interp( 63 | ny - nyi, nx - nxi, 
oriImg.at(nyi, nxi), 64 | oriImg.at(nyi, nxi1), 65 | oriImg.at(nyi1, nxi), 66 | oriImg.at(nyi1, nxi1)); 67 | else { 68 | for (int ll = 0; ll < 3; ll++) 69 | newImg.at(i + di, j + dj)[ll] = 70 | bilinear_interp( 71 | ny - nyi, nx - nxi, 72 | oriImg.at(nyi, nxi)[ll], 73 | oriImg.at(nyi, nxi1)[ll], 74 | oriImg.at(nyi1, nxi)[ll], 75 | oriImg.at(nyi1, nxi1)[ll]); 76 | } 77 | } 78 | } 79 | return newImg; 80 | } 81 | 82 | // Set source points and prepare transformation matrices 83 | void ImgWarp_MLS::setSrcPoints(const vector > &qsrc) { 84 | nPoint = qsrc.size(); 85 | 86 | newDotL.clear(); 87 | newDotL.reserve(nPoint); 88 | 89 | for (size_t i = 0; i < qsrc.size(); i++) newDotL.push_back(qsrc[i]); 90 | } 91 | 92 | void ImgWarp_MLS::setDstPoints(const vector > &qdst) { 93 | nPoint = qdst.size(); 94 | oldDotL.clear(); 95 | oldDotL.reserve(nPoint); 96 | 97 | for (size_t i = 0; i < qdst.size(); i++) oldDotL.push_back(qdst[i]); 98 | } 99 | -------------------------------------------------------------------------------- /src/imgwarp_mls_rigid.cpp: -------------------------------------------------------------------------------- 1 | #include "imgwarp_mls_rigid.h" 2 | 3 | ImgWarp_MLS_Rigid::ImgWarp_MLS_Rigid() { preScale = false; } 4 | 5 | double calcArea(const vector > &V) { 6 | Point_ lt, rb; 7 | lt.x = lt.y = 1e10; 8 | rb.x = rb.y = -1e10; 9 | for (vector >::const_iterator i = V.begin(); i != V.end(); 10 | i++) { 11 | if (i->x < lt.x) lt.x = i->x; 12 | if (i->x > rb.x) rb.x = i->x; 13 | if (i->y < lt.y) lt.y = i->y; 14 | if (i->y > rb.y) rb.y = i->y; 15 | } 16 | return (rb.x - lt.x) * (rb.y - lt.y); 17 | } 18 | 19 | void ImgWarp_MLS_Rigid::calcDelta() { 20 | int i, j, k; 21 | 22 | Point_ swq, qstar, newP, tmpP; 23 | double sw; 24 | 25 | double ratio; 26 | 27 | if (preScale) { 28 | ratio = sqrt(calcArea(newDotL) / calcArea(oldDotL)); 29 | for (i = 0; i < nPoint; i++) newDotL[i] *= 1 / ratio; 30 | } 31 | 32 | double *w = new double[nPoint]; 33 | 34 | rDx.create(tarH, tarW); 35 | 
rDy.create(tarH, tarW); 36 | 37 | if (nPoint < 2) { 38 | rDx.setTo(0); 39 | rDy.setTo(0); 40 | return; 41 | } 42 | Point_ swp, pstar, curV, curVJ, Pi, PiJ, Qi; 43 | double miu_r; 44 | 45 | for (i = 0;; i += gridSize) { 46 | if (i >= tarW && i < tarW + gridSize - 1) 47 | i = tarW - 1; 48 | else if (i >= tarW) 49 | break; 50 | for (j = 0;; j += gridSize) { 51 | if (j >= tarH && j < tarH + gridSize - 1) 52 | j = tarH - 1; 53 | else if (j >= tarH) 54 | break; 55 | sw = 0; 56 | swp.x = swp.y = 0; 57 | swq.x = swq.y = 0; 58 | newP.x = newP.y = 0; 59 | curV.x = i; 60 | curV.y = j; 61 | for (k = 0; k < nPoint; k++) { 62 | if ((i == oldDotL[k].x) && j == oldDotL[k].y) break; 63 | if (alpha == 1) 64 | w[k] = 1 / ((i - oldDotL[k].x) * (i - oldDotL[k].x) + 65 | (j - oldDotL[k].y) * (j - oldDotL[k].y)); 66 | else 67 | w[k] = pow((i - oldDotL[k].x) * (i - oldDotL[k].x) + 68 | (j - oldDotL[k].y) * (j - oldDotL[k].y), 69 | -alpha); 70 | sw = sw + w[k]; 71 | swp = swp + w[k] * oldDotL[k]; 72 | swq = swq + w[k] * newDotL[k]; 73 | } 74 | if (k == nPoint) { 75 | pstar = (1 / sw) * swp; 76 | qstar = 1 / sw * swq; 77 | 78 | // Calc miu_r 79 | double s1 = 0, s2 = 0; 80 | for (k = 0; k < nPoint; k++) { 81 | if (i == oldDotL[k].x && j == oldDotL[k].y) continue; 82 | 83 | Pi = oldDotL[k] - pstar; 84 | PiJ.x = -Pi.y, PiJ.y = Pi.x; 85 | Qi = newDotL[k] - qstar; 86 | s1 += w[k] * Qi.dot(Pi); 87 | s2 += w[k] * Qi.dot(PiJ); 88 | } 89 | miu_r = sqrt(s1 * s1 + s2 * s2); 90 | 91 | curV -= pstar; 92 | curVJ.x = -curV.y, curVJ.y = curV.x; 93 | 94 | for (k = 0; k < nPoint; k++) { 95 | if (i == oldDotL[k].x && j == oldDotL[k].y) continue; 96 | 97 | Pi = oldDotL[k] - pstar; 98 | PiJ.x = -Pi.y, PiJ.y = Pi.x; 99 | 100 | tmpP.x = Pi.dot(curV) * newDotL[k].x - 101 | PiJ.dot(curV) * newDotL[k].y; 102 | tmpP.y = -Pi.dot(curVJ) * newDotL[k].x + 103 | PiJ.dot(curVJ) * newDotL[k].y; 104 | tmpP *= w[k] / miu_r; 105 | newP += tmpP; 106 | } 107 | newP += qstar; 108 | } else { 109 | newP = newDotL[k]; 110 | } 111 | 
112 | if (preScale) { 113 | rDx(j, i) = newP.x * ratio - i; 114 | rDy(j, i) = newP.y * ratio - j; 115 | } else { 116 | rDx(j, i) = newP.x - i; 117 | rDy(j, i) = newP.y - j; 118 | } 119 | } 120 | } 121 | delete[] w; 122 | 123 | if (preScale) { 124 | for (i = 0; i < nPoint; i++) newDotL[i] *= ratio; 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/imgwarp_mls_similarity.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This library is free software; you can redistribute it and/or 3 | modify it under the terms of the GNU Library General Public 4 | License version 2 as published by the Free Software Foundation. 5 | 6 | This library is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 9 | Library General Public License for more details. 10 | 11 | You should have received a copy of the GNU Library General Public License 12 | along with this library; see the file COPYING.LIB. If not, write to 13 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 14 | Boston, MA 02110-1301, USA. 
15 | */ 16 | 17 | #include "imgwarp_mls_similarity.h" 18 | 19 | void ImgWarp_MLS_Similarity::calcDelta() { 20 | int i, j, k; 21 | 22 | Point_ swq, qstar, newP, tmpP; 23 | double sw; 24 | 25 | double *w = new double[nPoint]; 26 | 27 | rDx.create(tarH, tarW); 28 | rDy.create(tarH, tarW); 29 | 30 | if (nPoint < 2) { 31 | rDx.setTo(0); 32 | rDy.setTo(0); 33 | return; 34 | } 35 | 36 | Point_ swp, pstar, curV, curVJ, Pi, PiJ; 37 | double miu_s; 38 | 39 | for (i = 0;; i += gridSize) { 40 | if (i >= tarW && i < tarW + gridSize - 1) 41 | i = tarW - 1; 42 | else if (i >= tarW) 43 | break; 44 | for (j = 0;; j += gridSize) { 45 | if (j >= tarH && j < tarH + gridSize - 1) 46 | j = tarH - 1; 47 | else if (j >= tarH) 48 | break; 49 | sw = 0; 50 | swp.x = swp.y = 0; 51 | swq.x = swq.y = 0; 52 | newP.x = newP.y = 0; 53 | curV.x = i; 54 | curV.y = j; 55 | for (k = 0; k < nPoint; k++) { 56 | if ((i == oldDotL[k].x) && j == oldDotL[k].y) break; 57 | /* w[k] = pow((i-oldDotL[k].x)*(i-oldDotL[k].x)+ 58 | (j-oldDotL[k].y)*(j-oldDotL[k].y), -alpha);*/ 59 | w[k] = 1 / ((i - oldDotL[k].x) * (i - oldDotL[k].x) + 60 | (j - oldDotL[k].y) * (j - oldDotL[k].y)); 61 | sw = sw + w[k]; 62 | swp = swp + w[k] * oldDotL[k]; 63 | swq = swq + w[k] * newDotL[k]; 64 | } 65 | if (k == nPoint) { 66 | pstar = (1 / sw) * swp; 67 | qstar = 1 / sw * swq; 68 | 69 | // Calc miu_s 70 | miu_s = 0; 71 | for (k = 0; k < nPoint; k++) { 72 | if (i == oldDotL[k].x && j == oldDotL[k].y) continue; 73 | 74 | Pi = oldDotL[k] - pstar; 75 | miu_s += w[k] * Pi.dot(Pi); 76 | } 77 | 78 | curV -= pstar; 79 | curVJ.x = -curV.y, curVJ.y = curV.x; 80 | 81 | for (k = 0; k < nPoint; k++) { 82 | if (i == oldDotL[k].x && j == oldDotL[k].y) continue; 83 | 84 | Pi = oldDotL[k] - pstar; 85 | PiJ.x = -Pi.y, PiJ.y = Pi.x; 86 | 87 | tmpP.x = Pi.dot(curV) * newDotL[k].x - 88 | PiJ.dot(curV) * newDotL[k].y; 89 | tmpP.y = -Pi.dot(curVJ) * newDotL[k].x + 90 | PiJ.dot(curVJ) * newDotL[k].y; 91 | tmpP *= w[k] / miu_s; 92 | newP += tmpP; 93 | } 94 | 
newP += qstar; 95 | } else { 96 | newP = newDotL[k]; 97 | } 98 | 99 | rDx(j, i) = newP.x - i; 100 | rDy(j, i) = newP.y - j; 101 | } 102 | } 103 | 104 | delete[] w; 105 | } 106 | -------------------------------------------------------------------------------- /src/imgwarp_piecewiseaffine.cpp: -------------------------------------------------------------------------------- 1 | #include "imgwarp_piecewiseaffine.h" 2 | #include "delaunay.h" 3 | 4 | #include "opencv/highgui.h" 5 | 6 | using cv::Point2d; 7 | 8 | ImgWarp_PieceWiseAffine::ImgWarp_PieceWiseAffine(void) { 9 | backGroundFillAlg = BGNone; 10 | } 11 | 12 | ImgWarp_PieceWiseAffine::~ImgWarp_PieceWiseAffine(void) {} 13 | 14 | Point_ ImgWarp_PieceWiseAffine::getMLSDelta(int x, int y) { 15 | static Point_ swq, qstar, newP, tmpP; 16 | double sw; 17 | 18 | static vector w; 19 | w.resize(nPoint); 20 | 21 | static Point_ swp, pstar, curV, curVJ, Pi, PiJ; 22 | double miu_s; 23 | 24 | int i = x; 25 | int j = y; 26 | int k; 27 | 28 | sw = 0; 29 | swp.x = swp.y = 0; 30 | swq.x = swq.y = 0; 31 | newP.x = newP.y = 0; 32 | curV.x = i; 33 | curV.y = j; 34 | for (k = 0; k < nPoint; k++) { 35 | if ((i == oldDotL[k].x) && j == oldDotL[k].y) break; 36 | /* w[k] = pow((i-oldDotL[k].x)*(i-oldDotL[k].x)+ 37 | (j-oldDotL[k].y)*(j-oldDotL[k].y), -alpha);*/ 38 | w[k] = 1 / ((i - oldDotL[k].x) * (i - oldDotL[k].x) + 39 | (j - oldDotL[k].y) * (j - oldDotL[k].y)); 40 | sw = sw + w[k]; 41 | swp = swp + w[k] * oldDotL[k]; 42 | swq = swq + w[k] * newDotL[k]; 43 | } 44 | if (k == nPoint) { 45 | pstar = (1 / sw) * swp; 46 | qstar = 1 / sw * swq; 47 | 48 | // Calc miu_s 49 | miu_s = 0; 50 | for (k = 0; k < nPoint; k++) { 51 | if (i == oldDotL[k].x && j == oldDotL[k].y) continue; 52 | 53 | Pi = oldDotL[k] - pstar; 54 | miu_s += w[k] * Pi.dot(Pi); 55 | } 56 | 57 | curV -= pstar; 58 | curVJ.x = -curV.y, curVJ.y = curV.x; 59 | 60 | for (k = 0; k < nPoint; k++) { 61 | if (i == oldDotL[k].x && j == oldDotL[k].y) continue; 62 | 63 | Pi = oldDotL[k] - 
pstar; 64 | PiJ.x = -Pi.y, PiJ.y = Pi.x; 65 | 66 | tmpP.x = Pi.dot(curV) * newDotL[k].x - PiJ.dot(curV) * newDotL[k].y; 67 | tmpP.y = 68 | -Pi.dot(curVJ) * newDotL[k].x + PiJ.dot(curVJ) * newDotL[k].y; 69 | tmpP *= w[k] / miu_s; 70 | newP += tmpP; 71 | } 72 | newP += qstar; 73 | } else { 74 | newP = newDotL[k]; 75 | } 76 | 77 | newP.x -= i; 78 | newP.y -= j; 79 | return newP; 80 | } 81 | 82 | void ImgWarp_PieceWiseAffine::calcDelta() { 83 | Mat_ imgLabel = Mat_::zeros(tarH, tarW); 84 | 85 | rDx = rDx.zeros(tarH, tarW); 86 | rDy = rDy.zeros(tarH, tarW); 87 | for (int i = 0; i < this->nPoint; i++) { 88 | //! Ignore points outside the target image 89 | if (oldDotL[i].x < 0) oldDotL[i].x = 0; 90 | if (oldDotL[i].y < 0) oldDotL[i].y = 0; 91 | if (oldDotL[i].x >= tarW) oldDotL[i].x = tarW - 1; 92 | if (oldDotL[i].y >= tarH) oldDotL[i].y = tarH - 1; 93 | 94 | rDx(oldDotL[i]) = newDotL[i].x - oldDotL[i].x; 95 | rDy(oldDotL[i]) = newDotL[i].y - oldDotL[i].y; 96 | } 97 | rDx(0, 0) = rDy(0, 0) = 0; 98 | rDx(tarH - 1, 0) = rDy(0, tarW - 1) = 0; 99 | rDy(tarH - 1, 0) = rDy(tarH - 1, tarW - 1) = srcH - tarH; 100 | rDx(0, tarW - 1) = rDx(tarH - 1, tarW - 1) = srcW - tarW; 101 | 102 | vector V; 103 | vector::iterator it; 104 | cv::Rect_ boundRect(0, 0, tarW, tarH); 105 | vector > oL1 = oldDotL; 106 | if (backGroundFillAlg == BGPieceWise) { 107 | oL1.push_back(Point2d(0, 0)); 108 | oL1.push_back(Point2d(0, tarH - 1)); 109 | oL1.push_back(Point2d(tarW - 1, 0)); 110 | oL1.push_back(Point2d(tarW - 1, tarH - 1)); 111 | } 112 | // In order preserv the background 113 | V = ::delaunayDiv(oL1, boundRect); 114 | 115 | // vector< TriangleInID > Vt; 116 | // // vector< Triangle >::iterator it; 117 | // // cv::Rect_ boundRect(0, 0, tarW, tarH); 118 | // Vt = ::delaunayDivInID(oldDotL, boundRect); 119 | Mat_ imgTmp = Mat_::zeros(tarH, tarW); 120 | for (it = V.begin(); it != V.end(); it++) { 121 | cv::line(imgTmp, it->v[0], it->v[1], 255, 1, CV_AA); 122 | cv::line(imgTmp, it->v[0], it->v[2], 
255, 1, CV_AA); 123 | cv::line(imgTmp, it->v[2], it->v[1], 255, 1, CV_AA); 124 | 125 | // Not interested in points outside the region. 126 | if (!(it->v[0].inside(boundRect) && it->v[1].inside(boundRect) && 127 | it->v[2].inside(boundRect))) 128 | continue; 129 | 130 | cv::fillConvexPoly(imgLabel, it->v, 3, 131 | cv::Scalar_(it - V.begin() + 1)); 132 | } 133 | // imshow("imgTmp", imgTmp); 134 | // cvWaitKey(10); 135 | 136 | int i, j; 137 | 138 | Point_ v1, v2, curV; 139 | 140 | for (i = 0;; i += gridSize) { 141 | if (i >= tarW && i < tarW + gridSize - 1) 142 | i = tarW - 1; 143 | else if (i >= tarW) 144 | break; 145 | for (j = 0;; j += gridSize) { 146 | if (j >= tarH && j < tarH + gridSize - 1) 147 | j = tarH - 1; 148 | else if (j >= tarH) 149 | break; 150 | int tId = imgLabel(j, i) - 1; 151 | if (tId < 0) { 152 | if (backGroundFillAlg == BGMLS) { 153 | Point_ dV = getMLSDelta(i, j); 154 | rDx(j, i) = dV.x; 155 | rDy(j, i) = dV.y; 156 | } else { 157 | rDx(j, i) = -i; 158 | rDy(j, i) = -j; 159 | } 160 | continue; 161 | } 162 | v1 = V[tId].v[1] - V[tId].v[0]; 163 | v2 = V[tId].v[2] - V[tId].v[0]; 164 | curV.x = i, curV.y = j; 165 | curV -= V[tId].v[0]; 166 | 167 | double d0, d1, d2; 168 | d2 = double(v1.x * curV.y - curV.x * v1.y) / 169 | (v1.x * v2.y - v2.x * v1.y); 170 | d1 = double(v2.x * curV.y - curV.x * v2.y) / 171 | (v2.x * v1.y - v1.x * v2.y); 172 | d0 = 1 - d1 - d2; 173 | rDx(j, i) = d0 * rDx(V[tId].v[0]) + d1 * rDx(V[tId].v[1]) + 174 | d2 * rDx(V[tId].v[2]); 175 | rDy(j, i) = d0 * rDy(V[tId].v[0]) + d1 * rDy(V[tId].v[1]) + 176 | d2 * rDy(V[tId].v[2]); 177 | } 178 | } 179 | } 180 | --------------------------------------------------------------------------------