├── __init__.py
├── nets
├── __init__.py
├── NASNet
│   ├── __init__.py
│   ├── nasnet_utils_test.py
│   ├── pnasnet_test.py
│   └── pnasnet.py
└── Inception_ResNet_V2
│   ├── __init__.py
│   └── nets
│   ├── __init__.py
│   ├── mobilenet_v1.png
│   ├── inception.py
│   ├── nets_factory_test.py
│   ├── inception_utils.py
│   ├── lenet.py
│   ├── cyclegan_test.py
│   ├── dcgan_test.py
│   ├── cifarnet.py
│   ├── mobilenet_v1_eval.py
│   ├── overfeat.py
│   ├── alexnet.py
│   ├── pix2pix_test.py
│   ├── nets_factory.py
│   ├── overfeat_test.py
│   ├── alexnet_test.py
│   └── dcgan.py
├── utils
├── __init__.py
└── tools.py
├── augmentation
├── __init__.py
├── rotated_10.png
├── rotated_10.txt
├── data_agumentation.py
└── test.py
├── lanms
├── .gitignore
├── include
│   ├── clipper
│   │   └── clipper.cpp
│   └── pybind11
│   │   ├── typeid.h
│   │   ├── complex.h
│   │   ├── options.h
│   │   ├── functional.h
│   │   ├── eval.h
│   │   ├── buffer_info.h
│   │   ├── chrono.h
│   │   └── embed.h
├── __main__.py
├── Makefile
├── __init__.py
├── adaptor.cpp
├── .ycm_extra_conf.py
└── lanms.h
├── notice_sample
├── .idea
├── markdown-navigator
│   └── profiles_settings.xml
├── vcs.xml
├── misc.xml
├── preferred-vcs.xml
├── inspectionProfiles
│   └── profiles_settings.xml
├── modules.xml
├── deployment.xml
├── webServers.xml
├── EAST-master.iml
└── markdown-navigator.xml
├── requirements.txt
├── static
└── css
│   └── app.css
├── deploy.sh
├── train_flowchart.md
├── test_flowchart.md
├── locality_aware_nms.py
├── .gitignore
├── tools.py
├── data_util.py
├── readme.md
├── run_demo_server.py
├── eval.py
└── cal_IoU_gt_py.py
/__init__.py: -------------------------------------------------------------------------------- 1 |
-------------------------------------------------------------------------------- /nets/__init__.py: -------------------------------------------------------------------------------- 1 |
-------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 |
-------------------------------------------------------------------------------- /nets/NASNet/__init__.py: -------------------------------------------------------------------------------- 1 |
-------------------------------------------------------------------------------- /augmentation/__init__.py: -------------------------------------------------------------------------------- 1 |
-------------------------------------------------------------------------------- /lanms/.gitignore: -------------------------------------------------------------------------------- 1 | adaptor.so 2 |
-------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/__init__.py: -------------------------------------------------------------------------------- 1 |
-------------------------------------------------------------------------------- /notice_sample: -------------------------------------------------------------------------------- 1 | 10 2 | 17 3 | 32 4 | 43
-------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/__init__.py: -------------------------------------------------------------------------------- 1 | 2 |
-------------------------------------------------------------------------------- /augmentation/rotated_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UpCoder/ICPR_TextDection/HEAD/augmentation/rotated_10.png
-------------------------------------------------------------------------------- /lanms/include/clipper/clipper.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UpCoder/ICPR_TextDection/HEAD/lanms/include/clipper/clipper.cpp
-------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/mobilenet_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UpCoder/ICPR_TextDection/HEAD/nets/Inception_ResNet_V2/nets/mobilenet_v1.png
-------------------------------------------------------------------------------- /.idea/markdown-navigator/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Shapely==1.5.13 2 | Flask==0.10.1 3 | matplotlib==1.5.1 4 | scipy==0.19.0 5 | plumbum==1.6.2 6 | numpy==1.12.1 7 | ipython==6.1.0 8 | Pillow==4.2.1 9 |
-------------------------------------------------------------------------------- /static/css/app.css: -------------------------------------------------------------------------------- 1 | #app { 2 | padding: 20px; 3 | } 4 | 5 | #result .item { 6 | padding-bottom: 20px; 7 | } 8 | 9 | .form-content-container { 10 | padding-left: 20px; 11 | } 12 |
-------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p server_log 3 | gunicorn -w 3 run_demo_server:app -b 0.0.0.0:8769 -t 120 \ 4 | --error-logfile server_log/error.log \ 5 | --access-logfile server_log/access.log 6 |
-------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 |
-------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
-------------------------------------------------------------------------------- /.idea/preferred-vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Git 5 | 6 | 7 |
-------------------------------------------------------------------------------- /train_flowchart.md: -------------------------------------------------------------------------------- 1 | graph TD; 2 | A[Input image]-->B[ResNet-based U-Net structure]; 3 | A-->G[Compute the ground truth] 4 | B-->C[Predict the pixel-based score map with a convolution] 5 | B-->D[Predict the geometry map with a convolution] 6 | G-->E 7 | C-->E[Compute the balanced cross-entropy loss against the ground truth] 8 | G-->F 9 | D-->F[Compute the geometry loss against the ground truth]
-------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 |
-------------------------------------------------------------------------------- /lanms/__main__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | from . 
import merge_quadrangle_n9 5 | 6 | if __name__ == '__main__': 7 | # unit square with confidence 1 8 | q = np.array([0, 0, 0, 1, 1, 1, 1, 0, 1], dtype='float32') 9 | 10 | print(merge_quadrangle_n9(np.array([q, q + 0.1, q + 2]))) 11 |
-------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
-------------------------------------------------------------------------------- /test_flowchart.md: -------------------------------------------------------------------------------- 1 | graph TD; 2 | A[Input image]-->B[ResNet-based U-Net structure]; 3 | B-->C[Predict the pixel-based score map with a convolution] 4 | B-->D[Predict the geometry map with a convolution] 5 | C-->E["Keep the pixels whose score exceeds a threshold (e.g. 0.8)"] 6 | D-->F 7 | E-->F["Gather the score and geometry of the pixels that pass the threshold"] 8 | F-->G[Compute one bounding box from each remaining pixel] 9 | G-->H[Apply non-maximum suppression to these bounding boxes] 10 | H-->I[Output the text bounding boxes]
-------------------------------------------------------------------------------- /lanms/Makefile: -------------------------------------------------------------------------------- 1 | CXXFLAGS = -I include -std=c++11 -O3 $(shell python3-config --cflags) # must target the same Python as LDFLAGS below 2 | LDFLAGS = $(shell python3-config --ldflags) 3 | 4 | DEPS = lanms.h $(shell find include -xtype f) 5 | CXX_SOURCES = adaptor.cpp include/clipper/clipper.cpp 6 | 7 | LIB_SO = adaptor.so 8 | 9 | $(LIB_SO): $(CXX_SOURCES) $(DEPS) 10 | $(CXX) -o $@ $(CXXFLAGS) $(LDFLAGS) $(CXX_SOURCES) --shared -fPIC 11 | 12 | clean: 13 | rm -rf $(LIB_SO) 14 |
-------------------------------------------------------------------------------- /.idea/deployment.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
-------------------------------------------------------------------------------- /.idea/webServers.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 14 | 15 |
-------------------------------------------------------------------------------- /lanms/__init__.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import os 3 | import numpy as np 4 | 5 | BASE_DIR = os.path.dirname(os.path.realpath(__file__)) 6 | 7 | if subprocess.call(['make', '-C', BASE_DIR]) != 0: # a non-zero exit code means the build failed 8 | raise RuntimeError('Cannot compile lanms: {}'.format(BASE_DIR)) 9 | 10 | 11 | def merge_quadrangle_n9(polys, thres=0.3, precision=10000): 12 | from .adaptor import merge_quadrangle_n9 as nms_impl 13 | if len(polys) == 0: 14 | return np.array([], dtype='float32') 15 | p = polys.copy() 16 | p[:,:8] *= precision 17 | ret = np.array(nms_impl(p, thres), dtype='float32') 18 | ret[:,:8] /= precision 19 | return ret 20 | 21 |
-------------------------------------------------------------------------------- /.idea/EAST-master.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 15 | 16 | 17 | 20 |
-------------------------------------------------------------------------------- /augmentation/rotated_10.txt: -------------------------------------------------------------------------------- 1 | 101,3,101,9,194,9,194,3,### 2 | 109,63,104,47,221,5,221,26,找书:en55@ 3 | 0,34,1,52,127,8,121,0,n55@qq.com 4 | 0,84,0,101,72,77,70,60,604682 5 | 171,76,177,94,219,78,214,62,找书: 6 | 42,94,42,116,212,116,212,94,### 7 | 157,132,162,152,221,128,221,110,QQ:29 8 | 5,153,1,137,132,90,139,108,:2956604682 9 |
42,121,42,136,114,136,114,118,### 10 | 99,143,99,157,152,157,152,143,TheLCAO 11 | 101,160,101,174,180,174,180,160,FirstPrincipes 12 | 100,177,100,190,158,190,158,177,Treatment 13 | 99,192,99,209,157,209,157,192,### 14 | 0,190,0,208,125,167,120,152,h55@qq.com 15 | 0,240,3,256,193,191,188,178,找书:en55@qq.com 16 | 7,310,0,289,131,245,140,261,:2956604682 17 | 115,316,115,329,157,329,157,316,Springer 18 | 171,231,179,248,220,234,215,218,找书: 19 | 21,337,48,349,204,291,197,273,QQ:2956604682 20 | 150,346,148,349,190,349,185,336,### 21 | 101,72,101,83,169,83,169,72,R.A.Evarestov 22 | 11,0,11,0,0,0,0,0,### 23 |
-------------------------------------------------------------------------------- /lanms/include/pybind11/typeid.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/typeid.h: Compiler-independent access to type identifiers 3 | 4 | Copyright (c) 2016 Wenzel Jakob 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 8 | */ 9 | 10 | #pragma once 11 | 12 | #include 13 | #include 14 | 15 | #if defined(__GNUG__) 16 | #include 17 | #endif 18 | 19 | NAMESPACE_BEGIN(pybind11) 20 | NAMESPACE_BEGIN(detail) 21 | /// Erase all occurrences of a substring 22 | inline void erase_all(std::string &string, const std::string &search) { 23 | for (size_t pos = 0;;) { 24 | pos = string.find(search, pos); 25 | if (pos == std::string::npos) break; 26 | string.erase(pos, search.length()); 27 | } 28 | } 29 | 30 | PYBIND11_NOINLINE inline void clean_type_id(std::string &name) { 31 | #if defined(__GNUG__) 32 | int status = 0; 33 | std::unique_ptr res { 34 | abi::__cxa_demangle(name.c_str(), nullptr, nullptr, &status), std::free }; 35 | if (status == 0) 36 | name = res.get(); 37 | #else 38 | detail::erase_all(name, "class "); 39 | detail::erase_all(name, "struct "); 40 | detail::erase_all(name, "enum "); 41 | #endif 42 | detail::erase_all(name, "pybind11::"); 43 | } 44 | NAMESPACE_END(detail) 45 | 46 | /// Return a string representation of a C++ type 47 | template static std::string type_id() { 48 | std::string name(typeid(T).name()); 49 | detail::clean_type_id(name); 50 | return name; 51 | } 52 | 53 | NAMESPACE_END(pybind11) 54 |
-------------------------------------------------------------------------------- /locality_aware_nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from shapely.geometry import Polygon 3 | 4 | 5 | def intersection(g, p): 6 | g = Polygon(g[:8].reshape((4, 2))) 7 | p = Polygon(p[:8].reshape((4, 2))) 8 | if not g.is_valid or not p.is_valid: 9 | return 0 10 | inter = g.intersection(p).area # g and p are already Polygon objects; no need to wrap them again 11 | union = g.area + p.area - inter 12 | if union == 0: 13 | return 0 14 | else: 15 | return inter/union 16 | 17 | 18 | def weighted_merge(g, p): 19 | g[:8] = (g[8] * g[:8] + p[8] * p[:8])/(g[8] + p[8]) 20 | g[8] = (g[8] + p[8]) 21 | return g 22 | 23 | 24 | def standard_nms(S, thres): 25 | order = np.argsort(S[:, 8])[::-1] 26 | keep = [] 27 | while order.size > 0: 28 | i = order[0] 29 | keep.append(i) 30 | ovr = np.array([intersection(S[i], S[t]) for t in order[1:]]) 31 | 32 | inds = np.where(ovr <= thres)[0] 33 | order = order[inds+1] 34 | 35 | return S[keep] 36 | 37 | 38 | def nms_locality(polys, thres=0.3): 39 | ''' 40 | locality aware nms of EAST 41 | :param polys: an N*9 numpy array. 
first 8 coordinates, then prob 42 | :return: boxes after nms 43 | ''' 44 | S = [] 45 | p = None 46 | for g in polys: 47 | if p is not None and intersection(g, p) > thres: 48 | p = weighted_merge(g, p) 49 | else: 50 | if p is not None: 51 | S.append(p) 52 | p = g 53 | if p is not None: 54 | S.append(p) 55 | 56 | if len(S) == 0: 57 | return np.array([]) 58 | return standard_nms(np.array(S), thres) 59 | 60 | 61 | if __name__ == '__main__': 62 | # 343,350,448,135,474,143,369,359 63 | print(Polygon(np.array([[343, 350], [448, 135], 64 | [474, 143], [369, 359]])).area) 65 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/inception.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Brings all inception models under one namespace.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | # pylint: disable=unused-import 22 | from nets.inception_resnet_v2 import inception_resnet_v2 23 | from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope 24 | from nets.inception_resnet_v2 import inception_resnet_v2_base 25 | from nets.inception_v1 import inception_v1 26 | from nets.inception_v1 import inception_v1_arg_scope 27 | from nets.inception_v1 import inception_v1_base 28 | from nets.inception_v2 import inception_v2 29 | from nets.inception_v2 import inception_v2_arg_scope 30 | from nets.inception_v2 import inception_v2_base 31 | from nets.inception_v3 import inception_v3 32 | from nets.inception_v3 import inception_v3_arg_scope 33 | from nets.inception_v3 import inception_v3_base 34 | from nets.inception_v4 import inception_v4 35 | from nets.inception_v4 import inception_v4_arg_scope 36 | from nets.inception_v4 import inception_v4_base 37 | # pylint: enable=unused-import 38 | -------------------------------------------------------------------------------- /lanms/adaptor.cpp: -------------------------------------------------------------------------------- 1 | #include "pybind11/pybind11.h" 2 | #include "pybind11/numpy.h" 3 | #include "pybind11/stl.h" 4 | #include "pybind11/stl_bind.h" 5 | 6 | #include "lanms.h" 7 | 8 | namespace py = pybind11; 9 | 10 | 11 | namespace lanms_adaptor { 12 | 13 | std::vector> polys2floats(const std::vector &polys) { 14 | std::vector> ret; 15 | for (size_t i = 0; i < polys.size(); i ++) { 16 | auto &p = polys[i]; 17 | auto &poly = p.poly; 18 | ret.emplace_back(std::vector{ 19 | float(poly[0].X), float(poly[0].Y), 20 | float(poly[1].X), float(poly[1].Y), 21 | float(poly[2].X), float(poly[2].Y), 22 | float(poly[3].X), float(poly[3].Y), 23 | float(p.score), 24 | }); 25 | } 26 | 27 | return ret; 28 | } 29 | 30 | 31 | /** 32 | * 33 | * \param quad_n9 an 
n-by-9 numpy array, where first 8 numbers denote the 34 | * quadrangle, and the last one is the score 35 | * \param iou_threshold two quadrangles with iou score above this threshold 36 | * will be merged 37 | * 38 | * \return an n-by-9 numpy array, the merged quadrangles 39 | */ 40 | std::vector> merge_quadrangle_n9( 41 | py::array_t quad_n9, 42 | float iou_threshold) { 43 | auto pbuf = quad_n9.request(); 44 | if (pbuf.ndim != 2 || pbuf.shape[1] != 9) 45 | throw std::runtime_error("quadrangles must have a shape of (n, 9)"); 46 | auto n = pbuf.shape[0]; 47 | auto ptr = static_cast(pbuf.ptr); 48 | return polys2floats(lanms::merge_quadrangle_n9(ptr, n, iou_threshold)); 49 | } 50 | 51 | } 52 | 53 | PYBIND11_PLUGIN(adaptor) { 54 | py::module m("adaptor", "NMS"); 55 | 56 | m.def("merge_quadrangle_n9", &lanms_adaptor::merge_quadrangle_n9, 57 | "merge quadrangles"); 58 | 59 | return m.ptr(); 60 | } 61 | 62 |
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # vim swapfile 104 | *.swp 105 | 106 | # result files for demo 107 | static/results 108 | 109 | 110 | # tmp 111 | tmp/* 112 | 113 | demo_images/* 114 | demo_result/* 115 | templates/* 116 | training_samples/* 117 | 118 | east_icdar2015_resnet_v1_50_rbox (1).zip 119 | resnet_v1_50_2016_08_28.tar.gz 120 | 121 | log/* 122 | 123 |
-------------------------------------------------------------------------------- /lanms/include/pybind11/complex.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/complex.h: Complex number support 3 | 4 | Copyright (c) 2016 Wenzel Jakob 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 
8 | */ 9 | 10 | #pragma once 11 | 12 | #include "pybind11.h" 13 | #include 14 | 15 | /// glibc defines I as a macro which breaks things, e.g., boost template names 16 | #ifdef I 17 | # undef I 18 | #endif 19 | 20 | NAMESPACE_BEGIN(pybind11) 21 | 22 | template struct format_descriptor, detail::enable_if_t::value>> { 23 | static constexpr const char c = format_descriptor::c; 24 | static constexpr const char value[3] = { 'Z', c, '\0' }; 25 | static std::string format() { return std::string(value); } 26 | }; 27 | 28 | template constexpr const char format_descriptor< 29 | std::complex, detail::enable_if_t::value>>::value[3]; 30 | 31 | NAMESPACE_BEGIN(detail) 32 | 33 | template struct is_fmt_numeric, detail::enable_if_t::value>> { 34 | static constexpr bool value = true; 35 | static constexpr int index = is_fmt_numeric::index + 3; 36 | }; 37 | 38 | template class type_caster> { 39 | public: 40 | bool load(handle src, bool convert) { 41 | if (!src) 42 | return false; 43 | if (!convert && !PyComplex_Check(src.ptr())) 44 | return false; 45 | Py_complex result = PyComplex_AsCComplex(src.ptr()); 46 | if (result.real == -1.0 && PyErr_Occurred()) { 47 | PyErr_Clear(); 48 | return false; 49 | } 50 | value = std::complex((T) result.real, (T) result.imag); 51 | return true; 52 | } 53 | 54 | static handle cast(const std::complex &src, return_value_policy /* policy */, handle /* parent */) { 55 | return PyComplex_FromDoubles((double) src.real(), (double) src.imag()); 56 | } 57 | 58 | PYBIND11_TYPE_CASTER(std::complex, _("complex")); 59 | }; 60 | NAMESPACE_END(detail) 61 | NAMESPACE_END(pybind11) 62 | -------------------------------------------------------------------------------- /lanms/include/pybind11/options.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/options.h: global settings that are configurable at runtime. 3 | 4 | Copyright (c) 2016 Wenzel Jakob 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 8 | */ 9 | 10 | #pragma once 11 | 12 | #include "common.h" 13 | 14 | NAMESPACE_BEGIN(pybind11) 15 | 16 | class options { 17 | public: 18 | 19 | // Default RAII constructor, which leaves settings as they currently are. 20 | options() : previous_state(global_state()) {} 21 | 22 | // Class is non-copyable. 23 | options(const options&) = delete; 24 | options& operator=(const options&) = delete; 25 | 26 | // Destructor, which restores settings that were in effect before. 27 | ~options() { 28 | global_state() = previous_state; 29 | } 30 | 31 | // Setter methods (affect the global state): 32 | 33 | options& disable_user_defined_docstrings() & { global_state().show_user_defined_docstrings = false; return *this; } 34 | 35 | options& enable_user_defined_docstrings() & { global_state().show_user_defined_docstrings = true; return *this; } 36 | 37 | options& disable_function_signatures() & { global_state().show_function_signatures = false; return *this; } 38 | 39 | options& enable_function_signatures() & { global_state().show_function_signatures = true; return *this; } 40 | 41 | // Getter methods (return the global state): 42 | 43 | static bool show_user_defined_docstrings() { return global_state().show_user_defined_docstrings; } 44 | 45 | static bool show_function_signatures() { return global_state().show_function_signatures; } 46 | 47 | // This type is not meant to be allocated on the heap. 
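// (Note: the deleted operator new below forces every options instance onto the stack, so the destructor is guaranteed to run and restore previous_state; that RAII guarantee is what this class relies on.)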
48 | void* operator new(size_t) = delete; 49 | 50 | private: 51 | 52 | struct state { 53 | bool show_user_defined_docstrings = true; //< Include user-supplied texts in docstrings. 54 | bool show_function_signatures = true; //< Include auto-generated function signatures in docstrings. 55 | }; 56 | 57 | static state &global_state() { 58 | static state instance; 59 | return instance; 60 | } 61 | 62 | state previous_state; 63 | }; 64 | 65 | NAMESPACE_END(pybind11) 66 | -------------------------------------------------------------------------------- /nets/NASNet/nasnet_utils_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for slim.nets.nasnet.nasnet_utils.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | 24 | from nets.NASNet import nasnet_utils 25 | 26 | 27 | class NasnetUtilsTest(tf.test.TestCase): 28 | 29 | def testCalcReductionLayers(self): 30 | num_cells = 18 31 | num_reduction_layers = 2 32 | reduction_layers = nasnet_utils.calc_reduction_layers( 33 | num_cells, num_reduction_layers) 34 | self.assertEqual(len(reduction_layers), 2) 35 | self.assertEqual(reduction_layers[0], 6) 36 | self.assertEqual(reduction_layers[1], 12) 37 | 38 | def testGetChannelIndex(self): 39 | data_formats = ['NHWC', 'NCHW'] 40 | for data_format in data_formats: 41 | index = nasnet_utils.get_channel_index(data_format) 42 | correct_index = 3 if data_format == 'NHWC' else 1 43 | self.assertEqual(index, correct_index) 44 | 45 | def testGetChannelDim(self): 46 | data_formats = ['NHWC', 'NCHW'] 47 | shape = [10, 20, 30, 40] 48 | for data_format in data_formats: 49 | dim = nasnet_utils.get_channel_dim(shape, data_format) 50 | correct_dim = shape[3] if data_format == 'NHWC' else shape[1] 51 | self.assertEqual(dim, correct_dim) 52 | 53 | def testGlobalAvgPool(self): 54 | data_formats = ['NHWC', 'NCHW'] 55 | inputs = tf.placeholder(tf.float32, (5, 10, 20, 10)) 56 | for data_format in data_formats: 57 | output = nasnet_utils.global_avg_pool( 58 | inputs, data_format) 59 | self.assertEqual(output.shape, [5, 10]) 60 | 61 | 62 | if __name__ == '__main__': 63 | tf.test.main() 64 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/nets_factory_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for slim.inception.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from nets import nets_factory 26 | 27 | 28 | class NetworksTest(tf.test.TestCase): 29 | 30 | def testGetNetworkFnFirstHalf(self): 31 | batch_size = 5 32 | num_classes = 1000 33 | for net in list(nets_factory.networks_map.keys())[:10]: 34 | with tf.Graph().as_default() as g, self.test_session(g): 35 | net_fn = nets_factory.get_network_fn(net, num_classes) 36 | # Most networks use 224 as their default_image_size 37 | image_size = getattr(net_fn, 'default_image_size', 224) 38 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3)) 39 | logits, end_points = net_fn(inputs) 40 | self.assertTrue(isinstance(logits, tf.Tensor)) 41 | self.assertTrue(isinstance(end_points, dict)) 42 | self.assertEqual(logits.get_shape().as_list()[0], batch_size) 43 | self.assertEqual(logits.get_shape().as_list()[-1], num_classes) 44 | 45 | def testGetNetworkFnSecondHalf(self): 46 | batch_size = 5 47 | num_classes = 1000 48 | for net in list(nets_factory.networks_map.keys())[10:]: 49 | with tf.Graph().as_default() as g, self.test_session(g): 50 | net_fn = nets_factory.get_network_fn(net, num_classes) 51 | # Most networks use 224 as their default_image_size 52 | image_size = getattr(net_fn, 'default_image_size', 224) 53 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3)) 54 | logits, end_points = net_fn(inputs) 55 | self.assertTrue(isinstance(logits, tf.Tensor)) 56 | self.assertTrue(isinstance(end_points, dict)) 57 | self.assertEqual(logits.get_shape().as_list()[0], batch_size) 58 | self.assertEqual(logits.get_shape().as_list()[-1], num_classes) 59 | 60 | if __name__ == '__main__': 61 | tf.test.main() 62 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/inception_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains common code shared by all inception models. 
16 | 17 | Usage of arg scope: 18 | with slim.arg_scope(inception_arg_scope()): 19 | logits, end_points = inception.inception_v3(images, num_classes, 20 | is_training=is_training) 21 | 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import tensorflow as tf 28 | 29 | slim = tf.contrib.slim 30 | 31 | 32 | def inception_arg_scope(weight_decay=0.00004, 33 | use_batch_norm=True, 34 | batch_norm_decay=0.9997, 35 | batch_norm_epsilon=0.001, 36 | activation_fn=tf.nn.relu): 37 | """Defines the default arg scope for inception models. 38 | 39 | Args: 40 | weight_decay: The weight decay to use for regularizing the model. 41 | use_batch_norm: If `True`, batch_norm is applied after each convolution. 42 | batch_norm_decay: Decay for batch norm moving average. 43 | batch_norm_epsilon: Small float added to variance to avoid dividing by zero 44 | in batch norm. 45 | activation_fn: Activation function for conv2d. 46 | 47 | Returns: 48 | An `arg_scope` to use for the inception models. 49 | """ 50 | batch_norm_params = { 51 | # Decay for the moving averages. 52 | 'decay': batch_norm_decay, 53 | # epsilon to prevent 0s in variance. 54 | 'epsilon': batch_norm_epsilon, 55 | # collection containing update_ops. 56 | 'updates_collections': tf.GraphKeys.UPDATE_OPS, 57 | # use fused batch norm if possible. 58 | 'fused': None, 59 | } 60 | if use_batch_norm: 61 | normalizer_fn = slim.batch_norm 62 | normalizer_params = batch_norm_params 63 | else: 64 | normalizer_fn = None 65 | normalizer_params = {} 66 | # Set weight_decay for weights in Conv and FC layers. 67 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 68 | weights_regularizer=slim.l2_regularizer(weight_decay)): 69 | with slim.arg_scope( 70 | [slim.conv2d], 71 | weights_initializer=slim.variance_scaling_initializer(), 72 | activation_fn=activation_fn, 73 | normalizer_fn=normalizer_fn, 74 | normalizer_params=normalizer_params) as sc: 75 | return sc 76 |
-------------------------------------------------------------------------------- /lanms/include/pybind11/functional.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/functional.h: std::function<> support 3 | 4 | Copyright (c) 2016 Wenzel Jakob 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 8 | */ 9 | 10 | #pragma once 11 | 12 | #include "pybind11.h" 13 | #include 14 | 15 | NAMESPACE_BEGIN(pybind11) 16 | NAMESPACE_BEGIN(detail) 17 | 18 | template 19 | struct type_caster> { 20 | using type = std::function; 21 | using retval_type = conditional_t::value, void_type, Return>; 22 | using function_type = Return (*) (Args...); 23 | 24 | public: 25 | bool load(handle src, bool convert) { 26 | if (src.is_none()) { 27 | // Defer accepting None to other overloads (if we aren't in convert mode): 28 | if (!convert) return false; 29 | return true; 30 | } 31 | 32 | if (!isinstance(src)) 33 | return false; 34 | 35 | auto func = reinterpret_borrow(src); 36 | 37 | /* 38 | When passing a C++ function as an argument to another C++ 39 | function via Python, every function call would normally involve 40 | a full C++ -> Python -> C++ roundtrip, which can be prohibitive. 41 | Here, we try to at least detect the case where the function is 42 | stateless (i.e. function pointer or lambda function without 43 | captured variables), in which case the roundtrip can be avoided. 
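(In the code below, the bound function's function_record is inspected: if its is_stateless flag is set and the stored type_info matches the requested function_type, the raw C++ function pointer is recovered from the record and called directly.)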
44 | */ 45 | if (auto cfunc = func.cpp_function()) { 46 | auto c = reinterpret_borrow(PyCFunction_GET_SELF(cfunc.ptr())); 47 | auto rec = (function_record *) c; 48 | 49 | if (rec && rec->is_stateless && 50 | same_type(typeid(function_type), *reinterpret_cast(rec->data[1]))) { 51 | struct capture { function_type f; }; 52 | value = ((capture *) &rec->data)->f; 53 | return true; 54 | } 55 | } 56 | 57 | value = [func](Args... args) -> Return { 58 | gil_scoped_acquire acq; 59 | object retval(func(std::forward(args)...)); 60 | /* Visual studio 2015 parser issue: need parentheses around this expression */ 61 | return (retval.template cast()); 62 | }; 63 | return true; 64 | } 65 | 66 | template 67 | static handle cast(Func &&f_, return_value_policy policy, handle /* parent */) { 68 | if (!f_) 69 | return none().inc_ref(); 70 | 71 | auto result = f_.template target(); 72 | if (result) 73 | return cpp_function(*result, policy).release(); 74 | else 75 | return cpp_function(std::forward(f_), policy).release(); 76 | } 77 | 78 | PYBIND11_TYPE_CASTER(type, _("Callable[[") + 79 | argument_loader::arg_names() + _("], ") + 80 | make_caster::name() + 81 | _("]")); 82 | }; 83 | 84 | NAMESPACE_END(detail) 85 | NAMESPACE_END(pybind11) 86 | -------------------------------------------------------------------------------- /tools.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | from PIL import Image 3 | import numpy as np 4 | import cv2 5 | import sys 6 | from utils.tools import draw_rect, order_points 7 | from icdar import expand_poly 8 | sys.setrecursionlimit(300000000) 9 | 10 | 11 | def show_image(image_arr): 12 | img = Image.fromarray(image_arr) 13 | img.show() 14 | 15 | def find_connected(score_map, threshold=0.7): 16 | binary_map = (score_map > threshold).astype(np.uint8) 17 | connectivity = 8 18 | output = cv2.connectedComponentsWithStats(binary_map, connectivity=connectivity, ltype=cv2.CV_32S) 19 | label_map = output[1] 20 | # show_image(np.asarray(label_map * 100.0, np.uint8)) 21 | return np.max(label_map), label_map 22 | 23 | 24 | def calculate_boundingbox_score(score_map, threshold=0.7): 25 | # score_map = score_map[::-1, :] 26 | score_map[score_map < threshold] = 0.0 27 | h, w = np.shape(score_map) 28 | # show_image(np.asarray(score_map * 255, np.uint8)) 29 | flag = np.zeros([h, w]) 30 | boundingboxs = [] 31 | rects = [] 32 | count_connecter, label_map = find_connected(score_map, threshold) 33 | label_map = np.array(label_map) 34 | bbox_image = np.zeros(np.shape(label_map), np.uint8) 35 | expand_image = np.zeros(np.shape(label_map), np.uint8) 36 | for idx in range(1, count_connecter+1): 37 | connected = np.array(np.where(label_map == idx)).transpose((1, 0)) 38 | rect = cv2.minAreaRect(np.array(connected)) 39 | rects.append(rect) 40 | bbox = order_points(cv2.boxPoints(rect)[:, ::-1]) 41 | r = [None, None, None, None] 42 | for i in range(4): 43 | r[i] = min(np.linalg.norm(bbox[i] - bbox[(i + 1) % 4]), 44 | np.linalg.norm(bbox[i] - bbox[(i - 1) % 4])) 45 | expand_bbox = expand_poly(bbox.copy(), r).astype(np.int32) 46 | boundingboxs.append(expand_bbox) 47 | cur_points = [] 48 | expand_points = [] 49 | for i in range(4): 50 | for j in range(2): 51 | cur_points.append(bbox[i, j]) 52 | expand_points.append(expand_bbox[i, j]) 53 | expand_image = draw_rect(expand_image, expand_points) 54 | bbox_image = draw_rect(bbox_image, cur_points) 55 | 56 | for i in range(len(rects)): 57 | for j in range(len(rects)): 58 | if i == j: 59 | continue 60 | rect1 
= rects[i] 61 | rect2 = rects[j] 62 | theta1 = rect1[2] 63 | theta2 = rect2[2] 64 | if abs(theta1 - theta2) < 5: 65 | center1 = rect1[0] 66 | center2 = rect2[0] # rect[0] is the box center; rect[1] is its (width, height), so rect2[1] was a bug 67 | center_distance = (center1[0] - center2[0])**2 + (center1[1] - center2[1])**2 68 | # dis_sub_width = center_distance - rect1[1][] 69 | # print 'ok' (unfinished heuristic for merging nearly-collinear boxes; currently has no effect) 70 | points = [] 71 | for bbox in boundingboxs: 72 | cur_points = [] 73 | for i in range(4): 74 | for j in range(2): 75 | cur_points.append(bbox[i, j]) 76 | points.append(cur_points) 77 | return np.array(points) 78 | 79 | if __name__ == '__main__': 80 | test = np.zeros([100, 100]) 81 | test[10:21, 10:81] = 1.0 82 | 83 | test[30:40, 10:81] = 1.0 84 | 85 | test[42:50, 10:81] = 1.0 86 | 87 | test[62:70, 10:81] = 1.0 88 | 89 | test[82:90, 10:81] = 1.0 90 | find_connected(test) 91 | show_image(np.asarray(test * 255, np.uint8)) 92 | # xys = np.argwhere(test != 0) 93 | # rect = cv2.minAreaRect(xys) 94 | # print rect
-------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/lenet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a variant of the LeNet model definition.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | 26 | def lenet(images, num_classes=10, is_training=False, 27 | dropout_keep_prob=0.5, 28 | prediction_fn=slim.softmax, 29 | scope='LeNet'): 30 | """Creates a variant of the LeNet model. 31 | 32 | Note that since the output is a set of 'logits', the values fall in the 33 | interval of (-infinity, infinity). Consequently, to convert the outputs to a 34 | probability distribution over the characters, one will need to convert them 35 | using the softmax function: 36 | 37 | logits = lenet.lenet(images, is_training=False) 38 | probabilities = tf.nn.softmax(logits) 39 | predictions = tf.argmax(logits, 1) 40 | 41 | Args: 42 | images: A batch of `Tensors` of size [batch_size, height, width, channels]. 43 | num_classes: the number of classes in the dataset. If 0 or None, the logits 44 | layer is omitted and the input features to the logits layer are returned 45 | instead. 46 | is_training: specifies whether or not we're currently training the model. 47 | This variable will determine the behaviour of the dropout layer. 48 | dropout_keep_prob: the percentage of activation values that are retained. 49 | prediction_fn: a function to get predictions out of logits. 50 | scope: Optional variable_scope. 
51 | 52 | Returns: 53 | net: a 2D Tensor with the logits (pre-softmax activations) if num_classes 54 | is a non-zero integer, or the non-dropped-out input to the logits layer 55 | if num_classes is 0 or None. 56 | end_points: a dictionary from components of the network to the corresponding 57 | activation. 58 | """ 59 | end_points = {} 60 | 61 | with tf.variable_scope(scope, 'LeNet', [images]): 62 | net = end_points['conv1'] = slim.conv2d(images, 32, [5, 5], scope='conv1') 63 | net = end_points['pool1'] = slim.max_pool2d(net, [2, 2], 2, scope='pool1') 64 | net = end_points['conv2'] = slim.conv2d(net, 64, [5, 5], scope='conv2') 65 | net = end_points['pool2'] = slim.max_pool2d(net, [2, 2], 2, scope='pool2') 66 | net = slim.flatten(net) 67 | end_points['Flatten'] = net 68 | 69 | net = end_points['fc3'] = slim.fully_connected(net, 1024, scope='fc3') 70 | if not num_classes: 71 | return net, end_points 72 | net = end_points['dropout3'] = slim.dropout( 73 | net, dropout_keep_prob, is_training=is_training, scope='dropout3') 74 | logits = end_points['Logits'] = slim.fully_connected( 75 | net, num_classes, activation_fn=None, scope='fc4') 76 | 77 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 78 | 79 | return logits, end_points 80 | lenet.default_image_size = 28 81 | 82 | 83 | def lenet_arg_scope(weight_decay=0.0): 84 | """Defines the default lenet argument scope. 85 | 86 | Args: 87 | weight_decay: The weight decay to use for regularizing the model. 88 | 89 | Returns: 90 | An `arg_scope` to use for the lenet model. 91 | """ 92 | with slim.arg_scope( 93 | [slim.conv2d, slim.fully_connected], 94 | weights_regularizer=slim.l2_regularizer(weight_decay), 95 | weights_initializer=tf.truncated_normal_initializer(stddev=0.1), 96 | activation_fn=tf.nn.relu) as sc: 97 | return sc 98 |
-------------------------------------------------------------------------------- /.idea/markdown-navigator.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 36 | 37 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 |
-------------------------------------------------------------------------------- /lanms/include/pybind11/eval.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/eval.h: Support for evaluating Python expressions and statements 3 | from strings and files 4 | 5 | Copyright (c) 2016 Klemens Morgenstern and 6 | Wenzel Jakob 7 | 8 | All rights reserved. Use of this source code is governed by a 9 | BSD-style license that can be found in the LICENSE file. 10 | */ 11 | 12 | #pragma once 13 | 14 | #include "pybind11.h" 15 | 16 | NAMESPACE_BEGIN(pybind11) 17 | 18 | enum eval_mode { 19 | /// Evaluate a string containing an isolated expression 20 | eval_expr, 21 | 22 | /// Evaluate a string containing a single statement. Returns \c none 23 | eval_single_statement, 24 | 25 | /// Evaluate a string containing a sequence of statements. 
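(i.e. a module-style block; the switch below maps this mode to Py_file_input.)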
Returns \c none 26 | eval_statements 27 | }; 28 | 29 | template 30 | object eval(str expr, object global = globals(), object local = object()) { 31 | if (!local) 32 | local = global; 33 | 34 | /* PyRun_String does not accept a PyObject / encoding specifier, 35 | this seems to be the only alternative */ 36 | std::string buffer = "# -*- coding: utf-8 -*-\n" + (std::string) expr; 37 | 38 | int start; 39 | switch (mode) { 40 | case eval_expr: start = Py_eval_input; break; 41 | case eval_single_statement: start = Py_single_input; break; 42 | case eval_statements: start = Py_file_input; break; 43 | default: pybind11_fail("invalid evaluation mode"); 44 | } 45 | 46 | PyObject *result = PyRun_String(buffer.c_str(), start, global.ptr(), local.ptr()); 47 | if (!result) 48 | throw error_already_set(); 49 | return reinterpret_steal(result); 50 | } 51 | 52 | template 53 | object eval(const char (&s)[N], object global = globals(), object local = object()) { 54 | /* Support raw string literals by removing common leading whitespace */ 55 | auto expr = (s[0] == '\n') ? str(module::import("textwrap").attr("dedent")(s)) 56 | : str(s); 57 | return eval(expr, global, local); 58 | } 59 | 60 | inline void exec(str expr, object global = globals(), object local = object()) { 61 | eval(expr, global, local); 62 | } 63 | 64 | template 65 | void exec(const char (&s)[N], object global = globals(), object local = object()) { 66 | eval(s, global, local); 67 | } 68 | 69 | template 70 | object eval_file(str fname, object global = globals(), object local = object()) { 71 | if (!local) 72 | local = global; 73 | 74 | int start; 75 | switch (mode) { 76 | case eval_expr: start = Py_eval_input; break; 77 | case eval_single_statement: start = Py_single_input; break; 78 | case eval_statements: start = Py_file_input; break; 79 | default: pybind11_fail("invalid evaluation mode"); 80 | } 81 | 82 | int closeFile = 1; 83 | std::string fname_str = (std::string) fname; 84 | #if PY_VERSION_HEX >= 0x03040000 85 | FILE *f = _Py_fopen_obj(fname.ptr(), "r"); 86 | #elif PY_VERSION_HEX >= 0x03000000 87 | FILE *f = _Py_fopen(fname.ptr(), "r"); 88 | #else 89 | /* No unicode support in open() :( */ 90 | auto fobj = reinterpret_steal(PyFile_FromString( 91 | const_cast(fname_str.c_str()), 92 | const_cast("r"))); 93 | FILE *f = nullptr; 94 | if (fobj) 95 | f = PyFile_AsFile(fobj.ptr()); 96 | closeFile = 0; 97 | #endif 98 | if (!f) { 99 | PyErr_Clear(); 100 | pybind11_fail("File \"" + fname_str + "\" could not be opened!"); 101 | } 102 | 103 | #if PY_VERSION_HEX < 0x03000000 && defined(PYPY_VERSION) 104 | PyObject *result = PyRun_File(f, fname_str.c_str(), start, global.ptr(), 105 | local.ptr()); 106 | (void) closeFile; 107 | #else 108 | PyObject *result = PyRun_FileEx(f, fname_str.c_str(), start, global.ptr(), 109 | local.ptr(), closeFile); 110 | #endif 111 | 112 | if (!result) 113 | throw error_already_set(); 114 | return reinterpret_steal(result); 115 | } 116 | 117 | NAMESPACE_END(pybind11) 118 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/cyclegan_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for tensorflow.contrib.slim.nets.cyclegan.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | from nets import cyclegan 24 | 25 | 26 | # TODO(joelshor): Add a test to check generator endpoints. 27 | class CycleganTest(tf.test.TestCase): 28 | 29 | def test_generator_inference(self): 30 | """Check one inference step.""" 31 | img_batch = tf.zeros([2, 32, 32, 3]) 32 | model_output, _ = cyclegan.cyclegan_generator_resnet(img_batch) 33 | with self.test_session() as sess: 34 | sess.run(tf.global_variables_initializer()) 35 | sess.run(model_output) 36 | 37 | def _test_generator_graph_helper(self, shape): 38 | """Check that generator can take small and non-square inputs.""" 39 | output_imgs, _ = cyclegan.cyclegan_generator_resnet(tf.ones(shape)) 40 | self.assertAllEqual(shape, output_imgs.shape.as_list()) 41 | 42 | def test_generator_graph_small(self): 43 | self._test_generator_graph_helper([4, 32, 32, 3]) 44 | 45 | def test_generator_graph_medium(self): 46 | self._test_generator_graph_helper([3, 128, 128, 3]) 47 | 48 | def test_generator_graph_nonsquare(self): 49 | self._test_generator_graph_helper([2, 80, 400, 3]) 50 | 51 | def test_generator_unknown_batch_dim(self): 52 | """Check that generator can take unknown batch dimension inputs.""" 53 | img = tf.placeholder(tf.float32, shape=[None, 32, None, 3]) 54 | output_imgs, _ = cyclegan.cyclegan_generator_resnet(img) 55 | 56 | self.assertAllEqual([None, 32, None, 3], output_imgs.shape.as_list()) 57 | 58 | def _input_and_output_same_shape_helper(self, kernel_size): 59 | img_batch = tf.placeholder(tf.float32, shape=[None, 32, 32, 3]) 60 | output_img_batch, _ = cyclegan.cyclegan_generator_resnet( 61 | img_batch, kernel_size=kernel_size) 62 | 63 | self.assertAllEqual(img_batch.shape.as_list(), 64 | output_img_batch.shape.as_list()) 65 | 66 | def input_and_output_same_shape_kernel3(self): 67 | self._input_and_output_same_shape_helper(3) 68 | 69 | def input_and_output_same_shape_kernel4(self): 70 | self._input_and_output_same_shape_helper(4) 71 | 72 | def input_and_output_same_shape_kernel5(self): 73 | self._input_and_output_same_shape_helper(5) 74 | 75 | def input_and_output_same_shape_kernel6(self): 76 | self._input_and_output_same_shape_helper(6) 77 | 78 | def _error_if_height_not_multiple_of_four_helper(self, height): 79 | self.assertRaisesRegexp( 80 | ValueError, 81 | 'The input height must be a multiple of 4.', 82 | cyclegan.cyclegan_generator_resnet, 83 | tf.placeholder(tf.float32, shape=[None, height, 32, 3])) 84 | 85 | def test_error_if_height_not_multiple_of_four_height29(self): 86 | self._error_if_height_not_multiple_of_four_helper(29) 87 | 88 | def test_error_if_height_not_multiple_of_four_height30(self): 89 | self._error_if_height_not_multiple_of_four_helper(30) 90 | 91 | def test_error_if_height_not_multiple_of_four_height31(self): 92 | 
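# (31 % 4 != 0, so the helper above should observe the 'multiple of 4' ValueError)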
self._error_if_height_not_multiple_of_four_helper(31) 93 | 94 | def _error_if_width_not_multiple_of_four_helper(self, width): 95 | self.assertRaisesRegexp( 96 | ValueError, 97 | 'The input width must be a multiple of 4.', 98 | cyclegan.cyclegan_generator_resnet, 99 | tf.placeholder(tf.float32, shape=[None, 32, width, 3])) 100 | 101 | def test_error_if_width_not_multiple_of_four_width29(self): 102 | self._error_if_width_not_multiple_of_four_helper(29) 103 | 104 | def test_error_if_width_not_multiple_of_four_width30(self): 105 | self._error_if_width_not_multiple_of_four_helper(30) 106 | 107 | def test_error_if_width_not_multiple_of_four_width31(self): 108 | self._error_if_width_not_multiple_of_four_helper(31) 109 | 110 | 111 | if __name__ == '__main__': 112 | tf.test.main() 113 | -------------------------------------------------------------------------------- /augmentation/data_agumentation.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from utils.tools import read_from_gt 3 | import imutils 4 | import numpy as np 5 | import imgaug as ia 6 | from imgaug import augmenters as iaa 7 | import time 8 | from utils.tools import save_gt_file, vis_img_bbox 9 | 10 | 11 | def data_agumentation(img, gt_bbox, operation_obj, txts=None, save_flag=None): 12 | ia.seed(int((time.time() * 1000) % 100000)) 13 | shape = np.shape(gt_bbox) 14 | [h, w, _] = np.shape(img) 15 | if shape[1] == 8: 16 | bboxes = np.reshape(gt_bbox, [-1, 4, 2]) 17 | else: 18 | bboxes = gt_bbox 19 | keypoints_on_images = [] 20 | keypoints_imgaug_obj = [] 21 | # print bboxes 22 | # print np.shape(bboxes) 23 | for key_points in bboxes: 24 | # print key_points 25 | for key_point in key_points: 26 | keypoints_imgaug_obj.append(ia.Keypoint(x=key_point[0], y=key_point[1])) 27 | keypoints_on_images.append(ia.KeypointsOnImage(keypoints_imgaug_obj, shape=img.shape)) 28 | 29 | seq_det = operation_obj.to_deterministic() 30 | 31 | img_aug = seq_det.augment_image(img) 32 | key_points_aug = seq_det.augment_keypoints(keypoints_on_images) 33 | key_points_after = [] 34 | for idx, (keypoints_before, keypoints_after) in enumerate(zip(keypoints_on_images, key_points_aug)): 35 | for kp_idx, keypoint in enumerate(keypoints_after.keypoints): 36 | keypoint.x = keypoint.x if keypoint.x < w else w 37 | keypoint.x = keypoint.x if keypoint.x > 0 else 0 38 | keypoint.y = keypoint.y if keypoint.y < h else h 39 | keypoint.y = keypoint.y if keypoint.y > 0 else 0 40 | key_points_after.append([keypoint.x, keypoint.y]) 41 | # print np.shape(key_points_after) 42 | key_points_after = np.reshape(key_points_after, [-1, 4, 2]) 43 | if save_flag: 44 | save_gt_file('./rotated_10.txt', np.reshape(key_points_after, [-1, 8]), txts=txts) 45 | cv2.imwrite('./rotated_10.png', img_aug) 46 | vis_img_bbox('./rotated_10.png', './rotated_10.txt') 47 | return img_aug, np.asarray(key_points_after, np.float32) 48 | 49 | if __name__ == '__main__': 50 | 51 | 52 | # using imgaug package 53 | import random 54 | img = cv2.imread('/home/give/Game/OCR/data/ICPR/rename/100/image_100/10.png') 55 | gt_data = read_from_gt('/home/give/Game/OCR/data/ICPR/rename/100/txt_100/10.txt') 56 | coords = gt_data[0] 57 | bboxes = np.reshape(coords, [-1, 4, 2]) 58 | 59 | angle = np.random.random() * 90 60 | operation_obj = iaa.Crop(px=(0, 10), random_state=np.random.randint(0, 10000)) 61 | # operation_obj = iaa.Affine(rotate=(-angle, angle)) 62 | # operation_obj = iaa.Sequential([iaa.Flipud(1.0)]) 63 | # operation_obj = iaa.Sequential([iaa.Fliplr(1.0)]) 64 | # 
operation_obj = iaa.Sequential([iaa.Dropout(p=(0, 0.1), random_state=np.random.randint(0, 10000))]) 65 | # operation_obj = iaa.Sequential([iaa.AdditiveGaussianNoise(scale=np.random.random() * 30)]) 66 | # operation_obj = iaa.Affine(shear=(-10, 10)) 67 | # fliplr_rate = 0.5 68 | # angle = 10 69 | # additive, contrast_norm = (45, 0.1) 70 | # gaussian_noise, dropout = (0.05, 0.01) 71 | # shear, shift = (2, 20) 72 | # operation_obj = iaa.Sequential([ 73 | # iaa.Sometimes(0.5, iaa.OneOf([ 74 | # iaa.Affine(rotate=(-angle, angle)), 75 | # iaa.ContrastNormalization((1 - contrast_norm, 1 + contrast_norm)) 76 | # ])), 77 | # iaa.Sometimes(0.5, iaa.OneOf([ 78 | # iaa.Sequential([iaa.Flipud(0.5)]), 79 | # iaa.Dropout(dropout) 80 | # ])) 81 | # ]) 82 | data_agumentation(img, bboxes, operation_obj, save_flag=True, txts=gt_data[1]) 83 | 84 | 85 | keypoints_on_images = [] 86 | start = 0 87 | keypoints_imgaug_obj = [] 88 | for key_points in bboxes: 89 | for key_point in key_points: 90 | keypoints_imgaug_obj.append(ia.Keypoint(x=key_point[0], y=key_point[1])) 91 | keypoints_on_images.append(ia.KeypointsOnImage(keypoints_imgaug_obj, shape=img.shape)) 92 | # seq = iaa.Sequential([iaa.GaussianBlur((0, 3.0))]) 93 | # seq = iaa.Sequential([iaa.AdditiveGaussianNoise(scale=10)]) 94 | # seq = iaa.Sequential([iaa.Flipud(0.5)]) 95 | # seq = iaa.Sequential([iaa.Fliplr(0.5)]) 96 | # seq = iaa.Sequential([iaa.Dropout(p=(0, 0.2))]) 97 | # seq = iaa.Sequential([iaa.Affine(rotate=(-10, 10), shear=(-10, 10))]) 98 | seq = iaa.Affine(rotate=(-60, 60), shear=(-20, 20)) 99 | # seq = iaa.Sometimes( 100 | # 0.5, 101 | # iaa.GaussianBlur(sigma=2.0), 102 | # iaa.Sequential([iaa.Affine(rotate=45), iaa.Sharpen(alpha=1.0)]) 103 | # ) 104 | seq_det = seq.to_deterministic() -------------------------------------------------------------------------------- /lanms/include/pybind11/buffer_info.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/buffer_info.h: Python buffer object interface 3 | 4 | Copyright (c) 2016 Wenzel Jakob 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 
8 | */ 9 | 10 | #pragma once 11 | 12 | #include "common.h" 13 | 14 | NAMESPACE_BEGIN(pybind11) 15 | 16 | /// Information record describing a Python buffer object 17 | struct buffer_info { 18 | void *ptr = nullptr; // Pointer to the underlying storage 19 | ssize_t itemsize = 0; // Size of individual items in bytes 20 | ssize_t size = 0; // Total number of entries 21 | std::string format; // For homogeneous buffers, this should be set to format_descriptor::format() 22 | ssize_t ndim = 0; // Number of dimensions 23 | std::vector shape; // Shape of the tensor (1 entry per dimension) 24 | std::vector strides; // Number of entries between adjacent entries (for each per dimension) 25 | 26 | buffer_info() { } 27 | 28 | buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim, 29 | detail::any_container shape_in, detail::any_container strides_in) 30 | : ptr(ptr), itemsize(itemsize), size(1), format(format), ndim(ndim), 31 | shape(std::move(shape_in)), strides(std::move(strides_in)) { 32 | if (ndim != (ssize_t) shape.size() || ndim != (ssize_t) strides.size()) 33 | pybind11_fail("buffer_info: ndim doesn't match shape and/or strides length"); 34 | for (size_t i = 0; i < (size_t) ndim; ++i) 35 | size *= shape[i]; 36 | } 37 | 38 | template 39 | buffer_info(T *ptr, detail::any_container shape_in, detail::any_container strides_in) 40 | : buffer_info(private_ctr_tag(), ptr, sizeof(T), format_descriptor::format(), static_cast(shape_in->size()), std::move(shape_in), std::move(strides_in)) { } 41 | 42 | buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t size) 43 | : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}) { } 44 | 45 | template 46 | buffer_info(T *ptr, ssize_t size) 47 | : buffer_info(ptr, sizeof(T), format_descriptor::format(), size) { } 48 | 49 | explicit buffer_info(Py_buffer *view, bool ownview = true) 50 | : buffer_info(view->buf, view->itemsize, view->format, view->ndim, 51 | {view->shape, view->shape + view->ndim}, {view->strides, view->strides + view->ndim}) { 52 | this->view = view; 53 | this->ownview = ownview; 54 | } 55 | 56 | buffer_info(const buffer_info &) = delete; 57 | buffer_info& operator=(const buffer_info &) = delete; 58 | 59 | buffer_info(buffer_info &&other) { 60 | (*this) = std::move(other); 61 | } 62 | 63 | buffer_info& operator=(buffer_info &&rhs) { 64 | ptr = rhs.ptr; 65 | itemsize = rhs.itemsize; 66 | size = rhs.size; 67 | format = std::move(rhs.format); 68 | ndim = rhs.ndim; 69 | shape = std::move(rhs.shape); 70 | strides = std::move(rhs.strides); 71 | std::swap(view, rhs.view); 72 | std::swap(ownview, rhs.ownview); 73 | return *this; 74 | } 75 | 76 | ~buffer_info() { 77 | if (view && ownview) { PyBuffer_Release(view); delete view; } 78 | } 79 | 80 | private: 81 | struct private_ctr_tag { }; 82 | 83 | buffer_info(private_ctr_tag, void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim, 84 | detail::any_container &&shape_in, detail::any_container &&strides_in) 85 | : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in)) { } 86 | 87 | Py_buffer *view = nullptr; 88 | bool ownview = false; 89 | }; 90 | 91 | NAMESPACE_BEGIN(detail) 92 | 93 | template struct compare_buffer_info { 94 | static bool compare(const buffer_info& b) { 95 | return b.format == format_descriptor::format() && b.itemsize == (ssize_t) sizeof(T); 96 | } 97 | }; 98 | 99 | template struct compare_buffer_info::value>> { 100 | static bool compare(const buffer_info& b) { 101 | return (size_t) b.itemsize == 
sizeof(T) && (b.format == format_descriptor::value || 102 | ((sizeof(T) == sizeof(long)) && b.format == (std::is_unsigned::value ? "L" : "l")) || 103 | ((sizeof(T) == sizeof(size_t)) && b.format == (std::is_unsigned::value ? "N" : "n"))); 104 | } 105 | }; 106 | 107 | NAMESPACE_END(detail) 108 | NAMESPACE_END(pybind11) 109 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/dcgan_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for dcgan.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from six.moves import xrange # pylint: disable=redefined-builtin 22 | import tensorflow as tf 23 | 24 | from nets import dcgan 25 | 26 | 27 | class DCGANTest(tf.test.TestCase): 28 | 29 | def test_generator_run(self): 30 | tf.set_random_seed(1234) 31 | noise = tf.random_normal([100, 64]) 32 | image, _ = dcgan.generator(noise) 33 | with self.test_session() as sess: 34 | sess.run(tf.global_variables_initializer()) 35 | image.eval() 36 | 37 | def test_generator_graph(self): 38 | tf.set_random_seed(1234) 39 | # Check graph construction for a number of image size/depths and batch 40 | # sizes. 41 | for i, batch_size in zip(xrange(3, 7), xrange(3, 8)): 42 | tf.reset_default_graph() 43 | final_size = 2 ** i 44 | noise = tf.random_normal([batch_size, 64]) 45 | image, end_points = dcgan.generator( 46 | noise, 47 | depth=32, 48 | final_size=final_size) 49 | 50 | self.assertAllEqual([batch_size, final_size, final_size, 3], 51 | image.shape.as_list()) 52 | 53 | expected_names = ['deconv%i' % j for j in xrange(1, i)] + ['logits'] 54 | self.assertSetEqual(set(expected_names), set(end_points.keys())) 55 | 56 | # Check layer depths. 
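      # (Each deconv block in dcgan.generator doubles the spatial size while
      # halving the channel count, so layer 'deconv%i' % j is expected to
      # carry 32 * 2**(i - j - 1) channels, which is what this loop asserts.)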
57 | for j in range(1, i): 58 | layer = end_points['deconv%i' % j] 59 | self.assertEqual(32 * 2**(i-j-1), layer.get_shape().as_list()[-1]) 60 | 61 | def test_generator_invalid_input(self): 62 | wrong_dim_input = tf.zeros([5, 32, 32]) 63 | with self.assertRaises(ValueError): 64 | dcgan.generator(wrong_dim_input) 65 | 66 | correct_input = tf.zeros([3, 2]) 67 | with self.assertRaisesRegexp(ValueError, 'must be a power of 2'): 68 | dcgan.generator(correct_input, final_size=30) 69 | 70 | with self.assertRaisesRegexp(ValueError, 'must be greater than 8'): 71 | dcgan.generator(correct_input, final_size=4) 72 | 73 | def test_discriminator_run(self): 74 | image = tf.random_uniform([5, 32, 32, 3], -1, 1) 75 | output, _ = dcgan.discriminator(image) 76 | with self.test_session() as sess: 77 | sess.run(tf.global_variables_initializer()) 78 | output.eval() 79 | 80 | def test_discriminator_graph(self): 81 | # Check graph construction for a number of image size/depths and batch 82 | # sizes. 83 | for i, batch_size in zip(xrange(1, 6), xrange(3, 8)): 84 | tf.reset_default_graph() 85 | img_w = 2 ** i 86 | image = tf.random_uniform([batch_size, img_w, img_w, 3], -1, 1) 87 | output, end_points = dcgan.discriminator( 88 | image, 89 | depth=32) 90 | 91 | self.assertAllEqual([batch_size, 1], output.get_shape().as_list()) 92 | 93 | expected_names = ['conv%i' % j for j in xrange(1, i+1)] + ['logits'] 94 | self.assertSetEqual(set(expected_names), set(end_points.keys())) 95 | 96 | # Check layer depths. 97 | for j in range(1, i+1): 98 | layer = end_points['conv%i' % j] 99 | self.assertEqual(32 * 2**(j-1), layer.get_shape().as_list()[-1]) 100 | 101 | def test_discriminator_invalid_input(self): 102 | wrong_dim_img = tf.zeros([5, 32, 32]) 103 | with self.assertRaises(ValueError): 104 | dcgan.discriminator(wrong_dim_img) 105 | 106 | spatially_undefined_shape = tf.placeholder(tf.float32, [5, 32, None, 3]) 107 | with self.assertRaises(ValueError): 108 | dcgan.discriminator(spatially_undefined_shape) 109 | 110 | not_square = tf.zeros([5, 32, 16, 3]) 111 | with self.assertRaisesRegexp(ValueError, 'not have equal width and height'): 112 | dcgan.discriminator(not_square) 113 | 114 | not_power_2 = tf.zeros([5, 30, 30, 3]) 115 | with self.assertRaisesRegexp(ValueError, 'not a power of 2'): 116 | dcgan.discriminator(not_power_2) 117 | 118 | 119 | if __name__ == '__main__': 120 | tf.test.main() 121 | -------------------------------------------------------------------------------- /data_util.py: -------------------------------------------------------------------------------- 1 | ''' 2 | this file is modified from keras implemention of data process multi-threading, 3 | see https://github.com/fchollet/keras/blob/master/keras/utils/data_utils.py 4 | ''' 5 | import time 6 | import numpy as np 7 | import threading 8 | import multiprocessing 9 | try: 10 | import queue 11 | except ImportError: 12 | import Queue as queue 13 | 14 | 15 | class GeneratorEnqueuer(): 16 | """Builds a queue out of a data generator. 17 | 18 | Used in `fit_generator`, `evaluate_generator`, `predict_generator`. 19 | 20 | # Arguments 21 | generator: a generator function which endlessly yields data 22 | use_multiprocessing: use multiprocessing if True, otherwise threading 23 | wait_time: time to sleep in-between calls to `put()` 24 | random_seed: Initial seed for workers, 25 | will be incremented by one for each workers. 
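    # Example
        A minimal usage sketch (illustrative only; `my_generator` stands
        for any generator that endlessly yields batches):

        ```python
        enqueuer = GeneratorEnqueuer(my_generator, use_multiprocessing=False)
        enqueuer.start(workers=2, max_queue_size=10)
        batch = next(enqueuer.get())
        enqueuer.stop()
        ```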
26 | """ 27 | 28 | def __init__(self, generator, 29 | use_multiprocessing=False, 30 | wait_time=0.05, 31 | random_seed=None): 32 | self.wait_time = wait_time 33 | self._generator = generator 34 | self._use_multiprocessing = use_multiprocessing 35 | self._threads = [] 36 | self._stop_event = None 37 | self.queue = None 38 | self.random_seed = random_seed 39 | 40 | def start(self, workers=1, max_queue_size=10): 41 | """Kicks off threads which add data from the generator into the queue. 42 | 43 | # Arguments 44 | workers: number of worker threads 45 | max_queue_size: queue size 46 | (when full, threads could block on `put()`) 47 | """ 48 | 49 | def data_generator_task(): 50 | while not self._stop_event.is_set(): 51 | try: 52 | if self._use_multiprocessing or self.queue.qsize() < max_queue_size: 53 | generator_output = next(self._generator) 54 | self.queue.put(generator_output) 55 | else: 56 | time.sleep(self.wait_time) 57 | except Exception: 58 | self._stop_event.set() 59 | raise 60 | 61 | try: 62 | if self._use_multiprocessing: 63 | self.queue = multiprocessing.Queue(maxsize=max_queue_size) 64 | self._stop_event = multiprocessing.Event() 65 | else: 66 | self.queue = queue.Queue() 67 | self._stop_event = threading.Event() 68 | 69 | for _ in range(workers): 70 | if self._use_multiprocessing: 71 | # Reset random seed else all children processes 72 | # share the same seed 73 | np.random.seed(self.random_seed) 74 | thread = multiprocessing.Process(target=data_generator_task) 75 | thread.daemon = True 76 | if self.random_seed is not None: 77 | self.random_seed += 1 78 | else: 79 | thread = threading.Thread(target=data_generator_task) 80 | self._threads.append(thread) 81 | thread.start() 82 | except: 83 | self.stop() 84 | raise 85 | 86 | def is_running(self): 87 | return self._stop_event is not None and not self._stop_event.is_set() 88 | 89 | def stop(self, timeout=None): 90 | """Stops running threads and wait for them to exit, if necessary. 91 | 92 | Should be called by the same thread which called `start()`. 93 | 94 | # Arguments 95 | timeout: maximum time to wait on `thread.join()`. 96 | """ 97 | if self.is_running(): 98 | self._stop_event.set() 99 | 100 | for thread in self._threads: 101 | if thread.is_alive(): 102 | if self._use_multiprocessing: 103 | thread.terminate() 104 | else: 105 | thread.join(timeout) 106 | 107 | if self._use_multiprocessing: 108 | if self.queue is not None: 109 | self.queue.close() 110 | 111 | self._threads = [] 112 | self._stop_event = None 113 | self.queue = None 114 | 115 | def get(self): 116 | """Creates a generator to extract data from the queue. 117 | 118 | Skip the data if it is `None`. 119 | 120 | # Returns 121 | A generator 122 | """ 123 | while self.is_running(): 124 | if not self.queue.empty(): 125 | inputs = self.queue.get() 126 | if inputs is not None: 127 | yield inputs 128 | else: 129 | time.sleep(self.wait_time) -------------------------------------------------------------------------------- /lanms/.ycm_extra_conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright (C) 2014 Google Inc. 4 | # 5 | # This file is part of YouCompleteMe. 6 | # 7 | # YouCompleteMe is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 
11 | # 12 | # YouCompleteMe is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU General Public License 18 | # along with YouCompleteMe. If not, see . 19 | 20 | import os 21 | import sys 22 | import glob 23 | import ycm_core 24 | 25 | # These are the compilation flags that will be used in case there's no 26 | # compilation database set (by default, one is not set). 27 | # CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR. 28 | sys.path.append(os.path.dirname(__file__)) 29 | 30 | 31 | BASE_DIR = os.path.dirname(os.path.realpath(__file__)) 32 | 33 | from plumbum.cmd import python_config 34 | 35 | 36 | flags = [ 37 | '-Wall', 38 | '-Wextra', 39 | '-Wnon-virtual-dtor', 40 | '-Winvalid-pch', 41 | '-Wno-unused-local-typedefs', 42 | '-std=c++11', 43 | '-x', 'c++', 44 | '-Iinclude', 45 | ] + python_config('--cflags').split() 46 | 47 | 48 | # Set this to the absolute path to the folder (NOT the file!) containing the 49 | # compile_commands.json file to use that instead of 'flags'. See here for 50 | # more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html 51 | # 52 | # Most projects will NOT need to set this to anything; you can just change the 53 | # 'flags' list of compilation flags. 54 | compilation_database_folder = '' 55 | 56 | if os.path.exists( compilation_database_folder ): 57 | database = ycm_core.CompilationDatabase( compilation_database_folder ) 58 | else: 59 | database = None 60 | 61 | SOURCE_EXTENSIONS = [ '.cpp', '.cxx', '.cc', '.c', '.m', '.mm' ] 62 | 63 | def DirectoryOfThisScript(): 64 | return os.path.dirname( os.path.abspath( __file__ ) ) 65 | 66 | 67 | def MakeRelativePathsInFlagsAbsolute( flags, working_directory ): 68 | if not working_directory: 69 | return list( flags ) 70 | new_flags = [] 71 | make_next_absolute = False 72 | path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ] 73 | for flag in flags: 74 | new_flag = flag 75 | 76 | if make_next_absolute: 77 | make_next_absolute = False 78 | if not flag.startswith( '/' ): 79 | new_flag = os.path.join( working_directory, flag ) 80 | 81 | for path_flag in path_flags: 82 | if flag == path_flag: 83 | make_next_absolute = True 84 | break 85 | 86 | if flag.startswith( path_flag ): 87 | path = flag[ len( path_flag ): ] 88 | new_flag = path_flag + os.path.join( working_directory, path ) 89 | break 90 | 91 | if new_flag: 92 | new_flags.append( new_flag ) 93 | return new_flags 94 | 95 | 96 | def IsHeaderFile( filename ): 97 | extension = os.path.splitext( filename )[ 1 ] 98 | return extension in [ '.h', '.hxx', '.hpp', '.hh' ] 99 | 100 | 101 | def GetCompilationInfoForFile( filename ): 102 | # The compilation_commands.json file generated by CMake does not have entries 103 | # for header files. So we do our best by asking the db for flags for a 104 | # corresponding source file, if any. If one exists, the flags for that file 105 | # should be good enough. 
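  # (For example, given "foo.h" this probes "foo.cpp", "foo.cxx", "foo.cc",
  # and so on in the same directory, in the order listed in SOURCE_EXTENSIONS.)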
106 | if IsHeaderFile( filename ): 107 | basename = os.path.splitext( filename )[ 0 ] 108 | for extension in SOURCE_EXTENSIONS: 109 | replacement_file = basename + extension 110 | if os.path.exists( replacement_file ): 111 | compilation_info = database.GetCompilationInfoForFile( 112 | replacement_file ) 113 | if compilation_info.compiler_flags_: 114 | return compilation_info 115 | return None 116 | return database.GetCompilationInfoForFile( filename ) 117 | 118 | 119 | # This is the entry point; this function is called by ycmd to produce flags for 120 | # a file. 121 | def FlagsForFile( filename, **kwargs ): 122 | if database: 123 | # Bear in mind that compilation_info.compiler_flags_ does NOT return a 124 | # python list, but a "list-like" StringVec object 125 | compilation_info = GetCompilationInfoForFile( filename ) 126 | if not compilation_info: 127 | return None 128 | 129 | final_flags = MakeRelativePathsInFlagsAbsolute( 130 | compilation_info.compiler_flags_, 131 | compilation_info.compiler_working_dir_ ) 132 | else: 133 | relative_to = DirectoryOfThisScript() 134 | final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to ) 135 | 136 | return { 137 | 'flags': final_flags, 138 | 'do_cache': True 139 | } 140 | 141 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/cifarnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a variant of the CIFAR-10 model definition.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(stddev=stddev) 26 | 27 | 28 | def cifarnet(images, num_classes=10, is_training=False, 29 | dropout_keep_prob=0.5, 30 | prediction_fn=slim.softmax, 31 | scope='CifarNet'): 32 | """Creates a variant of the CifarNet model. 33 | 34 | Note that since the output is a set of 'logits', the values fall in the 35 | interval of (-infinity, infinity). Consequently, to convert the outputs to a 36 | probability distribution over the characters, one will need to convert them 37 | using the softmax function: 38 | 39 | logits = cifarnet.cifarnet(images, is_training=False) 40 | probabilities = tf.nn.softmax(logits) 41 | predictions = tf.argmax(logits, 1) 42 | 43 | Args: 44 | images: A batch of `Tensors` of size [batch_size, height, width, channels]. 45 | num_classes: the number of classes in the dataset. If 0 or None, the logits 46 | layer is omitted and the input features to the logits layer are returned 47 | instead. 48 | is_training: specifies whether or not we're currently training the model. 
49 | This variable will determine the behaviour of the dropout layer. 50 | dropout_keep_prob: the percentage of activation values that are retained. 51 | prediction_fn: a function to get predictions out of logits. 52 | scope: Optional variable_scope. 53 | 54 | Returns: 55 | net: a 2D Tensor with the logits (pre-softmax activations) if num_classes 56 | is a non-zero integer, or the input to the logits layer if num_classes 57 | is 0 or None. 58 | end_points: a dictionary from components of the network to the corresponding 59 | activation. 60 | """ 61 | end_points = {} 62 | 63 | with tf.variable_scope(scope, 'CifarNet', [images]): 64 | net = slim.conv2d(images, 64, [5, 5], scope='conv1') 65 | end_points['conv1'] = net 66 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') 67 | end_points['pool1'] = net 68 | net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1') 69 | net = slim.conv2d(net, 64, [5, 5], scope='conv2') 70 | end_points['conv2'] = net 71 | net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2') 72 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') 73 | end_points['pool2'] = net 74 | net = slim.flatten(net) 75 | end_points['Flatten'] = net 76 | net = slim.fully_connected(net, 384, scope='fc3') 77 | end_points['fc3'] = net 78 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 79 | scope='dropout3') 80 | net = slim.fully_connected(net, 192, scope='fc4') 81 | end_points['fc4'] = net 82 | if not num_classes: 83 | return net, end_points 84 | logits = slim.fully_connected(net, num_classes, 85 | biases_initializer=tf.zeros_initializer(), 86 | weights_initializer=trunc_normal(1/192.0), 87 | weights_regularizer=None, 88 | activation_fn=None, 89 | scope='logits') 90 | 91 | end_points['Logits'] = logits 92 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 93 | 94 | return logits, end_points 95 | cifarnet.default_image_size = 32 96 | 97 | 98 | def cifarnet_arg_scope(weight_decay=0.004): 99 | """Defines the default cifarnet argument scope. 100 | 101 | Args: 102 | weight_decay: The weight decay to use for regularizing the model. 103 | 104 | Returns: 105 | An `arg_scope` to use for the inception v3 model. 106 | """ 107 | with slim.arg_scope( 108 | [slim.conv2d], 109 | weights_initializer=tf.truncated_normal_initializer(stddev=5e-2), 110 | activation_fn=tf.nn.relu): 111 | with slim.arg_scope( 112 | [slim.fully_connected], 113 | biases_initializer=tf.constant_initializer(0.1), 114 | weights_initializer=trunc_normal(0.04), 115 | weights_regularizer=slim.l2_regularizer(weight_decay), 116 | activation_fn=tf.nn.relu) as sc: 117 | return sc 118 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/mobilenet_v1_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Validate mobilenet_v1 with options for quantization.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import math 22 | import tensorflow as tf 23 | 24 | from datasets import dataset_factory 25 | from nets import mobilenet_v1 26 | from preprocessing import preprocessing_factory 27 | 28 | slim = tf.contrib.slim 29 | 30 | flags = tf.app.flags 31 | 32 | flags.DEFINE_string('master', '', 'Session master') 33 | flags.DEFINE_integer('batch_size', 250, 'Batch size') 34 | flags.DEFINE_integer('num_classes', 1001, 'Number of classes to distinguish') 35 | flags.DEFINE_integer('num_examples', 50000, 'Number of examples to evaluate') 36 | flags.DEFINE_integer('image_size', 224, 'Input image resolution') 37 | flags.DEFINE_float('depth_multiplier', 1.0, 'Depth multiplier for mobilenet') 38 | flags.DEFINE_bool('quantize', False, 'Quantize training') 39 | flags.DEFINE_string('checkpoint_dir', '', 'The directory for checkpoints') 40 | flags.DEFINE_string('eval_dir', '', 'Directory for writing eval event logs') 41 | flags.DEFINE_string('dataset_dir', '', 'Location of dataset') 42 | 43 | FLAGS = flags.FLAGS 44 | 45 | 46 | def imagenet_input(is_training): 47 | """Data reader for imagenet. 48 | 49 | Reads in imagenet data and performs pre-processing on the images. 50 | 51 | Args: 52 | is_training: bool specifying if train or validation dataset is needed. 53 | Returns: 54 | A batch of images and labels. 55 | """ 56 | if is_training: 57 | dataset = dataset_factory.get_dataset('imagenet', 'train', 58 | FLAGS.dataset_dir) 59 | else: 60 | dataset = dataset_factory.get_dataset('imagenet', 'validation', 61 | FLAGS.dataset_dir) 62 | 63 | provider = slim.dataset_data_provider.DatasetDataProvider( 64 | dataset, 65 | shuffle=is_training, 66 | common_queue_capacity=2 * FLAGS.batch_size, 67 | common_queue_min=FLAGS.batch_size) 68 | [image, label] = provider.get(['image', 'label']) 69 | 70 | image_preprocessing_fn = preprocessing_factory.get_preprocessing( 71 | 'mobilenet_v1', is_training=is_training) 72 | 73 | image = image_preprocessing_fn(image, FLAGS.image_size, FLAGS.image_size) 74 | 75 | images, labels = tf.train.batch( 76 | tensors=[image, label], 77 | batch_size=FLAGS.batch_size, 78 | num_threads=4, 79 | capacity=5 * FLAGS.batch_size) 80 | return images, labels 81 | 82 | 83 | def metrics(logits, labels): 84 | """Specify the metrics for eval. 85 | 86 | Args: 87 | logits: Logits output from the graph. 88 | labels: Ground truth labels for inputs. 89 | 90 | Returns: 91 | Eval Op for the graph. 92 | """ 93 | labels = tf.squeeze(labels) 94 | names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({ 95 | 'Accuracy': tf.metrics.accuracy(tf.argmax(logits, 1), labels), 96 | 'Recall_5': tf.metrics.recall_at_k(labels, logits, 5), 97 | }) 98 | for name, value in names_to_values.iteritems(): 99 | slim.summaries.add_scalar_summary( 100 | value, name, prefix='eval', print_summary=True) 101 | return names_to_updates.values() 102 | 103 | 104 | def build_model(): 105 | """Build the mobilenet_v1 model for evaluation. 106 | 107 | Returns: 108 | g: graph with rewrites after insertion of quantization ops and batch norm 109 | folding. 110 | eval_ops: eval ops for inference. 111 | variables_to_restore: List of variables to restore from checkpoint. 
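    (Note: as the body below shows, only `g` and `eval_ops` are actually
    returned; no list of variables to restore is built here.)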
112 | """ 113 | g = tf.Graph() 114 | with g.as_default(): 115 | inputs, labels = imagenet_input(is_training=False) 116 | 117 | scope = mobilenet_v1.mobilenet_v1_arg_scope( 118 | is_training=False, weight_decay=0.0) 119 | with slim.arg_scope(scope): 120 | logits, _ = mobilenet_v1.mobilenet_v1( 121 | inputs, 122 | is_training=False, 123 | depth_multiplier=FLAGS.depth_multiplier, 124 | num_classes=FLAGS.num_classes) 125 | 126 | if FLAGS.quantize: 127 | tf.contrib.quantize.create_eval_graph() 128 | 129 | eval_ops = metrics(logits, labels) 130 | 131 | return g, eval_ops 132 | 133 | 134 | def eval_model(): 135 | """Evaluates mobilenet_v1.""" 136 | g, eval_ops = build_model() 137 | with g.as_default(): 138 | num_batches = math.ceil(FLAGS.num_examples / float(FLAGS.batch_size)) 139 | slim.evaluation.evaluate_once( 140 | FLAGS.master, 141 | FLAGS.checkpoint_dir, 142 | logdir=FLAGS.eval_dir, 143 | num_evals=num_batches, 144 | eval_op=eval_ops) 145 | 146 | 147 | def main(unused_arg): 148 | eval_model() 149 | 150 | 151 | if __name__ == '__main__': 152 | tf.app.run(main) 153 | -------------------------------------------------------------------------------- /augmentation/test.py: -------------------------------------------------------------------------------- 1 | import math 2 | import cv2 3 | import numpy as np 4 | 5 | 6 | def rotate_image(image, angle): 7 | """ 8 | Rotates an OpenCV 2 / NumPy image about it's centre by the given angle 9 | (in degrees). The returned image will be large enough to hold the entire 10 | new image, with a black background 11 | """ 12 | 13 | # Get the image size 14 | # No that's not an error - NumPy stores image matricies backwards 15 | image_size = (image.shape[1], image.shape[0]) 16 | image_center = tuple(np.array(image_size) / 2) 17 | 18 | # Convert the OpenCV 3x2 rotation matrix to 3x3 19 | rot_mat = np.vstack( 20 | [cv2.getRotationMatrix2D(image_center, angle, 1.0), [0, 0, 1]] 21 | ) 22 | 23 | rot_mat_notranslate = np.matrix(rot_mat[0:2, 0:2]) 24 | 25 | # Shorthand for below calcs 26 | image_w2 = image_size[0] * 0.5 27 | image_h2 = image_size[1] * 0.5 28 | 29 | # Obtain the rotated coordinates of the image corners 30 | rotated_coords = [ 31 | (np.array([-image_w2, image_h2]) * rot_mat_notranslate).A[0], 32 | (np.array([ image_w2, image_h2]) * rot_mat_notranslate).A[0], 33 | (np.array([-image_w2, -image_h2]) * rot_mat_notranslate).A[0], 34 | (np.array([ image_w2, -image_h2]) * rot_mat_notranslate).A[0] 35 | ] 36 | 37 | # Find the size of the new image 38 | x_coords = [pt[0] for pt in rotated_coords] 39 | x_pos = [x for x in x_coords if x > 0] 40 | x_neg = [x for x in x_coords if x < 0] 41 | 42 | y_coords = [pt[1] for pt in rotated_coords] 43 | y_pos = [y for y in y_coords if y > 0] 44 | y_neg = [y for y in y_coords if y < 0] 45 | 46 | right_bound = max(x_pos) 47 | left_bound = min(x_neg) 48 | top_bound = max(y_pos) 49 | bot_bound = min(y_neg) 50 | 51 | new_w = int(abs(right_bound - left_bound)) 52 | new_h = int(abs(top_bound - bot_bound)) 53 | 54 | # We require a translation matrix to keep the image centred 55 | trans_mat = np.matrix([ 56 | [1, 0, int(new_w * 0.5 - image_w2)], 57 | [0, 1, int(new_h * 0.5 - image_h2)], 58 | [0, 0, 1] 59 | ]) 60 | 61 | # Compute the tranform for the combined rotation and translation 62 | affine_mat = (np.matrix(trans_mat) * np.matrix(rot_mat))[0:2, :] 63 | 64 | # Apply the transform 65 | result = cv2.warpAffine( 66 | image, 67 | affine_mat, 68 | (new_w, new_h), 69 | flags=cv2.INTER_LINEAR 70 | ) 71 | 72 | return result 73 | 74 | 75 | def 
largest_rotated_rect(w, h, angle):
    """
    Given a rectangle of size wxh that has been rotated by 'angle' (in
    radians), computes the width and height of the largest possible
    axis-aligned rectangle within the rotated rectangle.

    Original JS code by 'Andri' and Magnus Hoff from Stack Overflow

    Converted to Python by Aaron Snoswell

    Note: the original Python port computed gamma as math.atan2(bb_w, bb_w)
    in both branches of a conditional (always pi/4), which gives wrong
    results for non-square inputs. This version uses the closed-form
    solution for the maximal inscribed axis-aligned rectangle instead.
    """

    if w <= 0 or h <= 0:
        return 0, 0

    # Normalise the angle to [0, pi/2]; the problem is symmetric beyond that.
    angle = abs(angle) % math.pi
    if angle > math.pi / 2:
        angle = math.pi - angle

    width_is_longer = w >= h
    side_long, side_short = (w, h) if width_is_longer else (h, w)

    sin_a, cos_a = abs(math.sin(angle)), abs(math.cos(angle))
    if side_short <= 2.0 * sin_a * cos_a * side_long or abs(sin_a - cos_a) < 1e-10:
        # Half-constrained case: two crop corners touch the longer side,
        # the other two lie on a mid-line parallel to it.
        x = 0.5 * side_short
        bb_w, bb_h = (x / sin_a, x / cos_a) if width_is_longer else (x / cos_a, x / sin_a)
    else:
        # Fully constrained case: the crop touches all four sides.
        cos_2a = cos_a * cos_a - sin_a * sin_a
        bb_w = (w * cos_a - h * sin_a) / cos_2a
        bb_h = (h * cos_a - w * sin_a) / cos_2a

    return bb_w, bb_h


def crop_around_center(image, width, height):
    """
    Given a NumPy / OpenCV 2 image, crops it to the given width and height,
    around its centre point
    """

    image_size = (image.shape[1], image.shape[0])
    image_center = (int(image_size[0] * 0.5), int(image_size[1] * 0.5))

    if width > image_size[0]:
        width = image_size[0]

    if height > image_size[1]:
        height = image_size[1]

    x1 = int(image_center[0] - width * 0.5)
    x2 = int(image_center[0] + width * 0.5)
    y1 = int(image_center[1] - height * 0.5)
    y2 = int(image_center[1] + height * 0.5)

    return image[y1:y2, x1:x2]


def demo():
    """
    Demos the largest_rotated_rect function
    """

    image = cv2.imread("/home/give/Game/OCR/data/ICPR/rename/100/image_100/10.png")
    print(np.shape(image))
    image_height, image_width = image.shape[0:2]

    cv2.imshow("Original Image", image)

    print("Press [enter] to begin the demo")
    print("Press [q] or Escape to quit")

    key = cv2.waitKey(0)
    if key == ord("q") or key == 27:
        exit()

    for i in np.arange(0, 360, 0.5):
        image_orig = np.copy(image)
        image_rotated = rotate_image(image, i)
        image_rotated_cropped = crop_around_center(
            image_rotated,
            *largest_rotated_rect(
                image_width,
                image_height,
                math.radians(i)
            )
        )

        key = cv2.waitKey(2)
        if key == ord("q") or key == 27:
            exit()

        cv2.imshow("Original Image", image_orig)
        cv2.imshow("Rotated Image", image_rotated)
        cv2.imshow("Cropped Image", image_rotated_cropped)

    print("Done")


if __name__ == "__main__":
    demo()
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
- Text detection competition held at ICPR 2018; details [here](https://tianchi.aliyun.com/competition/introduction.htm?spm=5176.100066.0.0.6acdd780TvrRix&raceId=231651).
- Task description: detect the locations of text in an image.
- Three approaches were tried, in order:
    - CTPN, built on Faster R-CNN ([code](https://github.com/eragonruan/text-detection-ctpn)). Running the released pretrained model directly on this dataset gives an F1 score on the order of only 0.10. For details on CTPN, see my post [[Paper reading] CTPN](https://blog.csdn.net/liangdong2014/article/details/79690118).
    - EAST, built on U-Net ([code](https://github.com/argman/EAST)). Running the released pretrained model directly on this dataset gives an F1 score on the order of 0.20. For details on EAST, see my post [[Paper reading] EAST](https://blog.csdn.net/liangdong2014/article/details/79857061).
    - PixelLink, also U-Net based; there is no open-source code, the paper is [here](https://arxiv.org/pdf/1801.01315.pdf). It argues that traditional Faster-R-CNN-style methods must choose proposal sizes and therefore cannot handle arbitrary scales, and that EAST's geometry loss, like CTPN's, is a regression on the localization. The authors hold that the text bounding boxes can be obtained directly from the text / non-text prediction, so the regression is unnecessary: they recover boxes from the score prediction with OpenCV's minAreaRect (a small sketch of that step follows this list).
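The minAreaRect step is easy to picture with a short sketch. This is my own illustration, not code from the PixelLink authors; `score_map` is a hypothetical `[H, W]` array of per-pixel text probabilities:

```python
import cv2
import numpy as np


def boxes_from_score_map(score_map, threshold=0.8):
    """Recover rotated bounding boxes from a text / non-text score map."""
    mask = (score_map > threshold).astype(np.uint8)
    # findContours returns 2 or 3 values depending on the OpenCV version.
    res = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = res[-2]
    boxes = []
    for cnt in contours:
        rect = cv2.minAreaRect(cnt)        # ((cx, cy), (w, h), angle)
        boxes.append(cv2.boxPoints(rect))  # the 4 corner points (OpenCV >= 3)
    return boxes
```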
- Choice:
    - I finally went with the second approach, U-Net-based EAST. First, I believe a U-Net-based method can in principle detect objects at arbitrary scales. Second, PixelLink's way of handling boxes with large overlaps does not really suit this dataset (perhaps I misunderstand it); see below.
    - How does PixelLink handle overlap? As the paper describes, PixelLink has two kinds of ground truth: a label map (1 channel) marking whether each pixel is text, and a link map (8 channels) marking, for each pixel, whether each of its 8 neighbours belongs to the same text instance (1 if so, 0 otherwise). Where boxes overlap, the authors zero out both the score map and the link map in the overlapping region. That works on images like Figure 1, but on images like Figure 2 it splits two overlapping bounding boxes into four. On the ICPR dataset this matters a lot.
![image](http://ocnsbpp0d.bkt.clouddn.com/PixelLink.png)

![image](http://ocnsbpp0d.bkt.clouddn.com/14.png)
- Attempted improvements:
    - Eight versions were tried, one after another (a loss sketch follows this list):
    - east_icdar2015_resnet_v1_50_rbox: We first found that deconvolving EAST all the way back to the original image size works better (the original deconvolves to 1/4 or 1/2 of the input size), probably because our dataset has few small text instances.
    - east_icdar2015_resnet_v1_50_rbox_v1: The original code uses only resize for upsampling, presumably to avoid checkerboard artifacts; I added a convolution after each resize. This checkpoint is the previous version with (conv + resize) in place of (resize).
    - east_icdar2015_resnet_v1_50_rbox_v2: The above plus OHEM (applied only to the geometry, not to the score map). The PixelLink paper uses OHEM to select hard negative pixels and counter the imbalance between positive and negative text pixels.
    - east_icdar2015_resnet_v1_50_rbox_v3: v2 with the backbone changed from ResNet-50 to an Inception-ResNet model.
    - east_icdar2015_resnet_v1_50_rbox_v4: v3 plus an instance-balanced cross-entropy loss, also taken from PixelLink. It keeps text instances of different sizes from contributing unequally to the loss (large instances otherwise dominate). I also noticed later that the EAST model is less effective on longer text instances than on shorter ones.
    - east_icdar2015_resnet_v1_50_rbox_v5: v4 plus a BLSTM to extract global features, borrowing from CTPN, which uses a BLSTM for exactly that. The aim is to widen each pixel's receptive field so that large instances are predicted more accurately; concretely, an LSTM extracts features from each feature map that is about to be deconvolved.
    - east_icdar2015_resnet_v1_50_rbox_v6: v4 plus an extra optimization branch, an IoU loss. In EAST, at test time a bounding box's score is the score of a single pixel, which is somewhat arbitrary and unfair to the whole box. We came up with two fixes:
        - At test time, use the mean score inside the bounding box as its score. Experiments show an improvement, but a small one, and test time increases substantially.
        - As above, add a branch to EAST that predicts each bounding box's IoU and train it with a Smooth L1 loss against the IoU computed from the geometry and score predictions. The problem is that training time blows up: for a 512*512 image we must compute 512*512 IoUs per image, and only on the CPU (I could not do it on the GPU), so it is very slow; for lack of time this run was not finished.
    - east_icdar2015_resnet_v1_50_rbox_v7: v4 plus instance-balanced weights.
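For reference, here is a minimal sketch of the class-balanced cross-entropy used for the score map in v4. This is my own illustration of the loss described in the EAST paper, not a copy of the training code; `y_true` and `y_pred` are assumed to be `[N, H, W, 1]` tensors of ground-truth labels and predicted scores:

```python
import tensorflow as tf


def balanced_cross_entropy(y_true, y_pred, eps=1e-5):
    # beta is the fraction of negative (non-text) pixels, so the rare
    # positive class is weighted up and the common class down.
    beta = 1.0 - tf.reduce_mean(y_true)
    loss = -(beta * y_true * tf.log(y_pred + eps)
             + (1.0 - beta) * (1.0 - y_true) * tf.log(1.0 - y_pred + eps))
    return tf.reduce_mean(loss)
```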
- Data augmentation:
    - rotate
    - Flipud (flip up-down)
    - Fliplr (flip left-right)
    - random Dropout
    - random additive noise
- Results:
    - Initial version
        - Evaluation by our own method (153578 steps)
            - Precision is 0.5885
            - Recall is 0.4008
            - F1 score is 0.4769
        - Evaluation by the ICDAR method (153578 steps)
            - "precision": 0.7764084507042254
            - "recall": 0.43192948090107736
            - "hmean": 0.5550660792951542
    - Deconvolution added back to the original image size
        - Evaluation by our own method (408899 steps)
            - Precision is 0.5705
            - Recall is 0.4433
            - F1 score is 0.4989
        - Evaluation by the ICDAR method (408899 steps)
            - "recall": 0.5039177277179236
            - "precision": 0.7516435354273192
            - "hmean": 0.6033421284080915
    - Deconvolution to the original size + conv in place of unpool + score map folded into the geometry
        - Evaluation by our own method (110930 steps)
            - Precision is 0.5343
            - Recall is 0.4330
            - F1 score is 0.4784
    - Deconvolution to the original size + conv in place of unpool + score map folded into the geometry + OHEM for geometry
        - Evaluation by our own method (148415 steps)
            - Precision is 0.5461
            - Recall is 0.4589
            - F1 score is 0.4987
        - Evaluation by the ICDAR method (148415 steps)
            - "recall": 0.5269343780607247
            - "precision": 0.727027027027027
            - "hmean": 0.6110164679159569
    - Inception-ResNet version (v3)
        - Evaluation by our own method (257009 steps)
            - Precision is 0.5315
            - Recall is 0.4306
            - F1 score is 0.4758
        - Evaluation by the ICDAR method (257009 steps)
            - "recall": 0.5259549461312438
            - "precision": 0.7351129363449692
            - "hmean": 0.6131886954039394
    - Inception-ResNet version + balanced cross-entropy loss, without OHEM (v4)
        - Evaluation by our own method (423777 steps)
            - Precision is 0.5633
            - Recall is 0.4601
            - F1 score is 0.5065
            - 1000 version: 0.51
        - Evaluation by the ICDAR method (423777 steps)
            - "recall": 0.539177277179236
            - "precision": 0.7582644628099173
            - "hmean": 0.6302232398397253
- Future
    - Personally I think U-Net-based methods will become the mainstream, mainly because:
        - The code is simple and easy to understand; given two methods that solve the same problem equally well, people prefer the simpler one.
        - U-Net handles object scale more or less arbitrarily.
    - What could still be improved?
        - EAST still localizes long text instances inaccurately, and the methods described above do not solve that.
        - During NMS, a single pixel's score is not a fair score for the whole bounding box.
    - Full code: [UpCoder-EAST](https://github.com/UpCoder/ICPR_TextDection), adapted mainly from the original EAST implementation: [EAST](https://github.com/argman/EAST)
--------------------------------------------------------------------------------
/nets/NASNet/pnasnet_test.py:
--------------------------------------------------------------------------------
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 | # ============================================================================== 15 | """Tests for slim.pnasnet.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets.NASNet import pnasnet 23 | 24 | 25 | 26 | slim = tf.contrib.slim 27 | 28 | 29 | class PNASNetTest(tf.test.TestCase): 30 | 31 | def testBuildLogitsLargeModel(self): 32 | batch_size = 5 33 | height, width = 331, 331 34 | num_classes = 1000 35 | inputs = tf.random_uniform((batch_size, height, width, 3)) 36 | tf.train.create_global_step() 37 | with slim.arg_scope(pnasnet.pnasnet_large_arg_scope()): 38 | logits, end_points = pnasnet.build_pnasnet_large(inputs, num_classes) 39 | auxlogits = end_points['AuxLogits'] 40 | predictions = end_points['Predictions'] 41 | self.assertListEqual(auxlogits.get_shape().as_list(), 42 | [batch_size, num_classes]) 43 | self.assertListEqual(logits.get_shape().as_list(), 44 | [batch_size, num_classes]) 45 | self.assertListEqual(predictions.get_shape().as_list(), 46 | [batch_size, num_classes]) 47 | 48 | def testBuildPreLogitsLargeModel(self): 49 | batch_size = 5 50 | height, width = 331, 331 51 | num_classes = None 52 | inputs = tf.random_uniform((batch_size, height, width, 3)) 53 | tf.train.create_global_step() 54 | with slim.arg_scope(pnasnet.pnasnet_large_arg_scope()): 55 | net, end_points = pnasnet.build_pnasnet_large(inputs, num_classes) 56 | self.assertFalse('AuxLogits' in end_points) 57 | self.assertFalse('Predictions' in end_points) 58 | self.assertTrue(net.op.name.startswith('final_layer/Mean')) 59 | self.assertListEqual(net.get_shape().as_list(), [batch_size, 4320]) 60 | 61 | def testAllEndPointsShapesLargeModel(self): 62 | batch_size = 5 63 | height, width = 331, 331 64 | num_classes = 1000 65 | inputs = tf.random_uniform((batch_size, height, width, 3)) 66 | tf.train.create_global_step() 67 | with slim.arg_scope(pnasnet.pnasnet_large_arg_scope()): 68 | _, end_points = pnasnet.build_pnasnet_large(inputs, num_classes) 69 | 70 | endpoints_shapes = {'Stem': [batch_size, 42, 42, 540], 71 | 'Cell_0': [batch_size, 42, 42, 1080], 72 | 'Cell_1': [batch_size, 42, 42, 1080], 73 | 'Cell_2': [batch_size, 42, 42, 1080], 74 | 'Cell_3': [batch_size, 42, 42, 1080], 75 | 'Cell_4': [batch_size, 21, 21, 2160], 76 | 'Cell_5': [batch_size, 21, 21, 2160], 77 | 'Cell_6': [batch_size, 21, 21, 2160], 78 | 'Cell_7': [batch_size, 21, 21, 2160], 79 | 'Cell_8': [batch_size, 11, 11, 4320], 80 | 'Cell_9': [batch_size, 11, 11, 4320], 81 | 'Cell_10': [batch_size, 11, 11, 4320], 82 | 'Cell_11': [batch_size, 11, 11, 4320], 83 | 'global_pool': [batch_size, 4320], 84 | # Logits and predictions 85 | 'AuxLogits': [batch_size, 1000], 86 | 'Predictions': [batch_size, 1000], 87 | 'Logits': [batch_size, 1000], 88 | } 89 | self.assertEqual(len(end_points), 17) 90 | self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys()) 91 | for endpoint_name in endpoints_shapes: 92 | tf.logging.info('Endpoint name: {}'.format(endpoint_name)) 93 | expected_shape = endpoints_shapes[endpoint_name] 94 | self.assertIn(endpoint_name, end_points) 95 | self.assertListEqual(end_points[endpoint_name].get_shape().as_list(), 96 | expected_shape) 97 | 98 | def testNoAuxHeadLargeModel(self): 99 | batch_size = 5 100 | height, width = 331, 331 101 | num_classes = 1000 102 | for use_aux_head in (True, False): 103 | tf.reset_default_graph() 104 | inputs = tf.random_uniform((batch_size, height, width, 3)) 105 | 
tf.train.create_global_step() 106 | config = pnasnet.large_imagenet_config() 107 | config.set_hparam('use_aux_head', int(use_aux_head)) 108 | with slim.arg_scope(pnasnet.pnasnet_large_arg_scope()): 109 | _, end_points = pnasnet.build_pnasnet_large(inputs, num_classes, 110 | config=config) 111 | self.assertEqual('AuxLogits' in end_points, use_aux_head) 112 | 113 | def testOverrideHParamsLargeModel(self): 114 | batch_size = 5 115 | height, width = 331, 331 116 | num_classes = 1000 117 | inputs = tf.random_uniform((batch_size, height, width, 3)) 118 | tf.train.create_global_step() 119 | config = pnasnet.large_imagenet_config() 120 | config.set_hparam('data_format', 'NCHW') 121 | with slim.arg_scope(pnasnet.pnasnet_large_arg_scope()): 122 | _, end_points = pnasnet.build_pnasnet_large( 123 | inputs, num_classes, config=config) 124 | self.assertListEqual( 125 | end_points['Stem'].shape.as_list(), [batch_size, 540, 42, 42]) 126 | 127 | 128 | if __name__ == '__main__': 129 | tf.test.main() 130 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/overfeat.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains the model definition for the OverFeat network. 16 | 17 | The definition for the network was obtained from: 18 | OverFeat: Integrated Recognition, Localization and Detection using 19 | Convolutional Networks 20 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 21 | Yann LeCun, 2014 22 | http://arxiv.org/abs/1312.6229 23 | 24 | Usage: 25 | with slim.arg_scope(overfeat.overfeat_arg_scope()): 26 | outputs, end_points = overfeat.overfeat(inputs) 27 | 28 | @@overfeat 29 | """ 30 | from __future__ import absolute_import 31 | from __future__ import division 32 | from __future__ import print_function 33 | 34 | import tensorflow as tf 35 | 36 | slim = tf.contrib.slim 37 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 38 | 39 | 40 | def overfeat_arg_scope(weight_decay=0.0005): 41 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 42 | activation_fn=tf.nn.relu, 43 | weights_regularizer=slim.l2_regularizer(weight_decay), 44 | biases_initializer=tf.zeros_initializer()): 45 | with slim.arg_scope([slim.conv2d], padding='SAME'): 46 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: 47 | return arg_sc 48 | 49 | 50 | def overfeat(inputs, 51 | num_classes=1000, 52 | is_training=True, 53 | dropout_keep_prob=0.5, 54 | spatial_squeeze=True, 55 | scope='overfeat', 56 | global_pool=False): 57 | """Contains the model definition for the OverFeat network. 
58 | 59 | The definition for the network was obtained from: 60 | OverFeat: Integrated Recognition, Localization and Detection using 61 | Convolutional Networks 62 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 63 | Yann LeCun, 2014 64 | http://arxiv.org/abs/1312.6229 65 | 66 | Note: All the fully_connected layers have been transformed to conv2d layers. 67 | To use in classification mode, resize input to 231x231. To use in fully 68 | convolutional mode, set spatial_squeeze to false. 69 | 70 | Args: 71 | inputs: a tensor of size [batch_size, height, width, channels]. 72 | num_classes: number of predicted classes. If 0 or None, the logits layer is 73 | omitted and the input features to the logits layer are returned instead. 74 | is_training: whether or not the model is being trained. 75 | dropout_keep_prob: the probability that activations are kept in the dropout 76 | layers during training. 77 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 78 | outputs. Useful to remove unnecessary dimensions for classification. 79 | scope: Optional scope for the variables. 80 | global_pool: Optional boolean flag. If True, the input to the classification 81 | layer is avgpooled to size 1x1, for any input size. (This is not part 82 | of the original OverFeat.) 83 | 84 | Returns: 85 | net: the output of the logits layer (if num_classes is a non-zero integer), 86 | or the non-dropped-out input to the logits layer (if num_classes is 0 or 87 | None). 88 | end_points: a dict of tensors with intermediate activations. 89 | """ 90 | with tf.variable_scope(scope, 'overfeat', [inputs]) as sc: 91 | end_points_collection = sc.original_name_scope + '_end_points' 92 | # Collect outputs for conv2d, fully_connected and max_pool2d 93 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 94 | outputs_collections=end_points_collection): 95 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 96 | scope='conv1') 97 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 98 | net = slim.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2') 99 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 100 | net = slim.conv2d(net, 512, [3, 3], scope='conv3') 101 | net = slim.conv2d(net, 1024, [3, 3], scope='conv4') 102 | net = slim.conv2d(net, 1024, [3, 3], scope='conv5') 103 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 104 | 105 | # Use conv2d instead of fully_connected layers. 106 | with slim.arg_scope([slim.conv2d], 107 | weights_initializer=trunc_normal(0.005), 108 | biases_initializer=tf.constant_initializer(0.1)): 109 | net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6') 110 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 111 | scope='dropout6') 112 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 113 | # Convert end_points_collection into a end_point dict. 
114 | end_points = slim.utils.convert_collection_to_dict( 115 | end_points_collection) 116 | if global_pool: 117 | net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool') 118 | end_points['global_pool'] = net 119 | if num_classes: 120 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 121 | scope='dropout7') 122 | net = slim.conv2d(net, num_classes, [1, 1], 123 | activation_fn=None, 124 | normalizer_fn=None, 125 | biases_initializer=tf.zeros_initializer(), 126 | scope='fc8') 127 | if spatial_squeeze: 128 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 129 | end_points[sc.name + '/fc8'] = net 130 | return net, end_points 131 | overfeat.default_image_size = 231 132 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/alexnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a model definition for AlexNet. 16 | 17 | This work was first described in: 18 | ImageNet Classification with Deep Convolutional Neural Networks 19 | Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton 20 | 21 | and later refined in: 22 | One weird trick for parallelizing convolutional neural networks 23 | Alex Krizhevsky, 2014 24 | 25 | Here we provide the implementation proposed in "One weird trick" and not 26 | "ImageNet Classification", as per the paper, the LRN layers have been removed. 27 | 28 | Usage: 29 | with slim.arg_scope(alexnet.alexnet_v2_arg_scope()): 30 | outputs, end_points = alexnet.alexnet_v2(inputs) 31 | 32 | @@alexnet_v2 33 | """ 34 | 35 | from __future__ import absolute_import 36 | from __future__ import division 37 | from __future__ import print_function 38 | 39 | import tensorflow as tf 40 | 41 | slim = tf.contrib.slim 42 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 43 | 44 | 45 | def alexnet_v2_arg_scope(weight_decay=0.0005): 46 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 47 | activation_fn=tf.nn.relu, 48 | biases_initializer=tf.constant_initializer(0.1), 49 | weights_regularizer=slim.l2_regularizer(weight_decay)): 50 | with slim.arg_scope([slim.conv2d], padding='SAME'): 51 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: 52 | return arg_sc 53 | 54 | 55 | def alexnet_v2(inputs, 56 | num_classes=1000, 57 | is_training=True, 58 | dropout_keep_prob=0.5, 59 | spatial_squeeze=True, 60 | scope='alexnet_v2', 61 | global_pool=False): 62 | """AlexNet version 2. 63 | 64 | Described in: http://arxiv.org/pdf/1404.5997v2.pdf 65 | Parameters from: 66 | github.com/akrizhevsky/cuda-convnet2/blob/master/layers/ 67 | layers-imagenet-1gpu.cfg 68 | 69 | Note: All the fully_connected layers have been transformed to conv2d layers. 
70 | To use in classification mode, resize input to 224x224 or set 71 | global_pool=True. To use in fully convolutional mode, set 72 | spatial_squeeze to false. 73 | The LRN layers have been removed and change the initializers from 74 | random_normal_initializer to xavier_initializer. 75 | 76 | Args: 77 | inputs: a tensor of size [batch_size, height, width, channels]. 78 | num_classes: the number of predicted classes. If 0 or None, the logits layer 79 | is omitted and the input features to the logits layer are returned instead. 80 | is_training: whether or not the model is being trained. 81 | dropout_keep_prob: the probability that activations are kept in the dropout 82 | layers during training. 83 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 84 | logits. Useful to remove unnecessary dimensions for classification. 85 | scope: Optional scope for the variables. 86 | global_pool: Optional boolean flag. If True, the input to the classification 87 | layer is avgpooled to size 1x1, for any input size. (This is not part 88 | of the original AlexNet.) 89 | 90 | Returns: 91 | net: the output of the logits layer (if num_classes is a non-zero integer), 92 | or the non-dropped-out input to the logits layer (if num_classes is 0 93 | or None). 94 | end_points: a dict of tensors with intermediate activations. 95 | """ 96 | with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc: 97 | end_points_collection = sc.original_name_scope + '_end_points' 98 | # Collect outputs for conv2d, fully_connected and max_pool2d. 99 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 100 | outputs_collections=[end_points_collection]): 101 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 102 | scope='conv1') 103 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool1') 104 | net = slim.conv2d(net, 192, [5, 5], scope='conv2') 105 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool2') 106 | net = slim.conv2d(net, 384, [3, 3], scope='conv3') 107 | net = slim.conv2d(net, 384, [3, 3], scope='conv4') 108 | net = slim.conv2d(net, 256, [3, 3], scope='conv5') 109 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool5') 110 | 111 | # Use conv2d instead of fully_connected layers. 112 | with slim.arg_scope([slim.conv2d], 113 | weights_initializer=trunc_normal(0.005), 114 | biases_initializer=tf.constant_initializer(0.1)): 115 | net = slim.conv2d(net, 4096, [5, 5], padding='VALID', 116 | scope='fc6') 117 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 118 | scope='dropout6') 119 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 120 | # Convert end_points_collection into a end_point dict. 
      end_points = slim.utils.convert_collection_to_dict(
          end_points_collection)
      if global_pool:
        net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
        end_points['global_pool'] = net
      if num_classes:
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout7')
        net = slim.conv2d(net, num_classes, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          biases_initializer=tf.zeros_initializer(),
                          scope='fc8')
        if spatial_squeeze:
          net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
alexnet_v2.default_image_size = 224
--------------------------------------------------------------------------------
/utils/tools.py:
--------------------------------------------------------------------------------
# -*- coding=utf-8 -*-
import numpy as np
from PIL import ImageDraw, Image
from scipy.spatial import distance as dist
import cv2


def read_from_gt(gt_file):
    with open(gt_file) as file:
        lines = file.readlines()
    gt_bboxs = []
    txts = []
    for line in lines:
        splited_line = line.split(',')
        splited_line_num = splited_line[:8]
        splited_line_num = [int(float(ele)) for ele in splited_line_num]
        gt_bboxs.append(splited_line_num)
        txts.append(splited_line[8])
    return gt_bboxs, txts


def show_image_from_array(image_arr):
    from PIL import Image
    img = Image.fromarray(image_arr)
    img.show()


def vis_img_bbox(img_file, gt_file):
    img = cv2.imread(img_file)[:, :, ::-1]
    gtbboxes = np.asarray(read_from_gt(gt_file)[0])
    print(np.shape(gtbboxes))
    for box in gtbboxes:
        cv2.polylines(img[:, :, ::-1], [box.astype(np.int32).reshape((-1, 1, 2))], True,
                      color=(255, 255, 0), thickness=1)
    show_image_from_array(img)


def save_gt_file(save_gt_path, coordinations, txts=None):
    with open(save_gt_path, 'wb+') as f:
        strs = []
        start_index = 0
        for idx in range(len(coordinations)):
            cur_str = ','.join([str(element) for element in coordinations[start_index]])
            if txts is None:
                cur_str += ',TXT\n'
            else:
                txts[start_index] = str(txts[start_index]).replace('\n', '')
                cur_str += (',' + txts[start_index] + '\n')
            strs.append(cur_str)
            start_index += 1
        f.writelines(strs)
        f.close()


def cal_TP(overlaps, threshold=0.7):
    shape = list(np.shape(overlaps))
    count = 0
    for i in range(shape[0]):
        max_val = np.max(overlaps[i])
        if max_val >= threshold:
            count += 1
    return count


def cal_FP(overlaps, len_pred, threshold=0.7):
    return len_pred - cal_TP(overlaps, threshold)


def cal_FN(overlaps, len_gt, threshold=0.7):
    '''
    Find the ground-truth boxes that were missed, i.e. boxes that are
    ground truth but were not detected by any predicted bbox.
    :param overlaps:
    :param len_gt:
    :param threshold:
    :return:
    '''
    if (len_gt - cal_TP(overlaps, threshold)) < 0:
        print('Error, FN is negative')
        assert False
    max_value = np.max(overlaps, axis=0)
    return np.sum(max_value < threshold)
    # return len_gt - cal_TP(overlaps, threshold)
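# How the helpers above combine into the precision / recall / F1 numbers
# quoted in the readme -- a minimal sketch (my own illustration; `overlaps`
# comes from bbox_overlaps below, with shape [num_pred, num_gt]):
#
#     tp = cal_TP(overlaps, threshold=0.7)
#     fp = cal_FP(overlaps, len(pred_boxes), threshold=0.7)
#     fn = cal_FN(overlaps, len(gt_boxes), threshold=0.7)
#     precision = tp / float(tp + fp)
#     recall = tp / float(tp + fn)
#     f1 = 2 * precision * recall / (precision + recall)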
img_draw = ImageDraw.Draw(img) 91 | img_draw.polygon(points, fill=128) 92 | return img 93 | def draw_rects(image_arr, boxs): 94 | img = Image.fromarray(image_arr) 95 | img_draw = ImageDraw.Draw(img) 96 | for box in boxs: 97 | points = [[0, 0], [0, 0], [0, 0], [0, 0]] 98 | ind = 0 99 | for i in range(4): 100 | for j in range(2): 101 | points[i][j] = box[ind] 102 | ind += 1 103 | points[i] = tuple(points[i]) 104 | img_draw.polygon(points, fill=128) 105 | return img 106 | def bbox_overlaps(boxes, query_boxes, im_size): 107 | ''' 108 | 109 | :param boxes: (N, 8) ndarray of float, pred 110 | :param query_boxes: (K, 8) ndarray of float, gt 111 | :param im_size: size of the image 112 | the four points are ordered as follows: 113 | 1 4 114 | 2 3 115 | :return: (N, K) ndarray of overlap between boxes and query_boxes 116 | ''' 117 | def cal_overlap(img1, img2): 118 | img1 = np.array(img1) 119 | img2 = np.array(img2) 120 | img1 = (img1 == 128) 121 | img2 = (img2 == 128) 122 | return np.sum(np.logical_and(img1, img2)) 123 | N = boxes.shape[0] 124 | K = query_boxes.shape[0] 125 | overlaps = np.zeros((N, K), dtype=np.float32) 126 | # draw_rects(np.zeros(im_size), query_boxes).show() 127 | # draw_rects(np.zeros(im_size), boxes).show() 128 | for k in range(K): 129 | # compute the area of the ground-truth box by rasterizing it 130 | cur_gt = query_boxes[k] 131 | gt_white_img = np.zeros(im_size, np.uint8) 132 | gt_box_img = draw_rect(gt_white_img, cur_gt) 133 | # gt_box_img.show() 134 | gt_area = np.sum(np.array(gt_box_img) == 128) 135 | for n in range(N): 136 | # rasterize the predicted box and compute its IoU against the ground-truth mask 137 | cur_bbox = boxes[n] 138 | pred_white_img = np.zeros(im_size, np.uint8) 139 | pred_box_img = draw_rect(pred_white_img, cur_bbox) 140 | # pred_box_img.show() 141 | pred_area = np.sum(np.array(pred_box_img) == 128) 142 | overlap_area = cal_overlap(gt_box_img, pred_box_img) 143 | overlaps[n, k] = (overlap_area * 1.0) / ((pred_area + gt_area - overlap_area) * 1.0) 144 | return overlaps 145 | 146 | 147 | def order_points(pts): 148 | # sort the points based on their x-coordinates 149 | xSorted = pts[np.argsort(pts[:, 0]), :] 150 | 151 | # grab the left-most and right-most points from the sorted 152 | # x-coordinate points 153 | leftMost = xSorted[:2, :] 154 | rightMost = xSorted[2:, :] 155 | 156 | # now, sort the left-most coordinates according to their 157 | # y-coordinates so we can grab the top-left and bottom-left 158 | # points, respectively 159 | leftMost = leftMost[np.argsort(leftMost[:, 1]), :] 160 | (tl, bl) = leftMost 161 | 162 | # now that we have the top-left coordinate, use it as an 163 | # anchor to calculate the Euclidean distance between the 164 | # top-left and right-most points; by the Pythagorean 165 | # theorem, the point with the largest distance will be 166 | # our bottom-right point 167 | D = dist.cdist(tl[np.newaxis], rightMost, "euclidean")[0] 168 | (br, tr) = rightMost[np.argsort(D)[::-1], :] 169 | 170 | # return the coordinates in top-left, top-right, 171 | # bottom-right, and bottom-left order 172 | return np.array([tl, tr, br, bl], dtype="float32") 173 | 174 | if __name__ == '__main__': 175 | img_size = [400, 300] 176 | pred_points = np.array( 177 | [ 178 | [0, 0, 0, 100, 100, 100, 100, 0], 179 | [77, 92, 77, 195, 483, 195, 483, 92] 180 | ] 181 | ) 182 | gt_points = np.array( 183 | [ 184 | [0, 0, 0, 50, 50, 50, 50, 0], 185 | [80, 93, 77, 195, 483, 195, 483, 92] 186 | ] 187 | ) 188 | print(bbox_overlaps(pred_points, gt_points, img_size)) --------------------------------------------------------------------------------
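The rasterized IoU above can be sanity-checked against an exact polygon IoU. A minimal cross-check sketch, assuming Shapely (which requirements.txt already pins); the sample pair reuses the first boxes from the __main__ block, whose exact IoU is 2500/10000 = 0.25, so the rasterized estimate should land close to that up to pixel-discretization error:

import numpy as np
from shapely.geometry import Polygon

def shapely_iou(box_a, box_b):
    # each box is 8 floats: x1, y1, x2, y2, x3, y3, x4, y4
    pa = Polygon(np.asarray(box_a, dtype=np.float64).reshape(4, 2))
    pb = Polygon(np.asarray(box_b, dtype=np.float64).reshape(4, 2))
    inter = pa.intersection(pb).area
    union = pa.area + pb.area - inter
    return inter / union if union > 0 else 0.0

print(shapely_iou([0, 0, 0, 100, 100, 100, 100, 0],
                  [0, 0, 0, 50, 50, 50, 50, 0]))  # 0.25

/lanms/lanms.h: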
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "clipper/clipper.hpp" 4 | 5 | // locality-aware NMS 6 | namespace lanms { 7 | 8 | namespace cl = ClipperLib; 9 | 10 | struct Polygon { 11 | cl::Path poly; 12 | float score; 13 | }; 14 | 15 | float paths_area(const ClipperLib::Paths &ps) { 16 | float area = 0; 17 | for (auto &&p: ps) 18 | area += cl::Area(p); 19 | return area; 20 | } 21 | 22 | float poly_iou(const Polygon &a, const Polygon &b) { 23 | cl::Clipper clpr; 24 | clpr.AddPath(a.poly, cl::ptSubject, true); 25 | clpr.AddPath(b.poly, cl::ptClip, true); 26 | 27 | cl::Paths inter, uni; 28 | clpr.Execute(cl::ctIntersection, inter, cl::pftEvenOdd); 29 | clpr.Execute(cl::ctUnion, uni, cl::pftEvenOdd); 30 | 31 | auto inter_area = paths_area(inter), 32 | uni_area = paths_area(uni); 33 | return std::abs(inter_area) / std::max(std::abs(uni_area), 1.0f); 34 | } 35 | 36 | bool should_merge(const Polygon &a, const Polygon &b, float iou_threshold) { 37 | return poly_iou(a, b) > iou_threshold; 38 | } 39 | 40 | /** 41 | * Incrementally merge polygons 42 | */ 43 | class PolyMerger { 44 | public: 45 | PolyMerger(): score(0), nr_polys(0) { 46 | memset(data, 0, sizeof(data)); 47 | } 48 | 49 | /** 50 | * Add a new polygon to be merged. 51 | */ 52 | void add(const Polygon &p_given) { 53 | Polygon p; 54 | if (nr_polys > 0) { 55 | // vertices of two polygons to merge may not be in the same order; 56 | // we match their vertices by choosing the ordering that 57 | // minimizes the total squared distance. 58 | // see function normalize_poly for details. 59 | p = normalize_poly(get(), p_given); 60 | } else { 61 | p = p_given; 62 | } 63 | assert(p.poly.size() == 4); 64 | auto &poly = p.poly; 65 | auto s = p.score; 66 | data[0] += poly[0].X * s; 67 | data[1] += poly[0].Y * s; 68 | 69 | data[2] += poly[1].X * s; 70 | data[3] += poly[1].Y * s; 71 | 72 | data[4] += poly[2].X * s; 73 | data[5] += poly[2].Y * s; 74 | 75 | data[6] += poly[3].X * s; 76 | data[7] += poly[3].Y * s; 77 | 78 | score += p.score; 79 | 80 | nr_polys += 1; 81 | } 82 | 83 | inline std::int64_t sqr(std::int64_t x) { return x * x; } 84 | 85 | Polygon normalize_poly( 86 | const Polygon &ref, 87 | const Polygon &p) { 88 | 89 | std::int64_t min_d = std::numeric_limits<std::int64_t>::max(); 90 | size_t best_start = 0, best_order = 0; 91 | 92 | for (size_t start = 0; start < 4; start ++) { 93 | size_t j = start; 94 | std::int64_t d = ( 95 | sqr(ref.poly[(j + 0) % 4].X - p.poly[(j + 0) % 4].X) 96 | + sqr(ref.poly[(j + 0) % 4].Y - p.poly[(j + 0) % 4].Y) 97 | + sqr(ref.poly[(j + 1) % 4].X - p.poly[(j + 1) % 4].X) 98 | + sqr(ref.poly[(j + 1) % 4].Y - p.poly[(j + 1) % 4].Y) 99 | + sqr(ref.poly[(j + 2) % 4].X - p.poly[(j + 2) % 4].X) 100 | + sqr(ref.poly[(j + 2) % 4].Y - p.poly[(j + 2) % 4].Y) 101 | + sqr(ref.poly[(j + 3) % 4].X - p.poly[(j + 3) % 4].X) 102 | + sqr(ref.poly[(j + 3) % 4].Y - p.poly[(j + 3) % 4].Y) 103 | ); 104 | if (d < min_d) { 105 | min_d = d; 106 | best_start = start; 107 | best_order = 0; 108 | } 109 | 110 | d = ( 111 | sqr(ref.poly[(j + 0) % 4].X - p.poly[(j + 3) % 4].X) 112 | + sqr(ref.poly[(j + 0) % 4].Y - p.poly[(j + 3) % 4].Y) 113 | + sqr(ref.poly[(j + 1) % 4].X - p.poly[(j + 2) % 4].X) 114 | + sqr(ref.poly[(j + 1) % 4].Y - p.poly[(j + 2) % 4].Y) 115 | + sqr(ref.poly[(j + 2) % 4].X - p.poly[(j + 1) % 4].X) 116 | + sqr(ref.poly[(j + 2) % 4].Y - p.poly[(j + 1) % 4].Y) 117 | + sqr(ref.poly[(j + 3) % 4].X - p.poly[(j + 0) % 4].X) 118 | + sqr(ref.poly[(j + 3) % 4].Y - p.poly[(j + 0) % 4].Y) 119 | ); 120 | if (d < min_d) { 121 | min_d = d; 122 | best_start = start; 123 | best_order = 1; 124 | } 125 | } 126 | 127 | Polygon r; 128 | r.poly.resize(4); 129 | auto j = best_start; 130 | if (best_order == 0) { 131 | for (size_t i = 0; i < 4; i ++) 132 | r.poly[i] = p.poly[(j + i) % 4]; 133 | } else { 134 | for (size_t i = 0; i < 4; i ++) 135 | r.poly[i] = p.poly[(j + 4 - i - 1) % 4]; 136 | } 137 | r.score = p.score; 138 | return r; 139 | } 140 | 141 | Polygon get() const { 142 | Polygon p; 143 | 144 | auto &poly = p.poly; 145 | poly.resize(4); 146 | auto score_inv = 1.0f / std::max(1e-8f, score); 147 | poly[0].X = data[0] * score_inv; 148 | poly[0].Y = data[1] * score_inv; 149 | poly[1].X = data[2] * score_inv; 150 | poly[1].Y = data[3] * score_inv; 151 | poly[2].X = data[4] * score_inv; 152 | poly[2].Y = data[5] * score_inv; 153 | poly[3].X = data[6] * score_inv; 154 | poly[3].Y = data[7] * score_inv; 155 | 156 | assert(score > 0); 157 | p.score = score; 158 | 159 | return p; 160 | } 161 | 162 | private: 163 | std::int64_t data[8]; 164 | float score; 165 | std::int32_t nr_polys; 166 | }; 167 | 168 | 169 | /** 170 | * The standard NMS algorithm. 171 | */ 172 | std::vector<Polygon> standard_nms(std::vector<Polygon> &polys, float iou_threshold) { 173 | size_t n = polys.size(); 174 | if (n == 0) 175 | return {}; 176 | std::vector<size_t> indices(n); 177 | std::iota(std::begin(indices), std::end(indices), 0); 178 | std::sort(std::begin(indices), std::end(indices), [&](size_t i, size_t j) { return polys[i].score > polys[j].score; }); 179 | 180 | std::vector<size_t> keep; 181 | while (indices.size()) { 182 | size_t p = 0, cur = indices[0]; 183 | keep.emplace_back(cur); 184 | for (size_t i = 1; i < indices.size(); i ++) { 185 | if (!should_merge(polys[cur], polys[indices[i]], iou_threshold)) { 186 | indices[p ++] = indices[i]; 187 | } 188 | } 189 | indices.resize(p); 190 | } 191 | 192 | std::vector<Polygon> ret; 193 | for (auto &&i: keep) { 194 | ret.emplace_back(polys[i]); 195 | } 196 | return ret; 197 | } 198 | 199 | std::vector<Polygon> 200 | merge_quadrangle_n9(const float *data, size_t n, float iou_threshold) { 201 | using cInt = cl::cInt; 202 | 203 | // first pass 204 | std::vector<Polygon> polys; 205 | for (size_t i = 0; i < n; i ++) { 206 | auto p = data + i * 9; 207 | Polygon poly{ 208 | { 209 | {cInt(p[0]), cInt(p[1])}, 210 | {cInt(p[2]), cInt(p[3])}, 211 | {cInt(p[4]), cInt(p[5])}, 212 | {cInt(p[6]), cInt(p[7])}, 213 | }, 214 | p[8], 215 | }; 216 | 217 | if (polys.size()) { 218 | // merge with the last one 219 | auto &bpoly = polys.back(); 220 | if (should_merge(poly, bpoly, iou_threshold)) { 221 | PolyMerger merger; 222 | merger.add(bpoly); 223 | merger.add(poly); 224 | bpoly = merger.get(); 225 | } else { 226 | polys.emplace_back(poly); 227 | } 228 | } else { 229 | polys.emplace_back(poly); 230 | } 231 | } 232 | return standard_nms(polys, iou_threshold); 233 | } 234 | } 235 | --------------------------------------------------------------------------------
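The algorithm above in one paragraph: merge_quadrangle_n9 exploits the fact that EAST emits geometries roughly row by row, so each incoming quadrangle is compared only against the last kept one and score-weighted-merged (via PolyMerger) when their IoU clears the threshold; standard NMS then runs on the much smaller survivor set. A minimal Python sketch of that first pass, mirroring the repo's locality_aware_nms.py (each poly is 9 floats: 8 coordinates plus a score; iou_fn is any quadrangle IoU, for example the Shapely helper sketched after utils/tools.py):

import numpy as np

def weighted_merge(g, p):
    # average the coordinates, weighted by score; scores accumulate
    q = np.zeros(9)
    q[:8] = (g[8] * g[:8] + p[8] * p[:8]) / (g[8] + p[8])
    q[8] = g[8] + p[8]
    return q

def locality_aware_pass(polys, iou_fn, iou_threshold=0.2):
    kept = []
    for p in polys:
        if kept and iou_fn(kept[-1][:8], p[:8]) > iou_threshold:
            kept[-1] = weighted_merge(kept[-1], p)  # merge with the last one
        else:
            kept.append(p.copy())
    return kept  # standard NMS on this reduced set comes next

/nets/Inception_ResNet_V2/nets/pix2pix_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.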
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================= 15 | """Tests for pix2pix.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | from nets import pix2pix 23 | 24 | 25 | class GeneratorTest(tf.test.TestCase): 26 | 27 | def test_nonsquare_inputs_raise_exception(self): 28 | batch_size = 2 29 | height, width = 240, 320 30 | num_outputs = 4 31 | 32 | images = tf.ones((batch_size, height, width, 3)) 33 | 34 | with self.assertRaises(ValueError): 35 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 36 | pix2pix.pix2pix_generator( 37 | images, num_outputs, upsample_method='nn_upsample_conv') 38 | 39 | def _reduced_default_blocks(self): 40 | """Returns the default blocks, scaled down to make test run faster.""" 41 | return [pix2pix.Block(b.num_filters // 32, b.decoder_keep_prob) 42 | for b in pix2pix._default_generator_blocks()] 43 | 44 | def test_output_size_nn_upsample_conv(self): 45 | batch_size = 2 46 | height, width = 256, 256 47 | num_outputs = 4 48 | 49 | images = tf.ones((batch_size, height, width, 3)) 50 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 51 | logits, _ = pix2pix.pix2pix_generator( 52 | images, num_outputs, blocks=self._reduced_default_blocks(), 53 | upsample_method='nn_upsample_conv') 54 | 55 | with self.test_session() as session: 56 | session.run(tf.global_variables_initializer()) 57 | np_outputs = session.run(logits) 58 | self.assertListEqual([batch_size, height, width, num_outputs], 59 | list(np_outputs.shape)) 60 | 61 | def test_output_size_conv2d_transpose(self): 62 | batch_size = 2 63 | height, width = 256, 256 64 | num_outputs = 4 65 | 66 | images = tf.ones((batch_size, height, width, 3)) 67 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 68 | logits, _ = pix2pix.pix2pix_generator( 69 | images, num_outputs, blocks=self._reduced_default_blocks(), 70 | upsample_method='conv2d_transpose') 71 | 72 | with self.test_session() as session: 73 | session.run(tf.global_variables_initializer()) 74 | np_outputs = session.run(logits) 75 | self.assertListEqual([batch_size, height, width, num_outputs], 76 | list(np_outputs.shape)) 77 | 78 | def test_block_number_dictates_number_of_layers(self): 79 | batch_size = 2 80 | height, width = 256, 256 81 | num_outputs = 4 82 | 83 | images = tf.ones((batch_size, height, width, 3)) 84 | blocks = [ 85 | pix2pix.Block(64, 0.5), 86 | pix2pix.Block(128, 0), 87 | ] 88 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 89 | _, end_points = pix2pix.pix2pix_generator( 90 | images, num_outputs, blocks) 91 | 92 | num_encoder_layers = 0 93 | num_decoder_layers = 0 94 | for end_point in end_points: 95 | if end_point.startswith('encoder'): 96 | num_encoder_layers += 1 97 | elif end_point.startswith('decoder'): 98 | num_decoder_layers += 1 99 | 100 | self.assertEqual(num_encoder_layers, len(blocks)) 101 | self.assertEqual(num_decoder_layers, len(blocks)) 102 | 103 | 104 | class 
DiscriminatorTest(tf.test.TestCase): 105 | 106 | def _layer_output_size(self, input_size, kernel_size=4, stride=2, pad=2): 107 | return (input_size + pad * 2 - kernel_size) // stride + 1 108 | 109 | def test_four_layers(self): 110 | batch_size = 2 111 | input_size = 256 112 | 113 | output_size = self._layer_output_size(input_size) 114 | output_size = self._layer_output_size(output_size) 115 | output_size = self._layer_output_size(output_size) 116 | output_size = self._layer_output_size(output_size, stride=1) 117 | output_size = self._layer_output_size(output_size, stride=1) 118 | 119 | images = tf.ones((batch_size, input_size, input_size, 3)) 120 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 121 | logits, end_points = pix2pix.pix2pix_discriminator( 122 | images, num_filters=[64, 128, 256, 512]) 123 | self.assertListEqual([batch_size, output_size, output_size, 1], 124 | logits.shape.as_list()) 125 | self.assertListEqual([batch_size, output_size, output_size, 1], 126 | end_points['predictions'].shape.as_list()) 127 | 128 | def test_four_layers_no_padding(self): 129 | batch_size = 2 130 | input_size = 256 131 | 132 | output_size = self._layer_output_size(input_size, pad=0) 133 | output_size = self._layer_output_size(output_size, pad=0) 134 | output_size = self._layer_output_size(output_size, pad=0) 135 | output_size = self._layer_output_size(output_size, stride=1, pad=0) 136 | output_size = self._layer_output_size(output_size, stride=1, pad=0) 137 | 138 | images = tf.ones((batch_size, input_size, input_size, 3)) 139 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 140 | logits, end_points = pix2pix.pix2pix_discriminator( 141 | images, num_filters=[64, 128, 256, 512], padding=0) 142 | self.assertListEqual([batch_size, output_size, output_size, 1], 143 | logits.shape.as_list()) 144 | self.assertListEqual([batch_size, output_size, output_size, 1], 145 | end_points['predictions'].shape.as_list()) 146 | 147 | def test_four_layers_wrong_padding(self): 148 | batch_size = 2 149 | input_size = 256 150 | 151 | images = tf.ones((batch_size, input_size, input_size, 3)) 152 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 153 | with self.assertRaises(TypeError): 154 | pix2pix.pix2pix_discriminator( 155 | images, num_filters=[64, 128, 256, 512], padding=1.5) 156 | 157 | def test_four_layers_negative_padding(self): 158 | batch_size = 2 159 | input_size = 256 160 | 161 | images = tf.ones((batch_size, input_size, input_size, 3)) 162 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 163 | with self.assertRaises(ValueError): 164 | pix2pix.pix2pix_discriminator( 165 | images, num_filters=[64, 128, 256, 512], padding=-1) 166 | 167 | if __name__ == '__main__': 168 | tf.test.main() 169 | -------------------------------------------------------------------------------- /lanms/include/pybind11/chrono.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/chrono.h: Transparent conversion between std::chrono and python's datetime 3 | 4 | Copyright (c) 2016 Trent Houliston <trent@houliston.me> and 5 | Wenzel Jakob <wenzel.jakob@epfl.ch> 6 | 7 | All rights reserved. Use of this source code is governed by a 8 | BSD-style license that can be found in the LICENSE file.
9 | */ 10 | 11 | #pragma once 12 | 13 | #include "pybind11.h" 14 | #include <cmath> 15 | #include <ctime> 16 | #include <chrono> 17 | #include <datetime.h> 18 | 19 | // Backport the PyDateTime_DELTA functions from Python3.3 if required 20 | #ifndef PyDateTime_DELTA_GET_DAYS 21 | #define PyDateTime_DELTA_GET_DAYS(o) (((PyDateTime_Delta*)o)->days) 22 | #endif 23 | #ifndef PyDateTime_DELTA_GET_SECONDS 24 | #define PyDateTime_DELTA_GET_SECONDS(o) (((PyDateTime_Delta*)o)->seconds) 25 | #endif 26 | #ifndef PyDateTime_DELTA_GET_MICROSECONDS 27 | #define PyDateTime_DELTA_GET_MICROSECONDS(o) (((PyDateTime_Delta*)o)->microseconds) 28 | #endif 29 | 30 | NAMESPACE_BEGIN(pybind11) 31 | NAMESPACE_BEGIN(detail) 32 | 33 | template <typename type> class duration_caster { 34 | public: 35 | typedef typename type::rep rep; 36 | typedef typename type::period period; 37 | 38 | typedef std::chrono::duration<uint_fast32_t, std::ratio<86400>> days; 39 | 40 | bool load(handle src, bool) { 41 | using namespace std::chrono; 42 | 43 | // Lazy initialise the PyDateTime import 44 | if (!PyDateTimeAPI) { PyDateTime_IMPORT; } 45 | 46 | if (!src) return false; 47 | // If invoked with datetime.delta object 48 | if (PyDelta_Check(src.ptr())) { 49 | value = type(duration_cast<duration<rep, period>>( 50 | days(PyDateTime_DELTA_GET_DAYS(src.ptr())) 51 | + seconds(PyDateTime_DELTA_GET_SECONDS(src.ptr())) 52 | + microseconds(PyDateTime_DELTA_GET_MICROSECONDS(src.ptr())))); 53 | return true; 54 | } 55 | // If invoked with a float we assume it is seconds and convert 56 | else if (PyFloat_Check(src.ptr())) { 57 | value = type(duration_cast<duration<rep, period>>(duration<double>(PyFloat_AsDouble(src.ptr())))); 58 | return true; 59 | } 60 | else return false; 61 | } 62 | 63 | // If this is a duration just return it back 64 | static const std::chrono::duration<rep, period>& get_duration(const std::chrono::duration<rep, period> &src) { 65 | return src; 66 | } 67 | 68 | // If this is a time_point get the time_since_epoch 69 | template <typename Clock> static std::chrono::duration<rep, period> get_duration(const std::chrono::time_point<Clock, std::chrono::duration<rep, period>> &src) { 70 | return src.time_since_epoch(); 71 | } 72 | 73 | static handle cast(const type &src, return_value_policy /* policy */, handle /* parent */) { 74 | using namespace std::chrono; 75 | 76 | // Use overloaded function to get our duration from our source 77 | // Works out if it is a duration or time_point and get the duration 78 | auto d = get_duration(src); 79 | 80 | // Lazy initialise the PyDateTime import 81 | if (!PyDateTimeAPI) { PyDateTime_IMPORT; } 82 | 83 | // Declare these special duration types so the conversions happen with the correct primitive types (int) 84 | using dd_t = duration<int, std::ratio<86400>>; 85 | using ss_t = duration<int, std::ratio<1>>; 86 | using us_t = duration<int, std::micro>; 87 | 88 | auto dd = duration_cast<dd_t>(d); 89 | auto subd = d - dd; 90 | auto ss = duration_cast<ss_t>(subd); 91 | auto us = duration_cast<us_t>(subd - ss); 92 | return PyDelta_FromDSU(dd.count(), ss.count(), us.count()); 93 | } 94 | 95 | PYBIND11_TYPE_CASTER(type, _("datetime.timedelta")); 96 | }; 97 | 98 | // This is for casting times on the system clock into datetime.datetime instances 99 | template <typename Duration> class type_caster<std::chrono::time_point<std::chrono::system_clock, Duration>> { 100 | public: 101 | typedef std::chrono::time_point<std::chrono::system_clock, Duration> type; 102 | bool load(handle src, bool) { 103 | using namespace std::chrono; 104 | 105 | // Lazy initialise the PyDateTime import 106 | if (!PyDateTimeAPI) { PyDateTime_IMPORT; } 107 | 108 | if (!src) return false; 109 | if (PyDateTime_Check(src.ptr())) { 110 | std::tm cal; 111 | cal.tm_sec = PyDateTime_DATE_GET_SECOND(src.ptr()); 112 | cal.tm_min = PyDateTime_DATE_GET_MINUTE(src.ptr()); 113 | cal.tm_hour = PyDateTime_DATE_GET_HOUR(src.ptr()); 114 | cal.tm_mday = PyDateTime_GET_DAY(src.ptr()); 115 | cal.tm_mon = PyDateTime_GET_MONTH(src.ptr()) - 1; 116 | cal.tm_year = PyDateTime_GET_YEAR(src.ptr()) - 1900; 117 | cal.tm_isdst = -1; 118 | 119 | value = system_clock::from_time_t(std::mktime(&cal)) + microseconds(PyDateTime_DATE_GET_MICROSECOND(src.ptr())); 120 | return true; 121 | } 122 | else return false; 123 | } 124 | 125 | static handle cast(const std::chrono::time_point<std::chrono::system_clock, Duration> &src, return_value_policy /* policy */, handle /* parent */) { 126 | using namespace std::chrono; 127 | 128 | // Lazy initialise the PyDateTime import 129 | if (!PyDateTimeAPI) { PyDateTime_IMPORT; } 130 | 131 | std::time_t tt = system_clock::to_time_t(src); 132 | // this function uses static memory so it's best to copy it out asap just in case 133 | // otherwise other code that is using localtime may break this (not just python code) 134 | std::tm localtime = *std::localtime(&tt); 135 | 136 | // Declare these special duration types so the conversions happen with the correct primitive types (int) 137 | using us_t = duration<int, std::micro>; 138 | 139 | return PyDateTime_FromDateAndTime(localtime.tm_year + 1900, 140 | localtime.tm_mon + 1, 141 | localtime.tm_mday, 142 | localtime.tm_hour, 143 | localtime.tm_min, 144 | localtime.tm_sec, 145 | (duration_cast<us_t>(src.time_since_epoch() % seconds(1))).count()); 146 | } 147 | PYBIND11_TYPE_CASTER(type, _("datetime.datetime")); 148 | }; 149 | 150 | // Other clocks that are not the system clock are not measured as datetime.datetime objects 151 | // since they are not measured on calendar time. So instead we just make them timedeltas 152 | // Or if they have passed us a time as a float we convert that 153 | template <typename Clock, typename Duration> class type_caster<std::chrono::time_point<Clock, Duration>> 154 | : public duration_caster<std::chrono::time_point<Clock, Duration>> { 155 | }; 156 | 157 | template <typename Rep, typename Period> class type_caster<std::chrono::duration<Rep, Period>> 158 | : public duration_caster<std::chrono::duration<Rep, Period>> { 159 | }; 160 | 161 | NAMESPACE_END(detail) 162 | NAMESPACE_END(pybind11) 163 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/nets_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Contains a factory for building various models.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | import functools 21 | 22 | import tensorflow as tf 23 | 24 | from nets import alexnet 25 | from nets import cifarnet 26 | from nets import inception 27 | from nets import lenet 28 | from nets import mobilenet_v1 29 | from nets import overfeat 30 | from nets import resnet_v1 31 | from nets import resnet_v2 32 | from nets import vgg 33 | from nets.mobilenet import mobilenet_v2 34 | from nets.nasnet import nasnet 35 | from nets.nasnet import pnasnet 36 | 37 | slim = tf.contrib.slim 38 | 39 | networks_map = {'alexnet_v2': alexnet.alexnet_v2, 40 | 'cifarnet': cifarnet.cifarnet, 41 | 'overfeat': overfeat.overfeat, 42 | 'vgg_a': vgg.vgg_a, 43 | 'vgg_16': vgg.vgg_16, 44 | 'vgg_19': vgg.vgg_19, 45 | 'inception_v1': inception.inception_v1, 46 | 'inception_v2': inception.inception_v2, 47 | 'inception_v3': inception.inception_v3, 48 | 'inception_v4': inception.inception_v4, 49 | 'inception_resnet_v2': inception.inception_resnet_v2, 50 | 'lenet': lenet.lenet, 51 | 'resnet_v1_50': resnet_v1.resnet_v1_50, 52 | 'resnet_v1_101': resnet_v1.resnet_v1_101, 53 | 'resnet_v1_152': resnet_v1.resnet_v1_152, 54 | 'resnet_v1_200': resnet_v1.resnet_v1_200, 55 | 'resnet_v2_50': resnet_v2.resnet_v2_50, 56 | 'resnet_v2_101': resnet_v2.resnet_v2_101, 57 | 'resnet_v2_152': resnet_v2.resnet_v2_152, 58 | 'resnet_v2_200': resnet_v2.resnet_v2_200, 59 | 'mobilenet_v1': mobilenet_v1.mobilenet_v1, 60 | 'mobilenet_v1_075': mobilenet_v1.mobilenet_v1_075, 61 | 'mobilenet_v1_050': mobilenet_v1.mobilenet_v1_050, 62 | 'mobilenet_v1_025': mobilenet_v1.mobilenet_v1_025, 63 | 'mobilenet_v2': mobilenet_v2.mobilenet, 64 | 'nasnet_cifar': nasnet.build_nasnet_cifar, 65 | 'nasnet_mobile': nasnet.build_nasnet_mobile, 66 | 'nasnet_large': nasnet.build_nasnet_large, 67 | 'pnasnet_large': pnasnet.build_pnasnet_large, 68 | } 69 | 70 | arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope, 71 | 'cifarnet': cifarnet.cifarnet_arg_scope, 72 | 'overfeat': overfeat.overfeat_arg_scope, 73 | 'vgg_a': vgg.vgg_arg_scope, 74 | 'vgg_16': vgg.vgg_arg_scope, 75 | 'vgg_19': vgg.vgg_arg_scope, 76 | 'inception_v1': inception.inception_v3_arg_scope, 77 | 'inception_v2': inception.inception_v3_arg_scope, 78 | 'inception_v3': inception.inception_v3_arg_scope, 79 | 'inception_v4': inception.inception_v4_arg_scope, 80 | 'inception_resnet_v2': 81 | inception.inception_resnet_v2_arg_scope, 82 | 'lenet': lenet.lenet_arg_scope, 83 | 'resnet_v1_50': resnet_v1.resnet_arg_scope, 84 | 'resnet_v1_101': resnet_v1.resnet_arg_scope, 85 | 'resnet_v1_152': resnet_v1.resnet_arg_scope, 86 | 'resnet_v1_200': resnet_v1.resnet_arg_scope, 87 | 'resnet_v2_50': resnet_v2.resnet_arg_scope, 88 | 'resnet_v2_101': resnet_v2.resnet_arg_scope, 89 | 'resnet_v2_152': resnet_v2.resnet_arg_scope, 90 | 'resnet_v2_200': resnet_v2.resnet_arg_scope, 91 | 'mobilenet_v1': mobilenet_v1.mobilenet_v1_arg_scope, 92 | 'mobilenet_v1_075': mobilenet_v1.mobilenet_v1_arg_scope, 93 | 'mobilenet_v1_050': mobilenet_v1.mobilenet_v1_arg_scope, 94 | 'mobilenet_v1_025': mobilenet_v1.mobilenet_v1_arg_scope, 95 | 'mobilenet_v2': mobilenet_v2.training_scope, 96 | 'nasnet_cifar': nasnet.nasnet_cifar_arg_scope, 97 | 'nasnet_mobile': nasnet.nasnet_mobile_arg_scope, 98 | 'nasnet_large': nasnet.nasnet_large_arg_scope, 99 | 'pnasnet_large': 
pnasnet.pnasnet_large_arg_scope, 100 | } 101 | 102 | 103 | def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False): 104 | """Returns a network_fn such as `logits, end_points = network_fn(images)`. 105 | 106 | Args: 107 | name: The name of the network. 108 | num_classes: The number of classes to use for classification. If 0 or None, 109 | the logits layer is omitted and its input features are returned instead. 110 | weight_decay: The l2 coefficient for the model weights. 111 | is_training: `True` if the model is being used for training and `False` 112 | otherwise. 113 | 114 | Returns: 115 | network_fn: A function that applies the model to a batch of images. It has 116 | the following signature: 117 | net, end_points = network_fn(images) 118 | The `images` input is a tensor of shape [batch_size, height, width, 3] 119 | with height = width = network_fn.default_image_size. (The permissibility 120 | and treatment of other sizes depends on the network_fn.) 121 | The returned `end_points` are a dictionary of intermediate activations. 122 | The returned `net` is the topmost layer, depending on `num_classes`: 123 | If `num_classes` was a non-zero integer, `net` is a logits tensor 124 | of shape [batch_size, num_classes]. 125 | If `num_classes` was 0 or `None`, `net` is a tensor with the input 126 | to the logits layer of shape [batch_size, 1, 1, num_features] or 127 | [batch_size, num_features]. Dropout has not been applied to this 128 | (even if the network's original classification does); it remains for 129 | the caller to do this or not. 130 | 131 | Raises: 132 | ValueError: If network `name` is not recognized. 133 | """ 134 | if name not in networks_map: 135 | raise ValueError('Name of network unknown %s' % name) 136 | func = networks_map[name] 137 | @functools.wraps(func) 138 | def network_fn(images, **kwargs): 139 | arg_scope = arg_scopes_map[name](weight_decay=weight_decay) 140 | with slim.arg_scope(arg_scope): 141 | return func(images, num_classes, is_training=is_training, **kwargs) 142 | if hasattr(func, 'default_image_size'): 143 | network_fn.default_image_size = func.default_image_size 144 | 145 | return network_fn 146 | -------------------------------------------------------------------------------- /run_demo_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | 5 | import time 6 | import datetime 7 | import cv2 8 | import numpy as np 9 | import uuid 10 | import json 11 | 12 | import functools 13 | import logging 14 | import collections 15 | 16 | logger = logging.getLogger(__name__) 17 | logger.setLevel(logging.INFO) 18 | 19 | 20 | @functools.lru_cache(maxsize=1) 21 | def get_host_info(): 22 | ret = {} 23 | with open('/proc/cpuinfo') as f: 24 | ret['cpuinfo'] = f.read() 25 | 26 | with open('/proc/meminfo') as f: 27 | ret['meminfo'] = f.read() 28 | 29 | with open('/proc/loadavg') as f: 30 | ret['loadavg'] = f.read() 31 | 32 | return ret 33 | 34 | 35 | @functools.lru_cache(maxsize=100) 36 | def get_predictor(checkpoint_path): 37 | logger.info('loading model') 38 | import tensorflow as tf 39 | import model 40 | from icdar import restore_rectangle 41 | import lanms 42 | from eval import resize_image, sort_poly, detect 43 | 44 | input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images') 45 | global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) 46 | 47 | f_score, f_geometry = model.model(input_images, 
is_training=False) 48 | 49 | variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step) 50 | saver = tf.train.Saver(variable_averages.variables_to_restore()) 51 | 52 | sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) 53 | 54 | ckpt_state = tf.train.get_checkpoint_state(checkpoint_path) 55 | model_path = os.path.join(checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path)) 56 | logger.info('Restore from {}'.format(model_path)) 57 | saver.restore(sess, model_path) 58 | 59 | def predictor(img): 60 | """ 61 | :return: { 62 | 'text_lines': [ 63 | { 64 | 'score': , 65 | 'x0': , 66 | 'y0': , 67 | 'x1': , 68 | ... 69 | 'y3': , 70 | } 71 | ], 72 | 'rtparams': { # runtime parameters 73 | 'image_size': , 74 | 'working_size': , 75 | }, 76 | 'timing': { 77 | 'net': , 78 | 'restore': , 79 | 'nms': , 80 | 'cpuinfo': , 81 | 'meminfo': , 82 | 'uptime': , 83 | } 84 | } 85 | """ 86 | start_time = time.time() 87 | rtparams = collections.OrderedDict() 88 | rtparams['start_time'] = datetime.datetime.now().isoformat() 89 | rtparams['image_size'] = '{}x{}'.format(img.shape[1], img.shape[0]) 90 | timer = collections.OrderedDict([ 91 | ('net', 0), 92 | ('restore', 0), 93 | ('nms', 0) 94 | ]) 95 | 96 | im_resized, (ratio_h, ratio_w) = resize_image(img) 97 | rtparams['working_size'] = '{}x{}'.format( 98 | im_resized.shape[1], im_resized.shape[0]) 99 | start = time.time() 100 | score, geometry = sess.run( 101 | [f_score, f_geometry], 102 | feed_dict={input_images: [im_resized[:,:,::-1]]}) 103 | timer['net'] = time.time() - start 104 | 105 | boxes, timer = detect(score_map=score, geo_map=geometry, timer=timer) 106 | logger.info('net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format( 107 | timer['net']*1000, timer['restore']*1000, timer['nms']*1000)) 108 | 109 | if boxes is not None: 110 | scores = boxes[:,8].reshape(-1) 111 | boxes = boxes[:, :8].reshape((-1, 4, 2)) 112 | boxes[:, :, 0] /= ratio_w 113 | boxes[:, :, 1] /= ratio_h 114 | 115 | duration = time.time() - start_time 116 | timer['overall'] = duration 117 | logger.info('[timing] {}'.format(duration)) 118 | 119 | text_lines = [] 120 | if boxes is not None: 121 | text_lines = [] 122 | for box, score in zip(boxes, scores): 123 | box = sort_poly(box.astype(np.int32)) 124 | if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3]-box[0]) < 5: 125 | continue 126 | tl = collections.OrderedDict(zip( 127 | ['x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3'], 128 | map(float, box.flatten()))) 129 | tl['score'] = float(score) 130 | text_lines.append(tl) 131 | ret = { 132 | 'text_lines': text_lines, 133 | 'rtparams': rtparams, 134 | 'timing': timer, 135 | } 136 | ret.update(get_host_info()) 137 | return ret 138 | 139 | 140 | return predictor 141 | 142 | 143 | ### the webserver 144 | from flask import Flask, request, render_template 145 | import argparse 146 | 147 | 148 | class Config: 149 | SAVE_DIR = 'static/results' 150 | 151 | 152 | config = Config() 153 | 154 | 155 | app = Flask(__name__) 156 | 157 | @app.route('/') 158 | def index(): 159 | return render_template('index.html', session_id='dummy_session_id') 160 | 161 | 162 | def draw_illu(illu, rst): 163 | for t in rst['text_lines']: 164 | d = np.array([t['x0'], t['y0'], t['x1'], t['y1'], t['x2'], 165 | t['y2'], t['x3'], t['y3']], dtype='int32') 166 | d = d.reshape(-1, 2) 167 | cv2.polylines(illu, [d], isClosed=True, color=(255, 255, 0)) 168 | return illu 169 | 170 | 171 | def save_result(img, rst): 172 | session_id = str(uuid.uuid1()) 173 | dirpath = 
os.path.join(config.SAVE_DIR, session_id) 174 | os.makedirs(dirpath) 175 | 176 | # save input image 177 | output_path = os.path.join(dirpath, 'input.png') 178 | cv2.imwrite(output_path, img) 179 | 180 | # save illustration 181 | output_path = os.path.join(dirpath, 'output.png') 182 | cv2.imwrite(output_path, draw_illu(img.copy(), rst)) 183 | 184 | # save json data 185 | output_path = os.path.join(dirpath, 'result.json') 186 | with open(output_path, 'w') as f: 187 | json.dump(rst, f) 188 | 189 | rst['session_id'] = session_id 190 | return rst 191 | 192 | 193 | 194 | checkpoint_path = './east_icdar2015_resnet_v1_50_rbox' 195 | 196 | 197 | @app.route('/', methods=['POST']) 198 | def index_post(): 199 | # the predictor itself is cached by get_predictor's lru_cache 200 | import io 201 | bio = io.BytesIO() 202 | request.files['image'].save(bio) 203 | img = cv2.imdecode(np.frombuffer(bio.getvalue(), dtype='uint8'), 1) 204 | rst = get_predictor(checkpoint_path)(img) 205 | 206 | save_result(img, rst) 207 | return render_template('index.html', session_id=rst['session_id']) 208 | 209 | 210 | def main(): 211 | global checkpoint_path 212 | parser = argparse.ArgumentParser() 213 | parser.add_argument('--port', default=8769, type=int) 214 | parser.add_argument('--checkpoint-path', default=checkpoint_path) 215 | parser.add_argument('--debug', action='store_true') 216 | args = parser.parse_args() 217 | checkpoint_path = args.checkpoint_path 218 | 219 | if not os.path.exists(args.checkpoint_path): 220 | raise RuntimeError( 221 | 'Checkpoint `{}` not found'.format(args.checkpoint_path)) 222 | 223 | app.debug = args.debug 224 | app.run('0.0.0.0', args.port) 225 | 226 | if __name__ == '__main__': 227 | main() 228 | 229 | --------------------------------------------------------------------------------
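A minimal client sketch for the POST route above, assuming the server is running on port 8769 (the port deploy.sh binds) and that the requests package is available (it is not in requirements.txt); 'demo.jpg' is a placeholder path. The route responds with the rendered HTML page, while the machine-readable detections land in static/results/<session_id>/result.json:

import requests

with open('demo.jpg', 'rb') as f:
    resp = requests.post('http://localhost:8769/', files={'image': f})
print(resp.status_code)  # 200 on success; results are also saved server-side

/nets/Inception_ResNet_V2/nets/overfeat_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.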
14 | # ============================================================================== 15 | """Tests for slim.nets.overfeat.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets import overfeat 23 | 24 | slim = tf.contrib.slim 25 | 26 | 27 | class OverFeatTest(tf.test.TestCase): 28 | 29 | def testBuild(self): 30 | batch_size = 5 31 | height, width = 231, 231 32 | num_classes = 1000 33 | with self.test_session(): 34 | inputs = tf.random_uniform((batch_size, height, width, 3)) 35 | logits, _ = overfeat.overfeat(inputs, num_classes) 36 | self.assertEquals(logits.op.name, 'overfeat/fc8/squeezed') 37 | self.assertListEqual(logits.get_shape().as_list(), 38 | [batch_size, num_classes]) 39 | 40 | def testFullyConvolutional(self): 41 | batch_size = 1 42 | height, width = 281, 281 43 | num_classes = 1000 44 | with self.test_session(): 45 | inputs = tf.random_uniform((batch_size, height, width, 3)) 46 | logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False) 47 | self.assertEquals(logits.op.name, 'overfeat/fc8/BiasAdd') 48 | self.assertListEqual(logits.get_shape().as_list(), 49 | [batch_size, 2, 2, num_classes]) 50 | 51 | def testGlobalPool(self): 52 | batch_size = 1 53 | height, width = 281, 281 54 | num_classes = 1000 55 | with self.test_session(): 56 | inputs = tf.random_uniform((batch_size, height, width, 3)) 57 | logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False, 58 | global_pool=True) 59 | self.assertEquals(logits.op.name, 'overfeat/fc8/BiasAdd') 60 | self.assertListEqual(logits.get_shape().as_list(), 61 | [batch_size, 1, 1, num_classes]) 62 | 63 | def testEndPoints(self): 64 | batch_size = 5 65 | height, width = 231, 231 66 | num_classes = 1000 67 | with self.test_session(): 68 | inputs = tf.random_uniform((batch_size, height, width, 3)) 69 | _, end_points = overfeat.overfeat(inputs, num_classes) 70 | expected_names = ['overfeat/conv1', 71 | 'overfeat/pool1', 72 | 'overfeat/conv2', 73 | 'overfeat/pool2', 74 | 'overfeat/conv3', 75 | 'overfeat/conv4', 76 | 'overfeat/conv5', 77 | 'overfeat/pool5', 78 | 'overfeat/fc6', 79 | 'overfeat/fc7', 80 | 'overfeat/fc8' 81 | ] 82 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 83 | 84 | def testNoClasses(self): 85 | batch_size = 5 86 | height, width = 231, 231 87 | num_classes = None 88 | with self.test_session(): 89 | inputs = tf.random_uniform((batch_size, height, width, 3)) 90 | net, end_points = overfeat.overfeat(inputs, num_classes) 91 | expected_names = ['overfeat/conv1', 92 | 'overfeat/pool1', 93 | 'overfeat/conv2', 94 | 'overfeat/pool2', 95 | 'overfeat/conv3', 96 | 'overfeat/conv4', 97 | 'overfeat/conv5', 98 | 'overfeat/pool5', 99 | 'overfeat/fc6', 100 | 'overfeat/fc7' 101 | ] 102 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 103 | self.assertTrue(net.op.name.startswith('overfeat/fc7')) 104 | 105 | def testModelVariables(self): 106 | batch_size = 5 107 | height, width = 231, 231 108 | num_classes = 1000 109 | with self.test_session(): 110 | inputs = tf.random_uniform((batch_size, height, width, 3)) 111 | overfeat.overfeat(inputs, num_classes) 112 | expected_names = ['overfeat/conv1/weights', 113 | 'overfeat/conv1/biases', 114 | 'overfeat/conv2/weights', 115 | 'overfeat/conv2/biases', 116 | 'overfeat/conv3/weights', 117 | 'overfeat/conv3/biases', 118 | 'overfeat/conv4/weights', 119 | 'overfeat/conv4/biases', 120 | 'overfeat/conv5/weights', 121 | 
'overfeat/conv5/biases', 122 | 'overfeat/fc6/weights', 123 | 'overfeat/fc6/biases', 124 | 'overfeat/fc7/weights', 125 | 'overfeat/fc7/biases', 126 | 'overfeat/fc8/weights', 127 | 'overfeat/fc8/biases', 128 | ] 129 | model_variables = [v.op.name for v in slim.get_model_variables()] 130 | self.assertSetEqual(set(model_variables), set(expected_names)) 131 | 132 | def testEvaluation(self): 133 | batch_size = 2 134 | height, width = 231, 231 135 | num_classes = 1000 136 | with self.test_session(): 137 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 138 | logits, _ = overfeat.overfeat(eval_inputs, is_training=False) 139 | self.assertListEqual(logits.get_shape().as_list(), 140 | [batch_size, num_classes]) 141 | predictions = tf.argmax(logits, 1) 142 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) 143 | 144 | def testTrainEvalWithReuse(self): 145 | train_batch_size = 2 146 | eval_batch_size = 1 147 | train_height, train_width = 231, 231 148 | eval_height, eval_width = 281, 281 149 | num_classes = 1000 150 | with self.test_session(): 151 | train_inputs = tf.random_uniform( 152 | (train_batch_size, train_height, train_width, 3)) 153 | logits, _ = overfeat.overfeat(train_inputs) 154 | self.assertListEqual(logits.get_shape().as_list(), 155 | [train_batch_size, num_classes]) 156 | tf.get_variable_scope().reuse_variables() 157 | eval_inputs = tf.random_uniform( 158 | (eval_batch_size, eval_height, eval_width, 3)) 159 | logits, _ = overfeat.overfeat(eval_inputs, is_training=False, 160 | spatial_squeeze=False) 161 | self.assertListEqual(logits.get_shape().as_list(), 162 | [eval_batch_size, 2, 2, num_classes]) 163 | logits = tf.reduce_mean(logits, [1, 2]) 164 | predictions = tf.argmax(logits, 1) 165 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) 166 | 167 | def testForward(self): 168 | batch_size = 1 169 | height, width = 231, 231 170 | with self.test_session() as sess: 171 | inputs = tf.random_uniform((batch_size, height, width, 3)) 172 | logits, _ = overfeat.overfeat(inputs) 173 | sess.run(tf.global_variables_initializer()) 174 | output = sess.run(logits) 175 | self.assertTrue(output.any()) 176 | 177 | if __name__ == '__main__': 178 | tf.test.main() 179 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/alexnet_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for slim.nets.alexnet.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets import alexnet 23 | 24 | slim = tf.contrib.slim 25 | 26 | 27 | class AlexnetV2Test(tf.test.TestCase): 28 | 29 | def testBuild(self): 30 | batch_size = 5 31 | height, width = 224, 224 32 | num_classes = 1000 33 | with self.test_session(): 34 | inputs = tf.random_uniform((batch_size, height, width, 3)) 35 | logits, _ = alexnet.alexnet_v2(inputs, num_classes) 36 | self.assertEquals(logits.op.name, 'alexnet_v2/fc8/squeezed') 37 | self.assertListEqual(logits.get_shape().as_list(), 38 | [batch_size, num_classes]) 39 | 40 | def testFullyConvolutional(self): 41 | batch_size = 1 42 | height, width = 300, 400 43 | num_classes = 1000 44 | with self.test_session(): 45 | inputs = tf.random_uniform((batch_size, height, width, 3)) 46 | logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False) 47 | self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd') 48 | self.assertListEqual(logits.get_shape().as_list(), 49 | [batch_size, 4, 7, num_classes]) 50 | 51 | def testGlobalPool(self): 52 | batch_size = 1 53 | height, width = 256, 256 54 | num_classes = 1000 55 | with self.test_session(): 56 | inputs = tf.random_uniform((batch_size, height, width, 3)) 57 | logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False, 58 | global_pool=True) 59 | self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd') 60 | self.assertListEqual(logits.get_shape().as_list(), 61 | [batch_size, 1, 1, num_classes]) 62 | 63 | def testEndPoints(self): 64 | batch_size = 5 65 | height, width = 224, 224 66 | num_classes = 1000 67 | with self.test_session(): 68 | inputs = tf.random_uniform((batch_size, height, width, 3)) 69 | _, end_points = alexnet.alexnet_v2(inputs, num_classes) 70 | expected_names = ['alexnet_v2/conv1', 71 | 'alexnet_v2/pool1', 72 | 'alexnet_v2/conv2', 73 | 'alexnet_v2/pool2', 74 | 'alexnet_v2/conv3', 75 | 'alexnet_v2/conv4', 76 | 'alexnet_v2/conv5', 77 | 'alexnet_v2/pool5', 78 | 'alexnet_v2/fc6', 79 | 'alexnet_v2/fc7', 80 | 'alexnet_v2/fc8' 81 | ] 82 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 83 | 84 | def testNoClasses(self): 85 | batch_size = 5 86 | height, width = 224, 224 87 | num_classes = None 88 | with self.test_session(): 89 | inputs = tf.random_uniform((batch_size, height, width, 3)) 90 | net, end_points = alexnet.alexnet_v2(inputs, num_classes) 91 | expected_names = ['alexnet_v2/conv1', 92 | 'alexnet_v2/pool1', 93 | 'alexnet_v2/conv2', 94 | 'alexnet_v2/pool2', 95 | 'alexnet_v2/conv3', 96 | 'alexnet_v2/conv4', 97 | 'alexnet_v2/conv5', 98 | 'alexnet_v2/pool5', 99 | 'alexnet_v2/fc6', 100 | 'alexnet_v2/fc7' 101 | ] 102 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 103 | self.assertTrue(net.op.name.startswith('alexnet_v2/fc7')) 104 | self.assertListEqual(net.get_shape().as_list(), 105 | [batch_size, 1, 1, 4096]) 106 | 107 | def testModelVariables(self): 108 | batch_size = 5 109 | height, width = 224, 224 110 | num_classes = 1000 111 | with self.test_session(): 112 | inputs = tf.random_uniform((batch_size, height, width, 3)) 113 | alexnet.alexnet_v2(inputs, num_classes) 114 | expected_names = ['alexnet_v2/conv1/weights', 115 | 'alexnet_v2/conv1/biases', 116 | 'alexnet_v2/conv2/weights', 117 | 'alexnet_v2/conv2/biases', 118 | 
'alexnet_v2/conv3/weights', 119 | 'alexnet_v2/conv3/biases', 120 | 'alexnet_v2/conv4/weights', 121 | 'alexnet_v2/conv4/biases', 122 | 'alexnet_v2/conv5/weights', 123 | 'alexnet_v2/conv5/biases', 124 | 'alexnet_v2/fc6/weights', 125 | 'alexnet_v2/fc6/biases', 126 | 'alexnet_v2/fc7/weights', 127 | 'alexnet_v2/fc7/biases', 128 | 'alexnet_v2/fc8/weights', 129 | 'alexnet_v2/fc8/biases', 130 | ] 131 | model_variables = [v.op.name for v in slim.get_model_variables()] 132 | self.assertSetEqual(set(model_variables), set(expected_names)) 133 | 134 | def testEvaluation(self): 135 | batch_size = 2 136 | height, width = 224, 224 137 | num_classes = 1000 138 | with self.test_session(): 139 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 140 | logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False) 141 | self.assertListEqual(logits.get_shape().as_list(), 142 | [batch_size, num_classes]) 143 | predictions = tf.argmax(logits, 1) 144 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) 145 | 146 | def testTrainEvalWithReuse(self): 147 | train_batch_size = 2 148 | eval_batch_size = 1 149 | train_height, train_width = 224, 224 150 | eval_height, eval_width = 300, 400 151 | num_classes = 1000 152 | with self.test_session(): 153 | train_inputs = tf.random_uniform( 154 | (train_batch_size, train_height, train_width, 3)) 155 | logits, _ = alexnet.alexnet_v2(train_inputs) 156 | self.assertListEqual(logits.get_shape().as_list(), 157 | [train_batch_size, num_classes]) 158 | tf.get_variable_scope().reuse_variables() 159 | eval_inputs = tf.random_uniform( 160 | (eval_batch_size, eval_height, eval_width, 3)) 161 | logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False, 162 | spatial_squeeze=False) 163 | self.assertListEqual(logits.get_shape().as_list(), 164 | [eval_batch_size, 4, 7, num_classes]) 165 | logits = tf.reduce_mean(logits, [1, 2]) 166 | predictions = tf.argmax(logits, 1) 167 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) 168 | 169 | def testForward(self): 170 | batch_size = 1 171 | height, width = 224, 224 172 | with self.test_session() as sess: 173 | inputs = tf.random_uniform((batch_size, height, width, 3)) 174 | logits, _ = alexnet.alexnet_v2(inputs) 175 | sess.run(tf.global_variables_initializer()) 176 | output = sess.run(logits) 177 | self.assertTrue(output.any()) 178 | 179 | if __name__ == '__main__': 180 | tf.test.main() 181 | -------------------------------------------------------------------------------- /lanms/include/pybind11/embed.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/embed.h: Support for embedding the interpreter 3 | 4 | Copyright (c) 2017 Wenzel Jakob <wenzel.jakob@epfl.ch> 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 8 | */ 9 | 10 | #pragma once 11 | 12 | #include "pybind11.h" 13 | #include "eval.h" 14 | 15 | #if defined(PYPY_VERSION) 16 | # error Embedding the interpreter is not supported with PyPy 17 | #endif 18 | 19 | #if PY_MAJOR_VERSION >= 3 20 | # define PYBIND11_EMBEDDED_MODULE_IMPL(name) \ 21 | extern "C" PyObject *pybind11_init_impl_##name() { \ 22 | return pybind11_init_wrapper_##name(); \ 23 | } 24 | #else 25 | # define PYBIND11_EMBEDDED_MODULE_IMPL(name) \ 26 | extern "C" void pybind11_init_impl_##name() { \ 27 | pybind11_init_wrapper_##name(); \ 28 | } 29 | #endif 30 | 31 | /** \rst 32 | Add a new module to the table of builtins for the interpreter.
Must be 33 | defined in global scope. The first macro parameter is the name of the 34 | module (without quotes). The second parameter is the variable which will 35 | be used as the interface to add functions and classes to the module. 36 | 37 | .. code-block:: cpp 38 | 39 | PYBIND11_EMBEDDED_MODULE(example, m) { 40 | // ... initialize functions and classes here 41 | m.def("foo", []() { 42 | return "Hello, World!"; 43 | }); 44 | } 45 | \endrst */ 46 | #define PYBIND11_EMBEDDED_MODULE(name, variable) \ 47 | static void pybind11_init_##name(pybind11::module &); \ 48 | static PyObject *pybind11_init_wrapper_##name() { \ 49 | auto m = pybind11::module(#name); \ 50 | try { \ 51 | pybind11_init_##name(m); \ 52 | return m.ptr(); \ 53 | } catch (pybind11::error_already_set &e) { \ 54 | PyErr_SetString(PyExc_ImportError, e.what()); \ 55 | return nullptr; \ 56 | } catch (const std::exception &e) { \ 57 | PyErr_SetString(PyExc_ImportError, e.what()); \ 58 | return nullptr; \ 59 | } \ 60 | } \ 61 | PYBIND11_EMBEDDED_MODULE_IMPL(name) \ 62 | pybind11::detail::embedded_module name(#name, pybind11_init_impl_##name); \ 63 | void pybind11_init_##name(pybind11::module &variable) 64 | 65 | 66 | NAMESPACE_BEGIN(pybind11) 67 | NAMESPACE_BEGIN(detail) 68 | 69 | /// Python 2.7/3.x compatible version of `PyImport_AppendInittab` and error checks. 70 | struct embedded_module { 71 | #if PY_MAJOR_VERSION >= 3 72 | using init_t = PyObject *(*)(); 73 | #else 74 | using init_t = void (*)(); 75 | #endif 76 | embedded_module(const char *name, init_t init) { 77 | if (Py_IsInitialized()) 78 | pybind11_fail("Can't add new modules after the interpreter has been initialized"); 79 | 80 | auto result = PyImport_AppendInittab(name, init); 81 | if (result == -1) 82 | pybind11_fail("Insufficient memory to add a new module"); 83 | } 84 | }; 85 | 86 | NAMESPACE_END(detail) 87 | 88 | /** \rst 89 | Initialize the Python interpreter. No other pybind11 or CPython API functions can be 90 | called before this is done, with the exception of `PYBIND11_EMBEDDED_MODULE`. The 91 | optional parameter can be used to skip the registration of signal handlers (see the 92 | Python documentation for details). Calling this function again after the interpreter 93 | has already been initialized is a fatal error. 94 | \endrst */ 95 | inline void initialize_interpreter(bool init_signal_handlers = true) { 96 | if (Py_IsInitialized()) 97 | pybind11_fail("The interpreter is already running"); 98 | 99 | Py_InitializeEx(init_signal_handlers ? 1 : 0); 100 | 101 | // Make .py files in the working directory available by default 102 | auto sys_path = reinterpret_borrow<list>(module::import("sys").attr("path")); 103 | sys_path.append("."); 104 | } 105 | 106 | /** \rst 107 | Shut down the Python interpreter. No pybind11 or CPython API functions can be called 108 | after this. In addition, pybind11 objects must not outlive the interpreter: 109 | 110 | .. code-block:: cpp 111 | 112 | { // BAD 113 | py::initialize_interpreter(); 114 | auto hello = py::str("Hello, World!"); 115 | py::finalize_interpreter(); 116 | } // <-- BOOM, hello's destructor is called after interpreter shutdown 117 | 118 | { // GOOD 119 | py::initialize_interpreter(); 120 | { // scoped 121 | auto hello = py::str("Hello, World!"); 122 | } // <-- OK, hello is cleaned up properly 123 | py::finalize_interpreter(); 124 | } 125 | 126 | { // BETTER 127 | py::scoped_interpreter guard{}; 128 | auto hello = py::str("Hello, World!"); 129 | } 130 | 131 | ..
warning:: 132 | 133 | The interpreter can be restarted by calling `initialize_interpreter` again. 134 | Modules created using pybind11 can be safely re-initialized. However, Python 135 | itself cannot completely unload binary extension modules and there are several 136 | caveats with regard to interpreter restarting. All the details can be found 137 | in the CPython documentation. In short, not all interpreter memory may be 138 | freed, either due to reference cycles or user-created global data. 139 | 140 | \endrst */ 141 | inline void finalize_interpreter() { 142 | handle builtins(PyEval_GetBuiltins()); 143 | const char *id = PYBIND11_INTERNALS_ID; 144 | 145 | // Get the internals pointer (without creating it if it doesn't exist). It's possible for the 146 | // internals to be created during Py_Finalize() (e.g. if a py::capsule calls `get_internals()` 147 | // during destruction), so we get the pointer-pointer here and check it after Py_Finalize(). 148 | detail::internals **internals_ptr_ptr = &detail::get_internals_ptr(); 149 | // It could also be stashed in builtins, so look there too: 150 | if (builtins.contains(id) && isinstance<capsule>(builtins[id])) 151 | internals_ptr_ptr = capsule(builtins[id]); 152 | 153 | Py_Finalize(); 154 | 155 | if (internals_ptr_ptr) { 156 | delete *internals_ptr_ptr; 157 | *internals_ptr_ptr = nullptr; 158 | } 159 | } 160 | 161 | /** \rst 162 | Scope guard version of `initialize_interpreter` and `finalize_interpreter`. 163 | This is a move-only guard and only a single instance can exist. 164 | 165 | .. code-block:: cpp 166 | 167 | #include <pybind11/embed.h> 168 | 169 | int main() { 170 | py::scoped_interpreter guard{}; 171 | py::print("Hello, World!"); 172 | } // <-- interpreter shutdown 173 | \endrst */ 174 | class scoped_interpreter { 175 | public: 176 | scoped_interpreter(bool init_signal_handlers = true) { 177 | initialize_interpreter(init_signal_handlers); 178 | } 179 | 180 | scoped_interpreter(const scoped_interpreter &) = delete; 181 | scoped_interpreter(scoped_interpreter &&other) noexcept { other.is_valid = false; } 182 | scoped_interpreter &operator=(const scoped_interpreter &) = delete; 183 | scoped_interpreter &operator=(scoped_interpreter &&) = delete; 184 | 185 | ~scoped_interpreter() { 186 | if (is_valid) 187 | finalize_interpreter(); 188 | } 189 | 190 | private: 191 | bool is_valid = true; 192 | }; 193 | 194 | NAMESPACE_END(pybind11) 195 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import time 3 | import math 4 | import os 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | import locality_aware_nms as nms_locality 9 | import lanms 10 | 11 | tf.app.flags.DEFINE_string('test_data_path', '/tmp/ch4_test_images/images/', '') 12 | tf.app.flags.DEFINE_string('gpu_list', '0', '') 13 | tf.app.flags.DEFINE_string('checkpoint_path', '/tmp/east_icdar2015_resnet_v1_50_rbox/', '') 14 | tf.app.flags.DEFINE_string('output_dir', '/tmp/ch4_test_images/images/', '') 15 | tf.app.flags.DEFINE_bool('no_write_images', False, 'do not write images') 16 | 17 | import model 18 | from icdar import restore_rectangle 19 | 20 | FLAGS = tf.app.flags.FLAGS 21 | 22 | def get_images(): 23 | ''' 24 | find image files in test data path 25 | :return: list of files found 26 | ''' 27 | files = [] 28 | exts = ['jpg', 'png', 'jpeg', 'JPG'] 29 | for parent, dirnames, filenames in os.walk(FLAGS.test_data_path): 30 | for filename in filenames: 31 | for ext in
exts:
32 |                 if filename.endswith(ext):
33 |                     files.append(os.path.join(parent, filename))
34 |                     break
35 |     print('Found {} images'.format(len(files)))
36 |     return files
37 | 
38 | 
39 | def resize_image(im, max_side_len=2400):
40 |     '''
41 |     resize image to a size multiple of 32, which is required by the network
42 |     :param im: the input image
43 |     :param max_side_len: limit on the max side length, to avoid running out of GPU memory
44 |     :return: the resized image and the resize ratio
45 |     '''
46 |     h, w, _ = im.shape
47 | 
48 |     resize_w = w
49 |     resize_h = h
50 | 
51 |     # limit the max side
52 |     if max(resize_h, resize_w) > max_side_len:
53 |         ratio = float(max_side_len) / resize_h if resize_h > resize_w else float(max_side_len) / resize_w
54 |     else:
55 |         ratio = 1.
56 |     resize_h = int(resize_h * ratio)
57 |     resize_w = int(resize_w * ratio)
58 | 
59 |     resize_h = resize_h if resize_h % 32 == 0 else (resize_h // 32 - 1) * 32
60 |     resize_w = resize_w if resize_w % 32 == 0 else (resize_w // 32 - 1) * 32
61 |     im = cv2.resize(im, (int(resize_w), int(resize_h)))
62 | 
63 |     ratio_h = resize_h / float(h)
64 |     ratio_w = resize_w / float(w)
65 | 
66 |     return im, (ratio_h, ratio_w)
67 | 
68 | 
69 | def detect(score_map, geo_map, timer, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
70 |     '''
71 |     restore text boxes from score map and geo map
72 |     :param score_map:
73 |     :param geo_map:
74 |     :param timer:
75 |     :param score_map_thresh: threshold for score map
76 |     :param box_thresh: threshold for boxes
77 |     :param nms_thres: threshold for nms
78 |     :return:
79 |     '''
80 |     if len(score_map.shape) == 4:
81 |         score_map = score_map[0, :, :, 0]
82 |         geo_map = geo_map[0, :, :, :]
83 |     # filter the score map
84 |     xy_text = np.argwhere(score_map > score_map_thresh)
85 |     # sort the text boxes via the y axis
86 |     xy_text = xy_text[np.argsort(xy_text[:, 0])]
87 |     # restore
88 |     start = time.time()
89 |     text_box_restored = restore_rectangle(xy_text[:, ::-1]*4, geo_map[xy_text[:, 0], xy_text[:, 1], :]) # N*4*2
90 |     print('{} text boxes before nms'.format(text_box_restored.shape[0]))
91 |     boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
92 |     boxes[:, :8] = text_box_restored.reshape((-1, 8))
93 |     boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
94 |     timer['restore'] = time.time() - start
95 |     # nms part
96 |     start = time.time()
97 |     # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
98 |     boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
99 |     timer['nms'] = time.time() - start
100 | 
101 |     if boxes.shape[0] == 0:
102 |         return None, timer
103 | 
104 |     # here we filter some low-score boxes by the average score map; this is different from the original paper
105 |     for i, box in enumerate(boxes):
106 |         mask = np.zeros_like(score_map, dtype=np.uint8)
107 |         cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
108 |         boxes[i, 8] = cv2.mean(score_map, mask)[0]
109 |     boxes = boxes[boxes[:, 8] > box_thresh]
110 | 
111 |     return boxes, timer
112 | 
113 | 
114 | def sort_poly(p):
115 |     min_axis = np.argmin(np.sum(p, axis=1))
116 |     p = p[[min_axis, (min_axis+1)%4, (min_axis+2)%4, (min_axis+3)%4]]
117 |     if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
118 |         return p
119 |     else:
120 |         return p[[0, 3, 2, 1]]
121 | 
122 | 
123 | def main(argv=None):
125 |     os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
126 | 
127 | 
128 |     try:
129 |         os.makedirs(FLAGS.output_dir)
130 |     except OSError as e:
131 |         if e.errno != 17:  # 17 == errno.EEXIST: output_dir already exists
132 |             raise
133 | 
134 |     with
tf.get_default_graph().as_default():
135 |         input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
136 |         global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
137 | 
138 |         f_score, f_geometry = model.model(input_images, is_training=False)
139 | 
140 |         variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
141 |         saver = tf.train.Saver(variable_averages.variables_to_restore())
142 | 
143 |         with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
144 |             ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
145 |             model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
146 |             print('Restore from {}'.format(model_path))
147 |             saver.restore(sess, model_path)
148 | 
149 |             im_fn_list = get_images()
150 |             for im_fn in im_fn_list:
151 |                 im = cv2.imread(im_fn)[:, :, ::-1]
152 |                 start_time = time.time()
153 |                 im_resized, (ratio_h, ratio_w) = resize_image(im)
154 | 
155 |                 timer = {'net': 0, 'restore': 0, 'nms': 0}
156 |                 start = time.time()
157 |                 score, geometry = sess.run([f_score, f_geometry], feed_dict={input_images: [im_resized]})
158 |                 timer['net'] = time.time() - start
159 | 
160 |                 boxes, timer = detect(score_map=score, geo_map=geometry, timer=timer)
161 |                 print('{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
162 |                     im_fn, timer['net']*1000, timer['restore']*1000, timer['nms']*1000))
163 | 
164 |                 if boxes is not None:
165 |                     boxes = boxes[:, :8].reshape((-1, 4, 2))
166 |                     boxes[:, :, 0] /= ratio_w
167 |                     boxes[:, :, 1] /= ratio_h
168 | 
169 |                 duration = time.time() - start_time
170 |                 print('[timing] {}'.format(duration))
171 | 
172 |                 # save to file
173 |                 if boxes is not None:
174 |                     res_file = os.path.join(
175 |                         FLAGS.output_dir,
176 |                         '{}.txt'.format(
177 |                             os.path.basename(im_fn).split('.')[0]))
178 | 
179 |                     with open(res_file, 'w') as f:
180 |                         for box in boxes:
181 |                             # to avoid submitting errors
182 |                             box = sort_poly(box.astype(np.int32))
183 |                             print(np.shape(box))
184 |                             if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3]-box[0]) < 5:
185 |                                 continue
186 |                             f.write('{},{},{},{},{},{},{},{}\r\n'.format(
187 |                                 box[0, 0], box[0, 1], box[1, 0], box[1, 1], box[2, 0], box[2, 1], box[3, 0], box[3, 1],
188 |                             ))
189 |                             cv2.polylines(im[:, :, ::-1], [box.astype(np.int32).reshape((-1, 1, 2))], True, color=(255, 255, 0), thickness=1)
190 |                 if not FLAGS.no_write_images:
191 |                     img_path = os.path.join(FLAGS.output_dir, os.path.basename(im_fn))
192 |                     cv2.imwrite(img_path, im[:, :, ::-1])
193 | 
194 | if __name__ == '__main__':
195 |     tf.app.run()
196 | 
--------------------------------------------------------------------------------
/nets/NASNet/pnasnet.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================== 15 | """Contains the definition for the PNASNet classification networks. 16 | 17 | Paper: https://arxiv.org/abs/1712.00559 18 | """ 19 | 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | 24 | import copy 25 | import tensorflow as tf 26 | 27 | from nets.NASNet import nasnet 28 | from nets.NASNet import nasnet_utils 29 | 30 | arg_scope = tf.contrib.framework.arg_scope 31 | slim = tf.contrib.slim 32 | 33 | 34 | def large_imagenet_config(): 35 | """Large ImageNet configuration based on PNASNet-5.""" 36 | return tf.contrib.training.HParams( 37 | stem_multiplier=3.0, 38 | dense_dropout_keep_prob=0.5, 39 | num_cells=12, 40 | filter_scaling_rate=2.0, 41 | num_conv_filters=216, 42 | drop_path_keep_prob=0.6, 43 | use_aux_head=1, 44 | num_reduction_layers=2, 45 | data_format='NHWC', 46 | total_training_steps=250000, 47 | ) 48 | 49 | 50 | def pnasnet_large_arg_scope(weight_decay=4e-5, batch_norm_decay=0.9997, 51 | batch_norm_epsilon=0.001): 52 | """Default arg scope for the PNASNet Large ImageNet model.""" 53 | return nasnet.nasnet_large_arg_scope( 54 | weight_decay, batch_norm_decay, batch_norm_epsilon) 55 | 56 | 57 | def _build_pnasnet_base(images, 58 | normal_cell, 59 | num_classes, 60 | hparams, 61 | is_training, 62 | final_endpoint=None): 63 | """Constructs a PNASNet image model.""" 64 | 65 | end_points = {} 66 | 67 | def add_and_check_endpoint(endpoint_name, net): 68 | end_points[endpoint_name] = net 69 | return final_endpoint and (endpoint_name == final_endpoint) 70 | 71 | # Find where to place the reduction cells or stride normal cells 72 | reduction_indices = nasnet_utils.calc_reduction_layers( 73 | hparams.num_cells, hparams.num_reduction_layers) 74 | print('reduction_indices: ', reduction_indices) 75 | # pylint: disable=protected-access 76 | stem = lambda: nasnet._imagenet_stem(images, hparams, normal_cell) 77 | # pylint: enable=protected-access 78 | net, cell_outputs = stem() 79 | end_points['scale-1'] = cell_outputs[1] 80 | end_points['scale-2'] = cell_outputs[2] 81 | end_points['scale-3'] = cell_outputs[3] 82 | if add_and_check_endpoint('Stem', net): 83 | return net, end_points 84 | 85 | # Setup for building in the auxiliary head. 
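# By construction below, the auxiliary classifier (when enabled) is attached to
# the cell immediately preceding the second reduction cell.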
86 |   aux_head_cell_idxes = []
87 |   if len(reduction_indices) >= 2:
88 |     aux_head_cell_idxes.append(reduction_indices[1] - 1)
89 | 
90 |   # Run the cells
91 |   filter_scaling = 1.0
92 |   # true_cell_num accounts for the stem cells
93 |   true_cell_num = 2
94 |   print('hparams.num_cells is ', hparams.num_cells)
95 |   for cell_num in range(hparams.num_cells):
96 |     is_reduction = cell_num in reduction_indices
97 |     stride = 2 if is_reduction else 1
98 |     if is_reduction: filter_scaling *= hparams.filter_scaling_rate
99 |     prev_layer = cell_outputs[-2]
100 |     net = normal_cell(
101 |         net,
102 |         scope='cell_{}'.format(cell_num),
103 |         filter_scaling=filter_scaling,
104 |         stride=stride,
105 |         prev_layer=prev_layer,
106 |         cell_num=true_cell_num)
107 |     if add_and_check_endpoint('Cell_{}'.format(cell_num), net):
108 |       return net, end_points
109 |     true_cell_num += 1
110 |     cell_outputs.append(net)
111 | 
112 |     if (hparams.use_aux_head and cell_num in aux_head_cell_idxes and
113 |         num_classes and is_training):
114 |       aux_net = tf.nn.relu(net)
115 |       # pylint: disable=protected-access
116 |       nasnet._build_aux_head(aux_net, end_points, num_classes, hparams,
117 |                              scope='aux_{}'.format(cell_num))
118 |       # pylint: enable=protected-access
119 | 
120 |   # Final softmax layer
121 |   with tf.variable_scope('final_layer'):
122 |     net = tf.nn.relu(net)
123 |     net = nasnet_utils.global_avg_pool(net)
124 |     if add_and_check_endpoint('global_pool', net) or not num_classes:
125 |       return net, end_points
126 |     net = slim.dropout(net, hparams.dense_dropout_keep_prob, scope='dropout')
127 |     logits = slim.fully_connected(net, num_classes)
128 | 
129 |     if add_and_check_endpoint('Logits', logits):
130 |       return net, end_points
131 | 
132 |     predictions = tf.nn.softmax(logits, name='predictions')
133 |     if add_and_check_endpoint('Predictions', predictions):
134 |       return net, end_points
135 |   return logits, end_points
136 | 
137 | 
138 | def build_pnasnet_large(images,
139 |                         num_classes,
140 |                         is_training=True,
141 |                         final_endpoint=None,
142 |                         config=None):
143 |   """Build PNASNet Large model for the ImageNet Dataset."""
144 |   hparams = copy.deepcopy(config) if config else large_imagenet_config()
145 |   # pylint: disable=protected-access
146 |   nasnet._update_hparams(hparams, is_training)
147 |   # pylint: enable=protected-access
148 | 
149 |   if tf.test.is_gpu_available() and hparams.data_format == 'NHWC':
150 |     tf.logging.info('A GPU is available on the machine, consider using NCHW '
151 |                     'data format for increased speed on GPU.')
152 | 
153 |   if hparams.data_format == 'NCHW':
154 |     images = tf.transpose(images, [0, 3, 1, 2])
155 | 
156 |   # Calculate the total number of cells in the network.
157 |   # There is no distinction between reduction and normal cells in PNAS, so the
158 |   # total number of cells is equal to the number of normal cells plus the number
159 |   # of stem cells (two by default).
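  # For example, with the default large_imagenet_config() above (num_cells=12),
  # total_num_cells works out to 12 + 2 = 14.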
160 | total_num_cells = hparams.num_cells + 2 161 | 162 | normal_cell = PNasNetNormalCell(hparams.num_conv_filters, 163 | hparams.drop_path_keep_prob, total_num_cells, 164 | hparams.total_training_steps) 165 | with arg_scope( 166 | [slim.dropout, nasnet_utils.drop_path, slim.batch_norm], 167 | is_training=is_training): 168 | with arg_scope([slim.avg_pool2d, slim.max_pool2d, slim.conv2d, 169 | slim.batch_norm, slim.separable_conv2d, 170 | nasnet_utils.factorized_reduction, 171 | nasnet_utils.global_avg_pool, 172 | nasnet_utils.get_channel_index, 173 | nasnet_utils.get_channel_dim], 174 | data_format=hparams.data_format): 175 | return _build_pnasnet_base( 176 | images, 177 | normal_cell=normal_cell, 178 | num_classes=num_classes, 179 | hparams=hparams, 180 | is_training=is_training, 181 | final_endpoint=final_endpoint) 182 | build_pnasnet_large.default_image_size = 331 183 | 184 | 185 | 186 | class PNasNetNormalCell(nasnet_utils.NasNetABaseCell): 187 | """PNASNet Normal Cell.""" 188 | 189 | def __init__(self, num_conv_filters, drop_path_keep_prob, total_num_cells, 190 | total_training_steps): 191 | # Configuration for the PNASNet-5 model. 192 | operations = [ 193 | 'separable_5x5_2', 'max_pool_3x3', 'separable_7x7_2', 'max_pool_3x3', 194 | 'separable_5x5_2', 'separable_3x3_2', 'separable_3x3_2', 'max_pool_3x3', 195 | 'separable_3x3_2', 'none' 196 | ] 197 | used_hiddenstates = [1, 1, 0, 0, 0, 0, 0] 198 | hiddenstate_indices = [1, 1, 0, 0, 0, 0, 4, 0, 1, 0] 199 | 200 | super(PNasNetNormalCell, self).__init__( 201 | num_conv_filters, operations, used_hiddenstates, hiddenstate_indices, 202 | drop_path_keep_prob, total_num_cells, total_training_steps) 203 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/dcgan.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """DCGAN generator and discriminator from https://arxiv.org/abs/1511.06434.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | from math import log 21 | 22 | from six.moves import xrange # pylint: disable=redefined-builtin 23 | import tensorflow as tf 24 | 25 | slim = tf.contrib.slim 26 | 27 | 28 | def _validate_image_inputs(inputs): 29 | inputs.get_shape().assert_has_rank(4) 30 | inputs.get_shape()[1:3].assert_is_fully_defined() 31 | if inputs.get_shape()[1] != inputs.get_shape()[2]: 32 | raise ValueError('Input tensor does not have equal width and height: ', 33 | inputs.get_shape()[1:3]) 34 | width = inputs.get_shape().as_list()[1] 35 | if log(width, 2) != int(log(width, 2)): 36 | raise ValueError('Input tensor `width` is not a power of 2: ', width) 37 | 38 | 39 | # TODO(joelshor): Use fused batch norm by default. 
Investigate why some GAN
40 | # setups need the gradient of gradient FusedBatchNormGrad.
41 | def discriminator(inputs,
42 |                   depth=64,
43 |                   is_training=True,
44 |                   reuse=None,
45 |                   scope='Discriminator',
46 |                   fused_batch_norm=False):
47 |   """Discriminator network for DCGAN.
48 | 
49 |   Construct discriminator network from inputs to the final endpoint.
50 | 
51 |   Args:
52 |     inputs: A tensor of size [batch_size, height, width, channels]. Must be
53 |       floating point.
54 |     depth: Number of channels in first convolution layer.
55 |     is_training: Whether the network is for training or not.
56 |     reuse: Whether or not the network variables should be reused. `scope`
57 |       must be given to be reused.
58 |     scope: Optional variable_scope.
59 |     fused_batch_norm: If `True`, use a faster, fused implementation of
60 |       batch norm.
61 | 
62 |   Returns:
63 |     logits: The pre-softmax activations, a tensor of size [batch_size, 1]
64 |     end_points: a dictionary from components of the network to their activation.
65 | 
66 |   Raises:
67 |     ValueError: If the input image shape is not 4-dimensional, if the spatial
68 |       dimensions aren't defined at graph construction time, if the spatial
69 |       dimensions aren't square, or if the spatial dimensions aren't a power of
70 |       two.
71 |   """
72 | 
73 |   normalizer_fn = slim.batch_norm
74 |   normalizer_fn_args = {
75 |       'is_training': is_training,
76 |       'zero_debias_moving_mean': True,
77 |       'fused': fused_batch_norm,
78 |   }
79 | 
80 |   _validate_image_inputs(inputs)
81 |   inp_shape = inputs.get_shape().as_list()[1]
82 | 
83 |   end_points = {}
84 |   with tf.variable_scope(scope, values=[inputs], reuse=reuse) as scope:
85 |     with slim.arg_scope([normalizer_fn], **normalizer_fn_args):
86 |       with slim.arg_scope([slim.conv2d],
87 |                           stride=2,
88 |                           kernel_size=4,
89 |                           activation_fn=tf.nn.leaky_relu):
90 |         net = inputs
91 |         for i in xrange(int(log(inp_shape, 2))):
92 |           scope = 'conv%i' % (i + 1)
93 |           current_depth = depth * 2**i
94 |           normalizer_fn_ = None if i == 0 else normalizer_fn
95 |           net = slim.conv2d(
96 |               net, current_depth, normalizer_fn=normalizer_fn_, scope=scope)
97 |           end_points[scope] = net
98 | 
99 |         logits = slim.conv2d(net, 1, kernel_size=1, stride=1, padding='VALID',
100 |                              normalizer_fn=None, activation_fn=None)
101 |         logits = tf.reshape(logits, [-1, 1])
102 |         end_points['logits'] = logits
103 | 
104 |   return logits, end_points
105 | 
106 | 
107 | # TODO(joelshor): Use fused batch norm by default. Investigate why some GAN
108 | # setups need the gradient of gradient FusedBatchNormGrad.
109 | def generator(inputs,
110 |               depth=64,
111 |               final_size=32,
112 |               num_outputs=3,
113 |               is_training=True,
114 |               reuse=None,
115 |               scope='Generator',
116 |               fused_batch_norm=False):
117 |   """Generator network for DCGAN.
118 | 
119 |   Construct generator network from inputs to the final endpoint.
120 | 
121 |   Args:
122 |     inputs: A 2-D tensor of shape [batch_size, N].
123 |     depth: Number of channels in last deconvolution layer.
124 |     final_size: The shape of the final output.
125 |     num_outputs: Number of output features. For images, this is the number of
126 |       channels.
127 |     is_training: Whether the network is for training or not.
128 |     reuse: Whether or not the network variables should be reused. `scope`
129 |       must be given to be reused.
130 |     scope: Optional variable_scope.
131 |     fused_batch_norm: If `True`, use a faster, fused implementation of
132 |       batch norm.
133 | 
134 |   Returns:
135 |     logits: the pre-softmax activations, a tensor of size
136 |       [batch_size, 32, 32, channels]
137 |     end_points: a dictionary from components of the network to their activation.
138 | 
139 |   Raises:
140 |     ValueError: If `inputs` is not 2-dimensional.
141 |     ValueError: If `final_size` isn't a power of 2 or is less than 8.
142 |   """
143 |   normalizer_fn = slim.batch_norm
144 |   normalizer_fn_args = {
145 |       'is_training': is_training,
146 |       'zero_debias_moving_mean': True,
147 |       'fused': fused_batch_norm,
148 |   }
149 | 
150 |   inputs.get_shape().assert_has_rank(2)
151 |   if log(final_size, 2) != int(log(final_size, 2)):
152 |     raise ValueError('`final_size` (%i) must be a power of 2.' % final_size)
153 |   if final_size < 8:
154 |     raise ValueError('`final_size` (%i) must be at least 8.' % final_size)
155 | 
156 |   end_points = {}
157 |   num_layers = int(log(final_size, 2)) - 1
158 |   with tf.variable_scope(scope, values=[inputs], reuse=reuse) as scope:
159 |     with slim.arg_scope([normalizer_fn], **normalizer_fn_args):
160 |       with slim.arg_scope([slim.conv2d_transpose],
161 |                           normalizer_fn=normalizer_fn,
162 |                           stride=2,
163 |                           kernel_size=4):
164 |         net = tf.expand_dims(tf.expand_dims(inputs, 1), 1)
165 | 
166 |         # First upscaling is different because it takes the input vector.
167 |         current_depth = depth * 2 ** (num_layers - 1)
168 |         scope = 'deconv1'
169 |         net = slim.conv2d_transpose(
170 |             net, current_depth, stride=1, padding='VALID', scope=scope)
171 |         end_points[scope] = net
172 | 
173 |         for i in xrange(2, num_layers):
174 |           scope = 'deconv%i' % (i)
175 |           current_depth = depth * 2 ** (num_layers - i)
176 |           net = slim.conv2d_transpose(net, current_depth, scope=scope)
177 |           end_points[scope] = net
178 | 
179 |         # Last layer has different normalizer and activation.
180 |         scope = 'deconv%i' % (num_layers)
181 |         net = slim.conv2d_transpose(
182 |             net, depth, normalizer_fn=None, activation_fn=None, scope=scope)
183 |         end_points[scope] = net
184 | 
185 |         # Convert to proper channels.
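        # (A 1x1, stride-1 convolution with no normalizer or activation maps the
        # remaining feature channels down to `num_outputs`, e.g. 3 for RGB,
        # without changing the spatial dimensions.)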
186 |         scope = 'logits'
187 |         logits = slim.conv2d(
188 |             net,
189 |             num_outputs,
190 |             normalizer_fn=None,
191 |             activation_fn=None,
192 |             kernel_size=1,
193 |             stride=1,
194 |             padding='VALID',
195 |             scope=scope)
196 |         end_points[scope] = logits
197 | 
198 |         logits.get_shape().assert_has_rank(4)
199 |         logits.get_shape().assert_is_compatible_with(
200 |             [None, final_size, final_size, num_outputs])
201 | 
202 |   return logits, end_points
203 | 
--------------------------------------------------------------------------------
/cal_IoU_gt_py.py:
--------------------------------------------------------------------------------
1 | # -*- coding=utf-8 -*-
2 | import numpy as np
3 | import logging
4 | from icdar import restore_rectangle
5 | from multiprocessing import Pool
7 | from shapely.geometry import Polygon
8 | import multiprocessing
9 | 
10 | def cal_IoU_gt_py_multiprocess(pred_geo, pred_cls, gt, threshold=0.8):
11 |     def compute_IoU(polygon1, polygon2):
12 |         '''
13 |         compute the IoU of two quadrilaterals
14 |         :param polygon1: 4, 2
15 |         :param polygon2: 4, 2
16 |         :return: 0~1 value
17 |         '''
18 |         polygon1 = Polygon(polygon1)
19 |         if not polygon1.is_valid:
20 |             polygon1 = polygon1.buffer(0)
21 |         polygon2 = Polygon(polygon2)
22 |         if not polygon2.is_valid:
23 |             polygon2 = polygon2.buffer(0)
24 |         intersection_polygon = polygon1.intersection(polygon2)
25 |         if not intersection_polygon.is_valid:
26 |             return 0.0
27 |         intersection_area = intersection_polygon.area
28 |         union_area = polygon1.area + polygon2.area - intersection_area
29 |         return (1.0 * intersection_area) / (1.0 * union_area)
30 | 
31 |     '''
32 |     given the predicted pred_geo and pred_cls, compute for every pixel the IoU between its restored box and the ground truth
33 |     :param pred_geo: N, W, H, 5
34 |     :param pred_cls: N, W, H, 1
35 |     :param gt: N, M, 4, 2
36 |     :param threshold: 0.8
37 |     :return:
38 |     '''
39 |     # squeeze out dimensions of size 1
40 | 
41 |     print 'hello0'
42 |     pred_cls = np.squeeze(pred_cls)
43 |     shape = np.shape(pred_geo)
44 |     IoU_gt = np.zeros([shape[0], shape[1], shape[2], 1], np.float32)
45 | 
46 |     for batch_id in range(shape[0]):
47 |         process_num = 8
48 |         pool = Pool(processes=process_num)
49 |         print 'hello1'
50 |         score_map = pred_cls[batch_id]
51 |         geo_map = pred_geo[batch_id]
52 |         cur_gt = gt[batch_id]
53 | 
54 |         print 'hello2'
55 |         # print 'the shape of score_map is ', np.shape(score_map)
56 |         # print 'the shape of geo_map is ', np.shape(geo_map)
57 |         if len(np.shape(score_map)) != 2:
58 |             logging.log(logging.ERROR, 'score map shape isn\'t correct!')
59 |             assert False
60 |         xy_text = np.argwhere(score_map > threshold)
61 |         # sort the text boxes via the y axis
62 |         xy_text = xy_text[np.argsort(xy_text[:, 0])]
63 |         # print 'The number of points that satisfy the condition is ', len(xy_text)
64 |         text_box_restored = restore_rectangle(xy_text[:, ::-1], geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
65 |         # print np.shape(text_box_restored)
66 | 
67 |         per_process_num = len(xy_text) / process_num + 1
68 |         xss = {}
69 |         yss = {}
70 |         boxss = {}
71 | 
72 |         print 'hello3'
73 |         for idx, ((x, y), box) in enumerate(zip(xy_text, text_box_restored)):
74 |             process_id = idx / per_process_num
75 |             if process_id not in xss:
76 |                 xss[process_id] = []
77 |                 yss[process_id] = []
78 |                 boxss[process_id] = []
79 |             xss[process_id].append(x)
80 |             yss[process_id].append(y)
81 |             boxss[process_id].append(box)
86 | 
87 |         print 'hello4'
88 | 
89 |         def process_single_test():
90 |             return 1.0
91 |         def process_single(boxs, cur_gt):
92 | 
print 'hello4-0'
93 |                 IoU_values = []
94 |                 print 'hello4-1'
96 |                 for box in boxs:
97 |                     cur_IoU_value = 0.0
98 |                     print 'hello4-2'
99 |                     for gt_id in range(len(cur_gt)):
100 |                         if np.sum(cur_gt[gt_id]) == -8:  # a gt box of all -1s (4*2*(-1) == -8) marks padding
101 |                             break
102 |                         cur_IoU_value = max(cur_IoU_value, compute_IoU(np.asarray(box), np.asarray(cur_gt[gt_id])))
103 |                     IoU_values.append(cur_IoU_value)
104 |                     print 'hello4-3'
106 |                 return IoU_values
107 |             results = []
108 | 
109 |             print 'hello5'
110 |             for process_id in range(process_num):
111 |                 print 'hello6'
112 |                 # results.append(pool.apply_async(func=process_single, args=(boxss[process_id], cur_gt, )))  # nested functions generally can't be pickled by Pool
113 |                 results.append(pool.apply_async(func=process_single_test, args=()))
114 |             print 'hello7'
115 |             pool.close()
116 |             pool.join()
117 | 
118 |             print 'hello8'
119 |             for process_id, res in enumerate(results):
120 |                 xs = xss[process_id]
121 |                 ys = yss[process_id]
122 | 
123 |                 print 'hello9'
124 |                 xs = np.asarray(xs)
125 |                 ys = np.asarray(ys)
126 |                 print np.shape(xs)
127 |                 print np.shape(ys)
128 |                 IoU_values = res.get()
131 |                 print np.shape(IoU_values)
132 |                 print np.shape(xs)
133 |                 print np.shape(ys)
134 |                 IoU_gt[batch_id, xs, ys, 0] = IoU_values
135 | 
136 |             print 'hello10'
137 | 
138 |     print 'hello11'
139 |     return IoU_gt
140 | 
141 | def cal_IoU_gt_py(pred_geo, pred_cls, gt, threshold=0.8):
142 |     def compute_IoU(polygon1, polygon2):
143 |         '''
144 |         compute the IoU of two quadrilaterals
145 |         :param polygon1: 4, 2
146 |         :param polygon2: 4, 2
147 |         :return: 0~1 value
148 |         '''
149 |         polygon1 = Polygon(polygon1)
150 |         if not polygon1.is_valid:
151 |             polygon1 = polygon1.buffer(0)
152 |         polygon2 = Polygon(polygon2)
153 |         if not polygon2.is_valid:
154 |             polygon2 = polygon2.buffer(0)
155 |         intersection_polygon = polygon1.intersection(polygon2)
156 |         if not intersection_polygon.is_valid:
157 |             return 0.0
158 |         intersection_area = intersection_polygon.area
159 |         union_area = polygon1.area + polygon2.area - intersection_area
160 |         return (1.0 * intersection_area) / (1.0 * union_area)
161 | 
162 |     '''
163 |     given the predicted pred_geo and pred_cls, compute for every pixel the IoU between its restored box and the ground truth
164 |     :param pred_geo: N, W, H, 5
165 |     :param pred_cls: N, W, H, 1
166 |     :param gt: N, M, 4, 2
167 |     :param threshold: 0.8
168 |     :return:
169 |     '''
170 |     # squeeze out dimensions of size 1
171 | 
172 |     pred_cls = np.squeeze(pred_cls)
173 |     shape = np.shape(pred_geo)
174 |     IoU_gt = np.zeros([shape[0], shape[1], shape[2], 1], np.float32)
175 | 
176 |     for batch_id in range(shape[0]):
177 |         score_map = pred_cls[batch_id]
178 |         geo_map = pred_geo[batch_id]
179 |         cur_gt = gt[batch_id]
180 | 
181 |         if len(np.shape(score_map)) != 2:
182 |             logging.log(logging.ERROR, 'score map shape isn\'t correct!')
183 |             assert False
184 |         xy_text = np.argwhere(score_map > threshold)
185 |         # sort the text boxes via the y axis
186 |         xy_text = xy_text[np.argsort(xy_text[:, 0])]
187 |         # print 'The number of points that satisfy the condition is ', len(xy_text)
188 |         text_box_restored = restore_rectangle(xy_text[:, ::-1], geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
189 |         # print np.shape(text_box_restored)
190 | 
191 |         for idx, ((x, y), box) in enumerate(zip(xy_text, text_box_restored)):
192 |             cur_IoU_value = 0.0
193 |             for gt_id in range(len(cur_gt)):
194 |                 if np.sum(cur_gt[gt_id]) == -8:  # a gt box of all -1s (4*2*(-1) == -8) marks padding
195 |                     break
196 |                 cur_IoU_value = max(cur_IoU_value, compute_IoU(np.asarray(box), np.asarray(cur_gt[gt_id])))
197 |             IoU_gt[batch_id, x, y, 0] = cur_IoU_value
198 |     return IoU_gt
199 | 
200 | 
201 | if __name__ == 
'__main__': 202 | # pred_geo = np.random.random([2, 512, 512, 5]) 203 | # pred_cls = np.random.random([2, 512, 512, 1]) 204 | # cal_IoU_gt_py(pred_geo, pred_cls, None) 205 | def process_single_test(): 206 | print 'test' 207 | return 1.0 208 | process_num = 8 209 | pool = Pool(processes=process_num) 210 | results = [] 211 | for i in range(process_num): 212 | results.append(pool.apply_async(func=process_single_test, args=())) 213 | pool.close() 214 | pool.join() 215 | for i in range(process_num): 216 | print results[i].get() 217 | 218 | --------------------------------------------------------------------------------
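For reference, a minimal standalone sketch of the quadrilateral-IoU computation that `compute_IoU` above implements (this sketch is not part of the repository; the helper name `iou` and the sample boxes are illustrative only):

import numpy as np
from shapely.geometry import Polygon

def iou(quad1, quad2):
    # Each quad is a (4, 2) array of corner coordinates.
    p1, p2 = Polygon(quad1), Polygon(quad2)
    # buffer(0) repairs self-intersecting polygons, as in compute_IoU above.
    if not p1.is_valid:
        p1 = p1.buffer(0)
    if not p2.is_valid:
        p2 = p2.buffer(0)
    inter = p1.intersection(p2).area
    union = p1.area + p2.area - inter
    return inter / union if union > 0 else 0.0

# Two unit squares overlapping by half: IoU = 0.5 / 1.5 = 1/3.
a = np.array([[0, 0], [1, 0], [1, 1], [0, 1]])
b = np.array([[0.5, 0], [1.5, 0], [1.5, 1], [0.5, 1]])
assert abs(iou(a, b) - 1.0 / 3.0) < 1e-9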