├── __init__.py
├── nets
├── __init__.py
├── NASNet
│   ├── __init__.py
│   ├── nasnet_utils_test.py
│   ├── pnasnet_test.py
│   └── pnasnet.py
└── Inception_ResNet_V2
│   ├── __init__.py
│   └── nets
│   ├── __init__.py
│   ├── mobilenet_v1.png
│   ├── inception.py
│   ├── nets_factory_test.py
│   ├── inception_utils.py
│   ├── lenet.py
│   ├── cyclegan_test.py
│   ├── dcgan_test.py
│   ├── cifarnet.py
│   ├── mobilenet_v1_eval.py
│   ├── overfeat.py
│   ├── alexnet.py
│   ├── pix2pix_test.py
│   ├── nets_factory.py
│   ├── overfeat_test.py
│   ├── alexnet_test.py
│   └── dcgan.py
├── utils
├── __init__.py
└── tools.py
├── augmentation
├── __init__.py
├── rotated_10.png
├── rotated_10.txt
├── data_agumentation.py
└── test.py
├── lanms
├── .gitignore
├── include
│   ├── clipper
│   │   └── clipper.cpp
│   └── pybind11
│   │   ├── typeid.h
│   │   ├── complex.h
│   │   ├── options.h
│   │   ├── functional.h
│   │   ├── eval.h
│   │   ├── buffer_info.h
│   │   ├── chrono.h
│   │   └── embed.h
├── __main__.py
├── Makefile
├── __init__.py
├── adaptor.cpp
├── .ycm_extra_conf.py
└── lanms.h
├── notice_sample
├── .idea
├── markdown-navigator
│   └── profiles_settings.xml
├── vcs.xml
├── misc.xml
├── preferred-vcs.xml
├── inspectionProfiles
│   └── profiles_settings.xml
├── modules.xml
├── deployment.xml
├── webServers.xml
├── EAST-master.iml
└── markdown-navigator.xml
├── requirements.txt
├── static
└── css
│   └── app.css
├── deploy.sh
├── train_flowchart.md
├── test_flowchart.md
├── locality_aware_nms.py
├── .gitignore
├── tools.py
├── data_util.py
├── readme.md
├── run_demo_server.py
├── eval.py
└── cal_IoU_gt_py.py
/__init__.py: -------------------------------------------------------------------------------- 1 |
-------------------------------------------------------------------------------- /nets/__init__.py: -------------------------------------------------------------------------------- 1 |
-------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 |
-------------------------------------------------------------------------------- /nets/NASNet/__init__.py: -------------------------------------------------------------------------------- 1 |
-------------------------------------------------------------------------------- /augmentation/__init__.py: -------------------------------------------------------------------------------- 1 |
-------------------------------------------------------------------------------- /lanms/.gitignore: -------------------------------------------------------------------------------- 1 | adaptor.so 2 |
-------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/__init__.py: -------------------------------------------------------------------------------- 1 |
-------------------------------------------------------------------------------- /notice_sample: -------------------------------------------------------------------------------- 1 | 10 2 | 17 3 | 32 4 | 43
-------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/__init__.py: -------------------------------------------------------------------------------- 1 | 2 |
-------------------------------------------------------------------------------- /augmentation/rotated_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UpCoder/ICPR_TextDection/HEAD/augmentation/rotated_10.png
-------------------------------------------------------------------------------- /lanms/include/clipper/clipper.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UpCoder/ICPR_TextDection/HEAD/lanms/include/clipper/clipper.cpp
-------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/mobilenet_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UpCoder/ICPR_TextDection/HEAD/nets/Inception_ResNet_V2/nets/mobilenet_v1.png
-------------------------------------------------------------------------------- /.idea/markdown-navigator/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Shapely==1.5.13 2 | Flask==0.10.1 3 | matplotlib==1.5.1 4 | scipy==0.19.0 5 | plumbum==1.6.2 6 | numpy==1.12.1 7 | ipython==6.1.0 8 | Pillow==4.2.1 9 |
-------------------------------------------------------------------------------- /static/css/app.css: -------------------------------------------------------------------------------- 1 | #app { 2 | padding: 20px; 3 | } 4 | 5 | #result .item { 6 | padding-bottom: 20px; 7 | } 8 | 9 | .form-content-container { 10 | padding-left: 20px; 11 | } 12 |
-------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p server_log 3 | gunicorn -w 3 run_demo_server:app -b 0.0.0.0:8769 -t 120 \ 4 | --error-logfile server_log/error.log \ 5 | --access-logfile server_log/access.log 6 |
-------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 |
-------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
-------------------------------------------------------------------------------- /.idea/preferred-vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Git 5 | 6 | 7 |
-------------------------------------------------------------------------------- /train_flowchart.md: -------------------------------------------------------------------------------- 1 | graph TD; 2 | A[Input image]-->B[ResNet-based U-Net structure]; 3 | A-->G[Compute the ground truth] 4 | B-->C[Predict the pixel-based score map with a convolution] 5 | B-->D[Predict the geometry map with a convolution] 6 | G-->E 7 | C-->E[Compute the balanced cross-entropy loss against the ground truth] 8 | G-->F 9 | D-->F[Compute the geometry loss against the ground truth]
-------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 |
-------------------------------------------------------------------------------- /lanms/__main__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | from . 
import merge_quadrangle_n9 5 | 6 | if __name__ == '__main__': 7 | # unit square with confidence 1 8 | q = np.array([0, 0, 0, 1, 1, 1, 1, 0, 1], dtype='float32') 9 | 10 | print(merge_quadrangle_n9(np.array([q, q + 0.1, q + 2]))) 11 |
-------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
-------------------------------------------------------------------------------- /test_flowchart.md: -------------------------------------------------------------------------------- 1 | graph TD; 2 | A[Input image]-->B[ResNet-based U-Net structure]; 3 | B-->C[Predict the pixel-based score map with a convolution] 4 | B-->D[Predict the geometry map with a convolution] 5 | C-->E["Keep the pixels whose score exceeds a threshold (e.g. 0.8)"] 6 | D-->F 7 | E-->F["Gather the score and geometry of the pixels that pass the threshold"] 8 | F-->G[Compute one bounding box from each remaining pixel] 9 | G-->H[Apply non-maximum suppression to these bounding boxes] 10 | H-->I[Output the text bounding boxes]
-------------------------------------------------------------------------------- /lanms/Makefile: -------------------------------------------------------------------------------- 1 | CXXFLAGS = -I include -std=c++11 -O3 $(shell python3-config --cflags) # must target the same Python as LDFLAGS below 2 | LDFLAGS = $(shell python3-config --ldflags) 3 | 4 | DEPS = lanms.h $(shell find include -xtype f) 5 | CXX_SOURCES = adaptor.cpp include/clipper/clipper.cpp 6 | 7 | LIB_SO = adaptor.so 8 | 9 | $(LIB_SO): $(CXX_SOURCES) $(DEPS) 10 | $(CXX) -o $@ $(CXXFLAGS) $(LDFLAGS) $(CXX_SOURCES) --shared -fPIC 11 | 12 | clean: 13 | rm -rf $(LIB_SO) 14 |
-------------------------------------------------------------------------------- /.idea/deployment.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
-------------------------------------------------------------------------------- /.idea/webServers.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 14 | 15 |
-------------------------------------------------------------------------------- /lanms/__init__.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import os 3 | import numpy as np 4 | 5 | BASE_DIR = os.path.dirname(os.path.realpath(__file__)) 6 | 7 | if subprocess.call(['make', '-C', BASE_DIR]) != 0: # a non-zero exit code means the build failed 8 | raise RuntimeError('Cannot compile lanms: {}'.format(BASE_DIR)) 9 | 10 | 11 | def merge_quadrangle_n9(polys, thres=0.3, precision=10000): 12 | from .adaptor import merge_quadrangle_n9 as nms_impl 13 | if len(polys) == 0: 14 | return np.array([], dtype='float32') 15 | p = polys.copy() 16 | p[:,:8] *= precision 17 | ret = np.array(nms_impl(p, thres), dtype='float32') 18 | ret[:,:8] /= precision 19 | return ret 20 | 21 |
-------------------------------------------------------------------------------- /.idea/EAST-master.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 15 | 16 | 17 | 20 |
-------------------------------------------------------------------------------- /augmentation/rotated_10.txt: -------------------------------------------------------------------------------- 1 | 101,3,101,9,194,9,194,3,### 2 | 109,63,104,47,221,5,221,26,找书:en55@ 3 | 0,34,1,52,127,8,121,0,n55@qq.com 4 | 0,84,0,101,72,77,70,60,604682 5 | 171,76,177,94,219,78,214,62,找书: 6 | 42,94,42,116,212,116,212,94,### 7 | 157,132,162,152,221,128,221,110,QQ:29 8 | 5,153,1,137,132,90,139,108,:2956604682 9 |
42,121,42,136,114,136,114,118,### 10 | 99,143,99,157,152,157,152,143,TheLCAO 11 | 101,160,101,174,180,174,180,160,FirstPrincipes 12 | 100,177,100,190,158,190,158,177,Treatment 13 | 99,192,99,209,157,209,157,192,### 14 | 0,190,0,208,125,167,120,152,h55@qq.com 15 | 0,240,3,256,193,191,188,178,找书:en55@qq.com 16 | 7,310,0,289,131,245,140,261,:2956604682 17 | 115,316,115,329,157,329,157,316,Springer 18 | 171,231,179,248,220,234,215,218,找书: 19 | 21,337,48,349,204,291,197,273,QQ:2956604682 20 | 150,346,148,349,190,349,185,336,### 21 | 101,72,101,83,169,83,169,72,R.A.Evarestov 22 | 11,0,11,0,0,0,0,0,### 23 |
-------------------------------------------------------------------------------- /lanms/include/pybind11/typeid.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/typeid.h: Compiler-independent access to type identifiers 3 | 4 | Copyright (c) 2016 Wenzel Jakob 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 8 | */ 9 | 10 | #pragma once 11 | 12 | #include 13 | #include 14 | 15 | #if defined(__GNUG__) 16 | #include 17 | #endif 18 | 19 | NAMESPACE_BEGIN(pybind11) 20 | NAMESPACE_BEGIN(detail) 21 | /// Erase all occurrences of a substring 22 | inline void erase_all(std::string &string, const std::string &search) { 23 | for (size_t pos = 0;;) { 24 | pos = string.find(search, pos); 25 | if (pos == std::string::npos) break; 26 | string.erase(pos, search.length()); 27 | } 28 | } 29 | 30 | PYBIND11_NOINLINE inline void clean_type_id(std::string &name) { 31 | #if defined(__GNUG__) 32 | int status = 0; 33 | std::unique_ptr res { 34 | abi::__cxa_demangle(name.c_str(), nullptr, nullptr, &status), std::free }; 35 | if (status == 0) 36 | name = res.get(); 37 | #else 38 | detail::erase_all(name, "class "); 39 | detail::erase_all(name, "struct "); 40 | detail::erase_all(name, "enum "); 41 | #endif 42 | detail::erase_all(name, "pybind11::"); 43 | } 44 | NAMESPACE_END(detail) 45 | 46 | /// Return a string representation of a C++ type 47 | template static std::string type_id() { 48 | std::string name(typeid(T).name()); 49 | detail::clean_type_id(name); 50 | return name; 51 | } 52 | 53 | NAMESPACE_END(pybind11) 54 |
-------------------------------------------------------------------------------- /locality_aware_nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from shapely.geometry import Polygon 3 | 4 | 5 | def intersection(g, p): 6 | g = Polygon(g[:8].reshape((4, 2))) 7 | p = Polygon(p[:8].reshape((4, 2))) 8 | if not g.is_valid or not p.is_valid: 9 | return 0 10 | inter = g.intersection(p).area # g and p are already Polygon objects; no need to wrap them again 11 | union = g.area + p.area - inter 12 | if union == 0: 13 | return 0 14 | else: 15 | return inter/union 16 | 17 | 18 | def weighted_merge(g, p): 19 | g[:8] = (g[8] * g[:8] + p[8] * p[:8])/(g[8] + p[8]) 20 | g[8] = (g[8] + p[8]) 21 | return g 22 | 23 | 24 | def standard_nms(S, thres): 25 | order = np.argsort(S[:, 8])[::-1] 26 | keep = [] 27 | while order.size > 0: 28 | i = order[0] 29 | keep.append(i) 30 | ovr = np.array([intersection(S[i], S[t]) for t in order[1:]]) 31 | 32 | inds = np.where(ovr <= thres)[0] 33 | order = order[inds+1] 34 | 35 | return S[keep] 36 | 37 | 38 | def nms_locality(polys, thres=0.3): 39 | ''' 40 | locality aware nms of EAST 41 | :param polys: an N*9 numpy array. 
first 8 coordinates, then prob 42 | :return: boxes after nms 43 | ''' 44 | S = [] 45 | p = None 46 | for g in polys: 47 | if p is not None and intersection(g, p) > thres: 48 | p = weighted_merge(g, p) 49 | else: 50 | if p is not None: 51 | S.append(p) 52 | p = g 53 | if p is not None: 54 | S.append(p) 55 | 56 | if len(S) == 0: 57 | return np.array([]) 58 | return standard_nms(np.array(S), thres) 59 | 60 | 61 | if __name__ == '__main__': 62 | # 343,350,448,135,474,143,369,359 63 | print(Polygon(np.array([[343, 350], [448, 135], 64 | [474, 143], [369, 359]])).area) 65 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/inception.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Brings all inception models under one namespace.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | # pylint: disable=unused-import 22 | from nets.inception_resnet_v2 import inception_resnet_v2 23 | from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope 24 | from nets.inception_resnet_v2 import inception_resnet_v2_base 25 | from nets.inception_v1 import inception_v1 26 | from nets.inception_v1 import inception_v1_arg_scope 27 | from nets.inception_v1 import inception_v1_base 28 | from nets.inception_v2 import inception_v2 29 | from nets.inception_v2 import inception_v2_arg_scope 30 | from nets.inception_v2 import inception_v2_base 31 | from nets.inception_v3 import inception_v3 32 | from nets.inception_v3 import inception_v3_arg_scope 33 | from nets.inception_v3 import inception_v3_base 34 | from nets.inception_v4 import inception_v4 35 | from nets.inception_v4 import inception_v4_arg_scope 36 | from nets.inception_v4 import inception_v4_base 37 | # pylint: enable=unused-import 38 | -------------------------------------------------------------------------------- /lanms/adaptor.cpp: -------------------------------------------------------------------------------- 1 | #include "pybind11/pybind11.h" 2 | #include "pybind11/numpy.h" 3 | #include "pybind11/stl.h" 4 | #include "pybind11/stl_bind.h" 5 | 6 | #include "lanms.h" 7 | 8 | namespace py = pybind11; 9 | 10 | 11 | namespace lanms_adaptor { 12 | 13 | std::vector> polys2floats(const std::vector &polys) { 14 | std::vector> ret; 15 | for (size_t i = 0; i < polys.size(); i ++) { 16 | auto &p = polys[i]; 17 | auto &poly = p.poly; 18 | ret.emplace_back(std::vector{ 19 | float(poly[0].X), float(poly[0].Y), 20 | float(poly[1].X), float(poly[1].Y), 21 | float(poly[2].X), float(poly[2].Y), 22 | float(poly[3].X), float(poly[3].Y), 23 | float(p.score), 24 | }); 25 | } 26 | 27 | return ret; 28 | } 29 | 30 | 31 | /** 32 | * 33 | * \param quad_n9 an 
n-by-9 numpy array, where first 8 numbers denote the 34 | * quadrangle, and the last one is the score 35 | * \param iou_threshold two quadrangles with iou score above this threshold 36 | * will be merged 37 | * 38 | * \return an n-by-9 numpy array, the merged quadrangles 39 | */ 40 | std::vector> merge_quadrangle_n9( 41 | py::array_t quad_n9, 42 | float iou_threshold) { 43 | auto pbuf = quad_n9.request(); 44 | if (pbuf.ndim != 2 || pbuf.shape[1] != 9) 45 | throw std::runtime_error("quadrangles must have a shape of (n, 9)"); 46 | auto n = pbuf.shape[0]; 47 | auto ptr = static_cast(pbuf.ptr); 48 | return polys2floats(lanms::merge_quadrangle_n9(ptr, n, iou_threshold)); 49 | } 50 | 51 | } 52 | 53 | PYBIND11_PLUGIN(adaptor) { 54 | py::module m("adaptor", "NMS"); 55 | 56 | m.def("merge_quadrangle_n9", &lanms_adaptor::merge_quadrangle_n9, 57 | "merge quadrangles"); 58 | 59 | return m.ptr(); 60 | } 61 | 62 |
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # vim swapfile 104 | *.swp 105 | 106 | # result files for demo 107 | static/results 108 | 109 | 110 | # tmp 111 | tmp/* 112 | 113 | demo_images/* 114 | demo_result/* 115 | templates/* 116 | training_samples/* 117 | 118 | east_icdar2015_resnet_v1_50_rbox (1).zip 119 | resnet_v1_50_2016_08_28.tar.gz 120 | 121 | log/* 122 | 123 |
-------------------------------------------------------------------------------- /lanms/include/pybind11/complex.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/complex.h: Complex number support 3 | 4 | Copyright (c) 2016 Wenzel Jakob 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 
8 | */ 9 | 10 | #pragma once 11 | 12 | #include "pybind11.h" 13 | #include 14 | 15 | /// glibc defines I as a macro which breaks things, e.g., boost template names 16 | #ifdef I 17 | # undef I 18 | #endif 19 | 20 | NAMESPACE_BEGIN(pybind11) 21 | 22 | template struct format_descriptor, detail::enable_if_t::value>> { 23 | static constexpr const char c = format_descriptor::c; 24 | static constexpr const char value[3] = { 'Z', c, '\0' }; 25 | static std::string format() { return std::string(value); } 26 | }; 27 | 28 | template constexpr const char format_descriptor< 29 | std::complex, detail::enable_if_t::value>>::value[3]; 30 | 31 | NAMESPACE_BEGIN(detail) 32 | 33 | template struct is_fmt_numeric, detail::enable_if_t::value>> { 34 | static constexpr bool value = true; 35 | static constexpr int index = is_fmt_numeric::index + 3; 36 | }; 37 | 38 | template class type_caster> { 39 | public: 40 | bool load(handle src, bool convert) { 41 | if (!src) 42 | return false; 43 | if (!convert && !PyComplex_Check(src.ptr())) 44 | return false; 45 | Py_complex result = PyComplex_AsCComplex(src.ptr()); 46 | if (result.real == -1.0 && PyErr_Occurred()) { 47 | PyErr_Clear(); 48 | return false; 49 | } 50 | value = std::complex((T) result.real, (T) result.imag); 51 | return true; 52 | } 53 | 54 | static handle cast(const std::complex &src, return_value_policy /* policy */, handle /* parent */) { 55 | return PyComplex_FromDoubles((double) src.real(), (double) src.imag()); 56 | } 57 | 58 | PYBIND11_TYPE_CASTER(std::complex, _("complex")); 59 | }; 60 | NAMESPACE_END(detail) 61 | NAMESPACE_END(pybind11) 62 | -------------------------------------------------------------------------------- /lanms/include/pybind11/options.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/options.h: global settings that are configurable at runtime. 3 | 4 | Copyright (c) 2016 Wenzel Jakob 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 8 | */ 9 | 10 | #pragma once 11 | 12 | #include "common.h" 13 | 14 | NAMESPACE_BEGIN(pybind11) 15 | 16 | class options { 17 | public: 18 | 19 | // Default RAII constructor, which leaves settings as they currently are. 20 | options() : previous_state(global_state()) {} 21 | 22 | // Class is non-copyable. 23 | options(const options&) = delete; 24 | options& operator=(const options&) = delete; 25 | 26 | // Destructor, which restores settings that were in effect before. 27 | ~options() { 28 | global_state() = previous_state; 29 | } 30 | 31 | // Setter methods (affect the global state): 32 | 33 | options& disable_user_defined_docstrings() & { global_state().show_user_defined_docstrings = false; return *this; } 34 | 35 | options& enable_user_defined_docstrings() & { global_state().show_user_defined_docstrings = true; return *this; } 36 | 37 | options& disable_function_signatures() & { global_state().show_function_signatures = false; return *this; } 38 | 39 | options& enable_function_signatures() & { global_state().show_function_signatures = true; return *this; } 40 | 41 | // Getter methods (return the global state): 42 | 43 | static bool show_user_defined_docstrings() { return global_state().show_user_defined_docstrings; } 44 | 45 | static bool show_function_signatures() { return global_state().show_function_signatures; } 46 | 47 | // This type is not meant to be allocated on the heap. 
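// (Note: the deleted operator new below forces every options instance onto the stack, so the destructor is guaranteed to run and restore previous_state; that RAII guarantee is what this class relies on.)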
48 | void* operator new(size_t) = delete; 49 | 50 | private: 51 | 52 | struct state { 53 | bool show_user_defined_docstrings = true; //< Include user-supplied texts in docstrings. 54 | bool show_function_signatures = true; //< Include auto-generated function signatures in docstrings. 55 | }; 56 | 57 | static state &global_state() { 58 | static state instance; 59 | return instance; 60 | } 61 | 62 | state previous_state; 63 | }; 64 | 65 | NAMESPACE_END(pybind11) 66 | -------------------------------------------------------------------------------- /nets/NASNet/nasnet_utils_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for slim.nets.nasnet.nasnet_utils.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | 24 | from nets.NASNet import nasnet_utils 25 | 26 | 27 | class NasnetUtilsTest(tf.test.TestCase): 28 | 29 | def testCalcReductionLayers(self): 30 | num_cells = 18 31 | num_reduction_layers = 2 32 | reduction_layers = nasnet_utils.calc_reduction_layers( 33 | num_cells, num_reduction_layers) 34 | self.assertEqual(len(reduction_layers), 2) 35 | self.assertEqual(reduction_layers[0], 6) 36 | self.assertEqual(reduction_layers[1], 12) 37 | 38 | def testGetChannelIndex(self): 39 | data_formats = ['NHWC', 'NCHW'] 40 | for data_format in data_formats: 41 | index = nasnet_utils.get_channel_index(data_format) 42 | correct_index = 3 if data_format == 'NHWC' else 1 43 | self.assertEqual(index, correct_index) 44 | 45 | def testGetChannelDim(self): 46 | data_formats = ['NHWC', 'NCHW'] 47 | shape = [10, 20, 30, 40] 48 | for data_format in data_formats: 49 | dim = nasnet_utils.get_channel_dim(shape, data_format) 50 | correct_dim = shape[3] if data_format == 'NHWC' else shape[1] 51 | self.assertEqual(dim, correct_dim) 52 | 53 | def testGlobalAvgPool(self): 54 | data_formats = ['NHWC', 'NCHW'] 55 | inputs = tf.placeholder(tf.float32, (5, 10, 20, 10)) 56 | for data_format in data_formats: 57 | output = nasnet_utils.global_avg_pool( 58 | inputs, data_format) 59 | self.assertEqual(output.shape, [5, 10]) 60 | 61 | 62 | if __name__ == '__main__': 63 | tf.test.main() 64 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/nets_factory_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for slim.inception.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from nets import nets_factory 26 | 27 | 28 | class NetworksTest(tf.test.TestCase): 29 | 30 | def testGetNetworkFnFirstHalf(self): 31 | batch_size = 5 32 | num_classes = 1000 33 | for net in list(nets_factory.networks_map.keys())[:10]: 34 | with tf.Graph().as_default() as g, self.test_session(g): 35 | net_fn = nets_factory.get_network_fn(net, num_classes) 36 | # Most networks use 224 as their default_image_size 37 | image_size = getattr(net_fn, 'default_image_size', 224) 38 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3)) 39 | logits, end_points = net_fn(inputs) 40 | self.assertTrue(isinstance(logits, tf.Tensor)) 41 | self.assertTrue(isinstance(end_points, dict)) 42 | self.assertEqual(logits.get_shape().as_list()[0], batch_size) 43 | self.assertEqual(logits.get_shape().as_list()[-1], num_classes) 44 | 45 | def testGetNetworkFnSecondHalf(self): 46 | batch_size = 5 47 | num_classes = 1000 48 | for net in list(nets_factory.networks_map.keys())[10:]: 49 | with tf.Graph().as_default() as g, self.test_session(g): 50 | net_fn = nets_factory.get_network_fn(net, num_classes) 51 | # Most networks use 224 as their default_image_size 52 | image_size = getattr(net_fn, 'default_image_size', 224) 53 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3)) 54 | logits, end_points = net_fn(inputs) 55 | self.assertTrue(isinstance(logits, tf.Tensor)) 56 | self.assertTrue(isinstance(end_points, dict)) 57 | self.assertEqual(logits.get_shape().as_list()[0], batch_size) 58 | self.assertEqual(logits.get_shape().as_list()[-1], num_classes) 59 | 60 | if __name__ == '__main__': 61 | tf.test.main() 62 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/inception_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains common code shared by all inception models. 
16 | 17 | Usage of arg scope: 18 | with slim.arg_scope(inception_arg_scope()): 19 | logits, end_points = inception.inception_v3(images, num_classes, 20 | is_training=is_training) 21 | 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import tensorflow as tf 28 | 29 | slim = tf.contrib.slim 30 | 31 | 32 | def inception_arg_scope(weight_decay=0.00004, 33 | use_batch_norm=True, 34 | batch_norm_decay=0.9997, 35 | batch_norm_epsilon=0.001, 36 | activation_fn=tf.nn.relu): 37 | """Defines the default arg scope for inception models. 38 | 39 | Args: 40 | weight_decay: The weight decay to use for regularizing the model. 41 | use_batch_norm: If `True`, batch_norm is applied after each convolution. 42 | batch_norm_decay: Decay for batch norm moving average. 43 | batch_norm_epsilon: Small float added to variance to avoid dividing by zero 44 | in batch norm. 45 | activation_fn: Activation function for conv2d. 46 | 47 | Returns: 48 | An `arg_scope` to use for the inception models. 49 | """ 50 | batch_norm_params = { 51 | # Decay for the moving averages. 52 | 'decay': batch_norm_decay, 53 | # epsilon to prevent 0s in variance. 54 | 'epsilon': batch_norm_epsilon, 55 | # collection containing update_ops. 56 | 'updates_collections': tf.GraphKeys.UPDATE_OPS, 57 | # use fused batch norm if possible. 58 | 'fused': None, 59 | } 60 | if use_batch_norm: 61 | normalizer_fn = slim.batch_norm 62 | normalizer_params = batch_norm_params 63 | else: 64 | normalizer_fn = None 65 | normalizer_params = {} 66 | # Set weight_decay for weights in Conv and FC layers. 67 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 68 | weights_regularizer=slim.l2_regularizer(weight_decay)): 69 | with slim.arg_scope( 70 | [slim.conv2d], 71 | weights_initializer=slim.variance_scaling_initializer(), 72 | activation_fn=activation_fn, 73 | normalizer_fn=normalizer_fn, 74 | normalizer_params=normalizer_params) as sc: 75 | return sc 76 |
-------------------------------------------------------------------------------- /lanms/include/pybind11/functional.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/functional.h: std::function<> support 3 | 4 | Copyright (c) 2016 Wenzel Jakob 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 8 | */ 9 | 10 | #pragma once 11 | 12 | #include "pybind11.h" 13 | #include 14 | 15 | NAMESPACE_BEGIN(pybind11) 16 | NAMESPACE_BEGIN(detail) 17 | 18 | template 19 | struct type_caster> { 20 | using type = std::function; 21 | using retval_type = conditional_t::value, void_type, Return>; 22 | using function_type = Return (*) (Args...); 23 | 24 | public: 25 | bool load(handle src, bool convert) { 26 | if (src.is_none()) { 27 | // Defer accepting None to other overloads (if we aren't in convert mode): 28 | if (!convert) return false; 29 | return true; 30 | } 31 | 32 | if (!isinstance(src)) 33 | return false; 34 | 35 | auto func = reinterpret_borrow(src); 36 | 37 | /* 38 | When passing a C++ function as an argument to another C++ 39 | function via Python, every function call would normally involve 40 | a full C++ -> Python -> C++ roundtrip, which can be prohibitive. 41 | Here, we try to at least detect the case where the function is 42 | stateless (i.e. function pointer or lambda function without 43 | captured variables), in which case the roundtrip can be avoided. 
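(In the code below, the bound function's function_record is inspected: if its is_stateless flag is set and the stored type_info matches the requested function_type, the raw C++ function pointer is recovered from the record and called directly.)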
44 | */ 45 | if (auto cfunc = func.cpp_function()) { 46 | auto c = reinterpret_borrow(PyCFunction_GET_SELF(cfunc.ptr())); 47 | auto rec = (function_record *) c; 48 | 49 | if (rec && rec->is_stateless && 50 | same_type(typeid(function_type), *reinterpret_cast(rec->data[1]))) { 51 | struct capture { function_type f; }; 52 | value = ((capture *) &rec->data)->f; 53 | return true; 54 | } 55 | } 56 | 57 | value = [func](Args... args) -> Return { 58 | gil_scoped_acquire acq; 59 | object retval(func(std::forward(args)...)); 60 | /* Visual studio 2015 parser issue: need parentheses around this expression */ 61 | return (retval.template cast()); 62 | }; 63 | return true; 64 | } 65 | 66 | template 67 | static handle cast(Func &&f_, return_value_policy policy, handle /* parent */) { 68 | if (!f_) 69 | return none().inc_ref(); 70 | 71 | auto result = f_.template target(); 72 | if (result) 73 | return cpp_function(*result, policy).release(); 74 | else 75 | return cpp_function(std::forward(f_), policy).release(); 76 | } 77 | 78 | PYBIND11_TYPE_CASTER(type, _("Callable[[") + 79 | argument_loader::arg_names() + _("], ") + 80 | make_caster::name() + 81 | _("]")); 82 | }; 83 | 84 | NAMESPACE_END(detail) 85 | NAMESPACE_END(pybind11) 86 | -------------------------------------------------------------------------------- /tools.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | from PIL import Image 3 | import numpy as np 4 | import cv2 5 | import sys 6 | from utils.tools import draw_rect, order_points 7 | from icdar import expand_poly 8 | sys.setrecursionlimit(300000000) 9 | 10 | 11 | def show_image(image_arr): 12 | img = Image.fromarray(image_arr) 13 | img.show() 14 | 15 | def find_connected(score_map, threshold=0.7): 16 | binary_map = (score_map > threshold).astype(np.uint8) 17 | connectivity = 8 18 | output = cv2.connectedComponentsWithStats(binary_map, connectivity=connectivity, ltype=cv2.CV_32S) 19 | label_map = output[1] 20 | # show_image(np.asarray(label_map * 100.0, np.uint8)) 21 | return np.max(label_map), label_map 22 | 23 | 24 | def calculate_boundingbox_score(score_map, threshold=0.7): 25 | # score_map = score_map[::-1, :] 26 | score_map[score_map < threshold] = 0.0 27 | h, w = np.shape(score_map) 28 | # show_image(np.asarray(score_map * 255, np.uint8)) 29 | flag = np.zeros([h, w]) 30 | boundingboxs = [] 31 | rects = [] 32 | count_connecter, label_map = find_connected(score_map, threshold) 33 | label_map = np.array(label_map) 34 | bbox_image = np.zeros(np.shape(label_map), np.uint8) 35 | expand_image = np.zeros(np.shape(label_map), np.uint8) 36 | for idx in range(1, count_connecter+1): 37 | connected = np.array(np.where(label_map == idx)).transpose((1, 0)) 38 | rect = cv2.minAreaRect(np.array(connected)) 39 | rects.append(rect) 40 | bbox = order_points(cv2.boxPoints(rect)[:, ::-1]) 41 | r = [None, None, None, None] 42 | for i in range(4): 43 | r[i] = min(np.linalg.norm(bbox[i] - bbox[(i + 1) % 4]), 44 | np.linalg.norm(bbox[i] - bbox[(i - 1) % 4])) 45 | expand_bbox = expand_poly(bbox.copy(), r).astype(np.int32) 46 | boundingboxs.append(expand_bbox) 47 | cur_points = [] 48 | expand_points = [] 49 | for i in range(4): 50 | for j in range(2): 51 | cur_points.append(bbox[i, j]) 52 | expand_points.append(expand_bbox[i, j]) 53 | expand_image = draw_rect(expand_image, expand_points) 54 | bbox_image = draw_rect(bbox_image, cur_points) 55 | 56 | for i in range(len(rects)): 57 | for j in range(len(rects)): 58 | if i == j: 59 | continue 60 | rect1 
= rects[i] 61 | rect2 = rects[j] 62 | theta1 = rect1[2] 63 | theta2 = rect2[2] 64 | if abs(theta1 - theta2) < 5: 65 | center1 = rect1[0] 66 | center2 = rect2[0] # rect[0] is the box center; rect[1] is its (width, height), so rect2[1] was a bug 67 | center_distance = (center1[0] - center2[0])**2 + (center1[1] - center2[1])**2 68 | # dis_sub_width = center_distance - rect1[1][] 69 | # print 'ok' (unfinished heuristic for merging nearly-collinear boxes; currently has no effect) 70 | points = [] 71 | for bbox in boundingboxs: 72 | cur_points = [] 73 | for i in range(4): 74 | for j in range(2): 75 | cur_points.append(bbox[i, j]) 76 | points.append(cur_points) 77 | return np.array(points) 78 | 79 | if __name__ == '__main__': 80 | test = np.zeros([100, 100]) 81 | test[10:21, 10:81] = 1.0 82 | 83 | test[30:40, 10:81] = 1.0 84 | 85 | test[42:50, 10:81] = 1.0 86 | 87 | test[62:70, 10:81] = 1.0 88 | 89 | test[82:90, 10:81] = 1.0 90 | find_connected(test) 91 | show_image(np.asarray(test * 255, np.uint8)) 92 | # xys = np.argwhere(test != 0) 93 | # rect = cv2.minAreaRect(xys) 94 | # print rect
-------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/lenet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a variant of the LeNet model definition.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | 26 | def lenet(images, num_classes=10, is_training=False, 27 | dropout_keep_prob=0.5, 28 | prediction_fn=slim.softmax, 29 | scope='LeNet'): 30 | """Creates a variant of the LeNet model. 31 | 32 | Note that since the output is a set of 'logits', the values fall in the 33 | interval of (-infinity, infinity). Consequently, to convert the outputs to a 34 | probability distribution over the characters, one will need to convert them 35 | using the softmax function: 36 | 37 | logits = lenet.lenet(images, is_training=False) 38 | probabilities = tf.nn.softmax(logits) 39 | predictions = tf.argmax(logits, 1) 40 | 41 | Args: 42 | images: A batch of `Tensors` of size [batch_size, height, width, channels]. 43 | num_classes: the number of classes in the dataset. If 0 or None, the logits 44 | layer is omitted and the input features to the logits layer are returned 45 | instead. 46 | is_training: specifies whether or not we're currently training the model. 47 | This variable will determine the behaviour of the dropout layer. 48 | dropout_keep_prob: the percentage of activation values that are retained. 49 | prediction_fn: a function to get predictions out of logits. 50 | scope: Optional variable_scope. 
51 | 52 | Returns: 53 | net: a 2D Tensor with the logits (pre-softmax activations) if num_classes 54 | is a non-zero integer, or the non-dropped-out input to the logits layer 55 | if num_classes is 0 or None. 56 | end_points: a dictionary from components of the network to the corresponding 57 | activation. 58 | """ 59 | end_points = {} 60 | 61 | with tf.variable_scope(scope, 'LeNet', [images]): 62 | net = end_points['conv1'] = slim.conv2d(images, 32, [5, 5], scope='conv1') 63 | net = end_points['pool1'] = slim.max_pool2d(net, [2, 2], 2, scope='pool1') 64 | net = end_points['conv2'] = slim.conv2d(net, 64, [5, 5], scope='conv2') 65 | net = end_points['pool2'] = slim.max_pool2d(net, [2, 2], 2, scope='pool2') 66 | net = slim.flatten(net) 67 | end_points['Flatten'] = net 68 | 69 | net = end_points['fc3'] = slim.fully_connected(net, 1024, scope='fc3') 70 | if not num_classes: 71 | return net, end_points 72 | net = end_points['dropout3'] = slim.dropout( 73 | net, dropout_keep_prob, is_training=is_training, scope='dropout3') 74 | logits = end_points['Logits'] = slim.fully_connected( 75 | net, num_classes, activation_fn=None, scope='fc4') 76 | 77 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 78 | 79 | return logits, end_points 80 | lenet.default_image_size = 28 81 | 82 | 83 | def lenet_arg_scope(weight_decay=0.0): 84 | """Defines the default lenet argument scope. 85 | 86 | Args: 87 | weight_decay: The weight decay to use for regularizing the model. 88 | 89 | Returns: 90 | An `arg_scope` to use for the lenet model. 91 | """ 92 | with slim.arg_scope( 93 | [slim.conv2d, slim.fully_connected], 94 | weights_regularizer=slim.l2_regularizer(weight_decay), 95 | weights_initializer=tf.truncated_normal_initializer(stddev=0.1), 96 | activation_fn=tf.nn.relu) as sc: 97 | return sc 98 |
-------------------------------------------------------------------------------- /.idea/markdown-navigator.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 36 | 37 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 |
-------------------------------------------------------------------------------- /lanms/include/pybind11/eval.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/eval.h: Support for evaluating Python expressions and statements 3 | from strings and files 4 | 5 | Copyright (c) 2016 Klemens Morgenstern and 6 | Wenzel Jakob 7 | 8 | All rights reserved. Use of this source code is governed by a 9 | BSD-style license that can be found in the LICENSE file. 10 | */ 11 | 12 | #pragma once 13 | 14 | #include "pybind11.h" 15 | 16 | NAMESPACE_BEGIN(pybind11) 17 | 18 | enum eval_mode { 19 | /// Evaluate a string containing an isolated expression 20 | eval_expr, 21 | 22 | /// Evaluate a string containing a single statement. Returns \c none 23 | eval_single_statement, 24 | 25 | /// Evaluate a string containing a sequence of statements. 
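(i.e. a module-style block; the switch below maps this mode to Py_file_input.)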
Returns \c none 26 | eval_statements 27 | }; 28 | 29 | template 30 | object eval(str expr, object global = globals(), object local = object()) { 31 | if (!local) 32 | local = global; 33 | 34 | /* PyRun_String does not accept a PyObject / encoding specifier, 35 | this seems to be the only alternative */ 36 | std::string buffer = "# -*- coding: utf-8 -*-\n" + (std::string) expr; 37 | 38 | int start; 39 | switch (mode) { 40 | case eval_expr: start = Py_eval_input; break; 41 | case eval_single_statement: start = Py_single_input; break; 42 | case eval_statements: start = Py_file_input; break; 43 | default: pybind11_fail("invalid evaluation mode"); 44 | } 45 | 46 | PyObject *result = PyRun_String(buffer.c_str(), start, global.ptr(), local.ptr()); 47 | if (!result) 48 | throw error_already_set(); 49 | return reinterpret_steal(result); 50 | } 51 | 52 | template 53 | object eval(const char (&s)[N], object global = globals(), object local = object()) { 54 | /* Support raw string literals by removing common leading whitespace */ 55 | auto expr = (s[0] == '\n') ? str(module::import("textwrap").attr("dedent")(s)) 56 | : str(s); 57 | return eval(expr, global, local); 58 | } 59 | 60 | inline void exec(str expr, object global = globals(), object local = object()) { 61 | eval(expr, global, local); 62 | } 63 | 64 | template 65 | void exec(const char (&s)[N], object global = globals(), object local = object()) { 66 | eval(s, global, local); 67 | } 68 | 69 | template 70 | object eval_file(str fname, object global = globals(), object local = object()) { 71 | if (!local) 72 | local = global; 73 | 74 | int start; 75 | switch (mode) { 76 | case eval_expr: start = Py_eval_input; break; 77 | case eval_single_statement: start = Py_single_input; break; 78 | case eval_statements: start = Py_file_input; break; 79 | default: pybind11_fail("invalid evaluation mode"); 80 | } 81 | 82 | int closeFile = 1; 83 | std::string fname_str = (std::string) fname; 84 | #if PY_VERSION_HEX >= 0x03040000 85 | FILE *f = _Py_fopen_obj(fname.ptr(), "r"); 86 | #elif PY_VERSION_HEX >= 0x03000000 87 | FILE *f = _Py_fopen(fname.ptr(), "r"); 88 | #else 89 | /* No unicode support in open() :( */ 90 | auto fobj = reinterpret_steal(PyFile_FromString( 91 | const_cast(fname_str.c_str()), 92 | const_cast("r"))); 93 | FILE *f = nullptr; 94 | if (fobj) 95 | f = PyFile_AsFile(fobj.ptr()); 96 | closeFile = 0; 97 | #endif 98 | if (!f) { 99 | PyErr_Clear(); 100 | pybind11_fail("File \"" + fname_str + "\" could not be opened!"); 101 | } 102 | 103 | #if PY_VERSION_HEX < 0x03000000 && defined(PYPY_VERSION) 104 | PyObject *result = PyRun_File(f, fname_str.c_str(), start, global.ptr(), 105 | local.ptr()); 106 | (void) closeFile; 107 | #else 108 | PyObject *result = PyRun_FileEx(f, fname_str.c_str(), start, global.ptr(), 109 | local.ptr(), closeFile); 110 | #endif 111 | 112 | if (!result) 113 | throw error_already_set(); 114 | return reinterpret_steal(result); 115 | } 116 | 117 | NAMESPACE_END(pybind11) 118 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/cyclegan_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for tensorflow.contrib.slim.nets.cyclegan.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | from nets import cyclegan 24 | 25 | 26 | # TODO(joelshor): Add a test to check generator endpoints. 27 | class CycleganTest(tf.test.TestCase): 28 | 29 | def test_generator_inference(self): 30 | """Check one inference step.""" 31 | img_batch = tf.zeros([2, 32, 32, 3]) 32 | model_output, _ = cyclegan.cyclegan_generator_resnet(img_batch) 33 | with self.test_session() as sess: 34 | sess.run(tf.global_variables_initializer()) 35 | sess.run(model_output) 36 | 37 | def _test_generator_graph_helper(self, shape): 38 | """Check that generator can take small and non-square inputs.""" 39 | output_imgs, _ = cyclegan.cyclegan_generator_resnet(tf.ones(shape)) 40 | self.assertAllEqual(shape, output_imgs.shape.as_list()) 41 | 42 | def test_generator_graph_small(self): 43 | self._test_generator_graph_helper([4, 32, 32, 3]) 44 | 45 | def test_generator_graph_medium(self): 46 | self._test_generator_graph_helper([3, 128, 128, 3]) 47 | 48 | def test_generator_graph_nonsquare(self): 49 | self._test_generator_graph_helper([2, 80, 400, 3]) 50 | 51 | def test_generator_unknown_batch_dim(self): 52 | """Check that generator can take unknown batch dimension inputs.""" 53 | img = tf.placeholder(tf.float32, shape=[None, 32, None, 3]) 54 | output_imgs, _ = cyclegan.cyclegan_generator_resnet(img) 55 | 56 | self.assertAllEqual([None, 32, None, 3], output_imgs.shape.as_list()) 57 | 58 | def _input_and_output_same_shape_helper(self, kernel_size): 59 | img_batch = tf.placeholder(tf.float32, shape=[None, 32, 32, 3]) 60 | output_img_batch, _ = cyclegan.cyclegan_generator_resnet( 61 | img_batch, kernel_size=kernel_size) 62 | 63 | self.assertAllEqual(img_batch.shape.as_list(), 64 | output_img_batch.shape.as_list()) 65 | 66 | def input_and_output_same_shape_kernel3(self): 67 | self._input_and_output_same_shape_helper(3) 68 | 69 | def input_and_output_same_shape_kernel4(self): 70 | self._input_and_output_same_shape_helper(4) 71 | 72 | def input_and_output_same_shape_kernel5(self): 73 | self._input_and_output_same_shape_helper(5) 74 | 75 | def input_and_output_same_shape_kernel6(self): 76 | self._input_and_output_same_shape_helper(6) 77 | 78 | def _error_if_height_not_multiple_of_four_helper(self, height): 79 | self.assertRaisesRegexp( 80 | ValueError, 81 | 'The input height must be a multiple of 4.', 82 | cyclegan.cyclegan_generator_resnet, 83 | tf.placeholder(tf.float32, shape=[None, height, 32, 3])) 84 | 85 | def test_error_if_height_not_multiple_of_four_height29(self): 86 | self._error_if_height_not_multiple_of_four_helper(29) 87 | 88 | def test_error_if_height_not_multiple_of_four_height30(self): 89 | self._error_if_height_not_multiple_of_four_helper(30) 90 | 91 | def test_error_if_height_not_multiple_of_four_height31(self): 92 | 
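# (31 % 4 != 0, so the helper above should observe the 'multiple of 4' ValueError)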
self._error_if_height_not_multiple_of_four_helper(31) 93 | 94 | def _error_if_width_not_multiple_of_four_helper(self, width): 95 | self.assertRaisesRegexp( 96 | ValueError, 97 | 'The input width must be a multiple of 4.', 98 | cyclegan.cyclegan_generator_resnet, 99 | tf.placeholder(tf.float32, shape=[None, 32, width, 3])) 100 | 101 | def test_error_if_width_not_multiple_of_four_width29(self): 102 | self._error_if_width_not_multiple_of_four_helper(29) 103 | 104 | def test_error_if_width_not_multiple_of_four_width30(self): 105 | self._error_if_width_not_multiple_of_four_helper(30) 106 | 107 | def test_error_if_width_not_multiple_of_four_width31(self): 108 | self._error_if_width_not_multiple_of_four_helper(31) 109 | 110 | 111 | if __name__ == '__main__': 112 | tf.test.main() 113 | -------------------------------------------------------------------------------- /augmentation/data_agumentation.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from utils.tools import read_from_gt 3 | import imutils 4 | import numpy as np 5 | import imgaug as ia 6 | from imgaug import augmenters as iaa 7 | import time 8 | from utils.tools import save_gt_file, vis_img_bbox 9 | 10 | 11 | def data_agumentation(img, gt_bbox, operation_obj, txts=None, save_flag=None): 12 | ia.seed(int((time.time() * 1000) % 100000)) 13 | shape = np.shape(gt_bbox) 14 | [h, w, _] = np.shape(img) 15 | if shape[1] == 8: 16 | bboxes = np.reshape(gt_bbox, [-1, 4, 2]) 17 | else: 18 | bboxes = gt_bbox 19 | keypoints_on_images = [] 20 | keypoints_imgaug_obj = [] 21 | # print bboxes 22 | # print np.shape(bboxes) 23 | for key_points in bboxes: 24 | # print key_points 25 | for key_point in key_points: 26 | keypoints_imgaug_obj.append(ia.Keypoint(x=key_point[0], y=key_point[1])) 27 | keypoints_on_images.append(ia.KeypointsOnImage(keypoints_imgaug_obj, shape=img.shape)) 28 | 29 | seq_det = operation_obj.to_deterministic() 30 | 31 | img_aug = seq_det.augment_image(img) 32 | key_points_aug = seq_det.augment_keypoints(keypoints_on_images) 33 | key_points_after = [] 34 | for idx, (keypoints_before, keypoints_after) in enumerate(zip(keypoints_on_images, key_points_aug)): 35 | for kp_idx, keypoint in enumerate(keypoints_after.keypoints): 36 | keypoint.x = keypoint.x if keypoint.x < w else w 37 | keypoint.x = keypoint.x if keypoint.x > 0 else 0 38 | keypoint.y = keypoint.y if keypoint.y < h else h 39 | keypoint.y = keypoint.y if keypoint.y > 0 else 0 40 | key_points_after.append([keypoint.x, keypoint.y]) 41 | # print np.shape(key_points_after) 42 | key_points_after = np.reshape(key_points_after, [-1, 4, 2]) 43 | if save_flag: 44 | save_gt_file('./rotated_10.txt', np.reshape(key_points_after, [-1, 8]), txts=txts) 45 | cv2.imwrite('./rotated_10.png', img_aug) 46 | vis_img_bbox('./rotated_10.png', './rotated_10.txt') 47 | return img_aug, np.asarray(key_points_after, np.float32) 48 | 49 | if __name__ == '__main__': 50 | 51 | 52 | # using imgaug package 53 | import random 54 | img = cv2.imread('/home/give/Game/OCR/data/ICPR/rename/100/image_100/10.png') 55 | gt_data = read_from_gt('/home/give/Game/OCR/data/ICPR/rename/100/txt_100/10.txt') 56 | coords = gt_data[0] 57 | bboxes = np.reshape(coords, [-1, 4, 2]) 58 | 59 | angle = np.random.random() * 90 60 | operation_obj = iaa.Crop(px=(0, 10), random_state=np.random.randint(0, 10000)) 61 | # operation_obj = iaa.Affine(rotate=(-angle, angle)) 62 | # operation_obj = iaa.Sequential([iaa.Flipud(1.0)]) 63 | # operation_obj = iaa.Sequential([iaa.Fliplr(1.0)]) 64 | # 
operation_obj = iaa.Sequential([iaa.Dropout(p=(0, 0.1), random_state=np.random.randint(0, 10000))]) 65 | # operation_obj = iaa.Sequential([iaa.AdditiveGaussianNoise(scale=np.random.random() * 30)]) 66 | # operation_obj = iaa.Affine(shear=(-10, 10)) 67 | # fliplr_rate = 0.5 68 | # angle = 10 69 | # additive, contrast_norm = (45, 0.1) 70 | # gaussian_noise, dropout = (0.05, 0.01) 71 | # shear, shift = (2, 20) 72 | # operation_obj = iaa.Sequential([ 73 | # iaa.Sometimes(0.5, iaa.OneOf([ 74 | # iaa.Affine(rotate=(-angle, angle)), 75 | # iaa.ContrastNormalization((1 - contrast_norm, 1 + contrast_norm)) 76 | # ])), 77 | # iaa.Sometimes(0.5, iaa.OneOf([ 78 | # iaa.Sequential([iaa.Flipud(0.5)]), 79 | # iaa.Dropout(dropout) 80 | # ])) 81 | # ]) 82 | data_agumentation(img, bboxes, operation_obj, save_flag=True, txts=gt_data[1]) 83 | 84 | 85 | keypoints_on_images = [] 86 | start = 0 87 | keypoints_imgaug_obj = [] 88 | for key_points in bboxes: 89 | for key_point in key_points: 90 | keypoints_imgaug_obj.append(ia.Keypoint(x=key_point[0], y=key_point[1])) 91 | keypoints_on_images.append(ia.KeypointsOnImage(keypoints_imgaug_obj, shape=img.shape)) 92 | # seq = iaa.Sequential([iaa.GaussianBlur((0, 3.0))]) 93 | # seq = iaa.Sequential([iaa.AdditiveGaussianNoise(scale=10)]) 94 | # seq = iaa.Sequential([iaa.Flipud(0.5)]) 95 | # seq = iaa.Sequential([iaa.Fliplr(0.5)]) 96 | # seq = iaa.Sequential([iaa.Dropout(p=(0, 0.2))]) 97 | # seq = iaa.Sequential([iaa.Affine(rotate=(-10, 10), shear=(-10, 10))]) 98 | seq = iaa.Affine(rotate=(-60, 60), shear=(-20, 20)) 99 | # seq = iaa.Sometimes( 100 | # 0.5, 101 | # iaa.GaussianBlur(sigma=2.0), 102 | # iaa.Sequential([iaa.Affine(rotate=45), iaa.Sharpen(alpha=1.0)]) 103 | # ) 104 | seq_det = seq.to_deterministic() -------------------------------------------------------------------------------- /lanms/include/pybind11/buffer_info.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/buffer_info.h: Python buffer object interface 3 | 4 | Copyright (c) 2016 Wenzel Jakob 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 
8 | */ 9 | 10 | #pragma once 11 | 12 | #include "common.h" 13 | 14 | NAMESPACE_BEGIN(pybind11) 15 | 16 | /// Information record describing a Python buffer object 17 | struct buffer_info { 18 | void *ptr = nullptr; // Pointer to the underlying storage 19 | ssize_t itemsize = 0; // Size of individual items in bytes 20 | ssize_t size = 0; // Total number of entries 21 | std::string format; // For homogeneous buffers, this should be set to format_descriptor::format() 22 | ssize_t ndim = 0; // Number of dimensions 23 | std::vector shape; // Shape of the tensor (1 entry per dimension) 24 | std::vector strides; // Number of entries between adjacent entries (for each per dimension) 25 | 26 | buffer_info() { } 27 | 28 | buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim, 29 | detail::any_container shape_in, detail::any_container strides_in) 30 | : ptr(ptr), itemsize(itemsize), size(1), format(format), ndim(ndim), 31 | shape(std::move(shape_in)), strides(std::move(strides_in)) { 32 | if (ndim != (ssize_t) shape.size() || ndim != (ssize_t) strides.size()) 33 | pybind11_fail("buffer_info: ndim doesn't match shape and/or strides length"); 34 | for (size_t i = 0; i < (size_t) ndim; ++i) 35 | size *= shape[i]; 36 | } 37 | 38 | template 39 | buffer_info(T *ptr, detail::any_container shape_in, detail::any_container strides_in) 40 | : buffer_info(private_ctr_tag(), ptr, sizeof(T), format_descriptor::format(), static_cast(shape_in->size()), std::move(shape_in), std::move(strides_in)) { } 41 | 42 | buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t size) 43 | : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}) { } 44 | 45 | template 46 | buffer_info(T *ptr, ssize_t size) 47 | : buffer_info(ptr, sizeof(T), format_descriptor::format(), size) { } 48 | 49 | explicit buffer_info(Py_buffer *view, bool ownview = true) 50 | : buffer_info(view->buf, view->itemsize, view->format, view->ndim, 51 | {view->shape, view->shape + view->ndim}, {view->strides, view->strides + view->ndim}) { 52 | this->view = view; 53 | this->ownview = ownview; 54 | } 55 | 56 | buffer_info(const buffer_info &) = delete; 57 | buffer_info& operator=(const buffer_info &) = delete; 58 | 59 | buffer_info(buffer_info &&other) { 60 | (*this) = std::move(other); 61 | } 62 | 63 | buffer_info& operator=(buffer_info &&rhs) { 64 | ptr = rhs.ptr; 65 | itemsize = rhs.itemsize; 66 | size = rhs.size; 67 | format = std::move(rhs.format); 68 | ndim = rhs.ndim; 69 | shape = std::move(rhs.shape); 70 | strides = std::move(rhs.strides); 71 | std::swap(view, rhs.view); 72 | std::swap(ownview, rhs.ownview); 73 | return *this; 74 | } 75 | 76 | ~buffer_info() { 77 | if (view && ownview) { PyBuffer_Release(view); delete view; } 78 | } 79 | 80 | private: 81 | struct private_ctr_tag { }; 82 | 83 | buffer_info(private_ctr_tag, void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim, 84 | detail::any_container &&shape_in, detail::any_container &&strides_in) 85 | : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in)) { } 86 | 87 | Py_buffer *view = nullptr; 88 | bool ownview = false; 89 | }; 90 | 91 | NAMESPACE_BEGIN(detail) 92 | 93 | template struct compare_buffer_info { 94 | static bool compare(const buffer_info& b) { 95 | return b.format == format_descriptor::format() && b.itemsize == (ssize_t) sizeof(T); 96 | } 97 | }; 98 | 99 | template struct compare_buffer_info::value>> { 100 | static bool compare(const buffer_info& b) { 101 | return (size_t) b.itemsize == 
sizeof(T) && (b.format == format_descriptor::value || 102 | ((sizeof(T) == sizeof(long)) && b.format == (std::is_unsigned::value ? "L" : "l")) || 103 | ((sizeof(T) == sizeof(size_t)) && b.format == (std::is_unsigned::value ? "N" : "n"))); 104 | } 105 | }; 106 | 107 | NAMESPACE_END(detail) 108 | NAMESPACE_END(pybind11) 109 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/dcgan_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for dcgan.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from six.moves import xrange # pylint: disable=redefined-builtin 22 | import tensorflow as tf 23 | 24 | from nets import dcgan 25 | 26 | 27 | class DCGANTest(tf.test.TestCase): 28 | 29 | def test_generator_run(self): 30 | tf.set_random_seed(1234) 31 | noise = tf.random_normal([100, 64]) 32 | image, _ = dcgan.generator(noise) 33 | with self.test_session() as sess: 34 | sess.run(tf.global_variables_initializer()) 35 | image.eval() 36 | 37 | def test_generator_graph(self): 38 | tf.set_random_seed(1234) 39 | # Check graph construction for a number of image size/depths and batch 40 | # sizes. 41 | for i, batch_size in zip(xrange(3, 7), xrange(3, 8)): 42 | tf.reset_default_graph() 43 | final_size = 2 ** i 44 | noise = tf.random_normal([batch_size, 64]) 45 | image, end_points = dcgan.generator( 46 | noise, 47 | depth=32, 48 | final_size=final_size) 49 | 50 | self.assertAllEqual([batch_size, final_size, final_size, 3], 51 | image.shape.as_list()) 52 | 53 | expected_names = ['deconv%i' % j for j in xrange(1, i)] + ['logits'] 54 | self.assertSetEqual(set(expected_names), set(end_points.keys())) 55 | 56 | # Check layer depths. 
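      # (Each deconv block in dcgan.generator doubles the spatial size while
      # halving the channel count, so layer 'deconv%i' % j is expected to
      # carry 32 * 2**(i - j - 1) channels, which is what this loop asserts.)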
57 | for j in range(1, i): 58 | layer = end_points['deconv%i' % j] 59 | self.assertEqual(32 * 2**(i-j-1), layer.get_shape().as_list()[-1]) 60 | 61 | def test_generator_invalid_input(self): 62 | wrong_dim_input = tf.zeros([5, 32, 32]) 63 | with self.assertRaises(ValueError): 64 | dcgan.generator(wrong_dim_input) 65 | 66 | correct_input = tf.zeros([3, 2]) 67 | with self.assertRaisesRegexp(ValueError, 'must be a power of 2'): 68 | dcgan.generator(correct_input, final_size=30) 69 | 70 | with self.assertRaisesRegexp(ValueError, 'must be greater than 8'): 71 | dcgan.generator(correct_input, final_size=4) 72 | 73 | def test_discriminator_run(self): 74 | image = tf.random_uniform([5, 32, 32, 3], -1, 1) 75 | output, _ = dcgan.discriminator(image) 76 | with self.test_session() as sess: 77 | sess.run(tf.global_variables_initializer()) 78 | output.eval() 79 | 80 | def test_discriminator_graph(self): 81 | # Check graph construction for a number of image size/depths and batch 82 | # sizes. 83 | for i, batch_size in zip(xrange(1, 6), xrange(3, 8)): 84 | tf.reset_default_graph() 85 | img_w = 2 ** i 86 | image = tf.random_uniform([batch_size, img_w, img_w, 3], -1, 1) 87 | output, end_points = dcgan.discriminator( 88 | image, 89 | depth=32) 90 | 91 | self.assertAllEqual([batch_size, 1], output.get_shape().as_list()) 92 | 93 | expected_names = ['conv%i' % j for j in xrange(1, i+1)] + ['logits'] 94 | self.assertSetEqual(set(expected_names), set(end_points.keys())) 95 | 96 | # Check layer depths. 97 | for j in range(1, i+1): 98 | layer = end_points['conv%i' % j] 99 | self.assertEqual(32 * 2**(j-1), layer.get_shape().as_list()[-1]) 100 | 101 | def test_discriminator_invalid_input(self): 102 | wrong_dim_img = tf.zeros([5, 32, 32]) 103 | with self.assertRaises(ValueError): 104 | dcgan.discriminator(wrong_dim_img) 105 | 106 | spatially_undefined_shape = tf.placeholder(tf.float32, [5, 32, None, 3]) 107 | with self.assertRaises(ValueError): 108 | dcgan.discriminator(spatially_undefined_shape) 109 | 110 | not_square = tf.zeros([5, 32, 16, 3]) 111 | with self.assertRaisesRegexp(ValueError, 'not have equal width and height'): 112 | dcgan.discriminator(not_square) 113 | 114 | not_power_2 = tf.zeros([5, 30, 30, 3]) 115 | with self.assertRaisesRegexp(ValueError, 'not a power of 2'): 116 | dcgan.discriminator(not_power_2) 117 | 118 | 119 | if __name__ == '__main__': 120 | tf.test.main() 121 | -------------------------------------------------------------------------------- /data_util.py: -------------------------------------------------------------------------------- 1 | ''' 2 | this file is modified from keras implemention of data process multi-threading, 3 | see https://github.com/fchollet/keras/blob/master/keras/utils/data_utils.py 4 | ''' 5 | import time 6 | import numpy as np 7 | import threading 8 | import multiprocessing 9 | try: 10 | import queue 11 | except ImportError: 12 | import Queue as queue 13 | 14 | 15 | class GeneratorEnqueuer(): 16 | """Builds a queue out of a data generator. 17 | 18 | Used in `fit_generator`, `evaluate_generator`, `predict_generator`. 19 | 20 | # Arguments 21 | generator: a generator function which endlessly yields data 22 | use_multiprocessing: use multiprocessing if True, otherwise threading 23 | wait_time: time to sleep in-between calls to `put()` 24 | random_seed: Initial seed for workers, 25 | will be incremented by one for each workers. 
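    # Example
        A minimal usage sketch (illustrative only; `my_generator` stands
        for any generator that endlessly yields batches):

        ```python
        enqueuer = GeneratorEnqueuer(my_generator, use_multiprocessing=False)
        enqueuer.start(workers=2, max_queue_size=10)
        batch = next(enqueuer.get())
        enqueuer.stop()
        ```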
26 | """ 27 | 28 | def __init__(self, generator, 29 | use_multiprocessing=False, 30 | wait_time=0.05, 31 | random_seed=None): 32 | self.wait_time = wait_time 33 | self._generator = generator 34 | self._use_multiprocessing = use_multiprocessing 35 | self._threads = [] 36 | self._stop_event = None 37 | self.queue = None 38 | self.random_seed = random_seed 39 | 40 | def start(self, workers=1, max_queue_size=10): 41 | """Kicks off threads which add data from the generator into the queue. 42 | 43 | # Arguments 44 | workers: number of worker threads 45 | max_queue_size: queue size 46 | (when full, threads could block on `put()`) 47 | """ 48 | 49 | def data_generator_task(): 50 | while not self._stop_event.is_set(): 51 | try: 52 | if self._use_multiprocessing or self.queue.qsize() < max_queue_size: 53 | generator_output = next(self._generator) 54 | self.queue.put(generator_output) 55 | else: 56 | time.sleep(self.wait_time) 57 | except Exception: 58 | self._stop_event.set() 59 | raise 60 | 61 | try: 62 | if self._use_multiprocessing: 63 | self.queue = multiprocessing.Queue(maxsize=max_queue_size) 64 | self._stop_event = multiprocessing.Event() 65 | else: 66 | self.queue = queue.Queue() 67 | self._stop_event = threading.Event() 68 | 69 | for _ in range(workers): 70 | if self._use_multiprocessing: 71 | # Reset random seed else all children processes 72 | # share the same seed 73 | np.random.seed(self.random_seed) 74 | thread = multiprocessing.Process(target=data_generator_task) 75 | thread.daemon = True 76 | if self.random_seed is not None: 77 | self.random_seed += 1 78 | else: 79 | thread = threading.Thread(target=data_generator_task) 80 | self._threads.append(thread) 81 | thread.start() 82 | except: 83 | self.stop() 84 | raise 85 | 86 | def is_running(self): 87 | return self._stop_event is not None and not self._stop_event.is_set() 88 | 89 | def stop(self, timeout=None): 90 | """Stops running threads and wait for them to exit, if necessary. 91 | 92 | Should be called by the same thread which called `start()`. 93 | 94 | # Arguments 95 | timeout: maximum time to wait on `thread.join()`. 96 | """ 97 | if self.is_running(): 98 | self._stop_event.set() 99 | 100 | for thread in self._threads: 101 | if thread.is_alive(): 102 | if self._use_multiprocessing: 103 | thread.terminate() 104 | else: 105 | thread.join(timeout) 106 | 107 | if self._use_multiprocessing: 108 | if self.queue is not None: 109 | self.queue.close() 110 | 111 | self._threads = [] 112 | self._stop_event = None 113 | self.queue = None 114 | 115 | def get(self): 116 | """Creates a generator to extract data from the queue. 117 | 118 | Skip the data if it is `None`. 119 | 120 | # Returns 121 | A generator 122 | """ 123 | while self.is_running(): 124 | if not self.queue.empty(): 125 | inputs = self.queue.get() 126 | if inputs is not None: 127 | yield inputs 128 | else: 129 | time.sleep(self.wait_time) -------------------------------------------------------------------------------- /lanms/.ycm_extra_conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright (C) 2014 Google Inc. 4 | # 5 | # This file is part of YouCompleteMe. 6 | # 7 | # YouCompleteMe is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 
11 | # 12 | # YouCompleteMe is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU General Public License 18 | # along with YouCompleteMe. If not, see . 19 | 20 | import os 21 | import sys 22 | import glob 23 | import ycm_core 24 | 25 | # These are the compilation flags that will be used in case there's no 26 | # compilation database set (by default, one is not set). 27 | # CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR. 28 | sys.path.append(os.path.dirname(__file__)) 29 | 30 | 31 | BASE_DIR = os.path.dirname(os.path.realpath(__file__)) 32 | 33 | from plumbum.cmd import python_config 34 | 35 | 36 | flags = [ 37 | '-Wall', 38 | '-Wextra', 39 | '-Wnon-virtual-dtor', 40 | '-Winvalid-pch', 41 | '-Wno-unused-local-typedefs', 42 | '-std=c++11', 43 | '-x', 'c++', 44 | '-Iinclude', 45 | ] + python_config('--cflags').split() 46 | 47 | 48 | # Set this to the absolute path to the folder (NOT the file!) containing the 49 | # compile_commands.json file to use that instead of 'flags'. See here for 50 | # more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html 51 | # 52 | # Most projects will NOT need to set this to anything; you can just change the 53 | # 'flags' list of compilation flags. 54 | compilation_database_folder = '' 55 | 56 | if os.path.exists( compilation_database_folder ): 57 | database = ycm_core.CompilationDatabase( compilation_database_folder ) 58 | else: 59 | database = None 60 | 61 | SOURCE_EXTENSIONS = [ '.cpp', '.cxx', '.cc', '.c', '.m', '.mm' ] 62 | 63 | def DirectoryOfThisScript(): 64 | return os.path.dirname( os.path.abspath( __file__ ) ) 65 | 66 | 67 | def MakeRelativePathsInFlagsAbsolute( flags, working_directory ): 68 | if not working_directory: 69 | return list( flags ) 70 | new_flags = [] 71 | make_next_absolute = False 72 | path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ] 73 | for flag in flags: 74 | new_flag = flag 75 | 76 | if make_next_absolute: 77 | make_next_absolute = False 78 | if not flag.startswith( '/' ): 79 | new_flag = os.path.join( working_directory, flag ) 80 | 81 | for path_flag in path_flags: 82 | if flag == path_flag: 83 | make_next_absolute = True 84 | break 85 | 86 | if flag.startswith( path_flag ): 87 | path = flag[ len( path_flag ): ] 88 | new_flag = path_flag + os.path.join( working_directory, path ) 89 | break 90 | 91 | if new_flag: 92 | new_flags.append( new_flag ) 93 | return new_flags 94 | 95 | 96 | def IsHeaderFile( filename ): 97 | extension = os.path.splitext( filename )[ 1 ] 98 | return extension in [ '.h', '.hxx', '.hpp', '.hh' ] 99 | 100 | 101 | def GetCompilationInfoForFile( filename ): 102 | # The compilation_commands.json file generated by CMake does not have entries 103 | # for header files. So we do our best by asking the db for flags for a 104 | # corresponding source file, if any. If one exists, the flags for that file 105 | # should be good enough. 
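  # (For example, given "foo.h" this probes "foo.cpp", "foo.cxx", "foo.cc",
  # and so on in the same directory, in the order listed in SOURCE_EXTENSIONS.)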
106 | if IsHeaderFile( filename ): 107 | basename = os.path.splitext( filename )[ 0 ] 108 | for extension in SOURCE_EXTENSIONS: 109 | replacement_file = basename + extension 110 | if os.path.exists( replacement_file ): 111 | compilation_info = database.GetCompilationInfoForFile( 112 | replacement_file ) 113 | if compilation_info.compiler_flags_: 114 | return compilation_info 115 | return None 116 | return database.GetCompilationInfoForFile( filename ) 117 | 118 | 119 | # This is the entry point; this function is called by ycmd to produce flags for 120 | # a file. 121 | def FlagsForFile( filename, **kwargs ): 122 | if database: 123 | # Bear in mind that compilation_info.compiler_flags_ does NOT return a 124 | # python list, but a "list-like" StringVec object 125 | compilation_info = GetCompilationInfoForFile( filename ) 126 | if not compilation_info: 127 | return None 128 | 129 | final_flags = MakeRelativePathsInFlagsAbsolute( 130 | compilation_info.compiler_flags_, 131 | compilation_info.compiler_working_dir_ ) 132 | else: 133 | relative_to = DirectoryOfThisScript() 134 | final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to ) 135 | 136 | return { 137 | 'flags': final_flags, 138 | 'do_cache': True 139 | } 140 | 141 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/cifarnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a variant of the CIFAR-10 model definition.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(stddev=stddev) 26 | 27 | 28 | def cifarnet(images, num_classes=10, is_training=False, 29 | dropout_keep_prob=0.5, 30 | prediction_fn=slim.softmax, 31 | scope='CifarNet'): 32 | """Creates a variant of the CifarNet model. 33 | 34 | Note that since the output is a set of 'logits', the values fall in the 35 | interval of (-infinity, infinity). Consequently, to convert the outputs to a 36 | probability distribution over the characters, one will need to convert them 37 | using the softmax function: 38 | 39 | logits = cifarnet.cifarnet(images, is_training=False) 40 | probabilities = tf.nn.softmax(logits) 41 | predictions = tf.argmax(logits, 1) 42 | 43 | Args: 44 | images: A batch of `Tensors` of size [batch_size, height, width, channels]. 45 | num_classes: the number of classes in the dataset. If 0 or None, the logits 46 | layer is omitted and the input features to the logits layer are returned 47 | instead. 48 | is_training: specifies whether or not we're currently training the model. 
49 | This variable will determine the behaviour of the dropout layer. 50 | dropout_keep_prob: the percentage of activation values that are retained. 51 | prediction_fn: a function to get predictions out of logits. 52 | scope: Optional variable_scope. 53 | 54 | Returns: 55 | net: a 2D Tensor with the logits (pre-softmax activations) if num_classes 56 | is a non-zero integer, or the input to the logits layer if num_classes 57 | is 0 or None. 58 | end_points: a dictionary from components of the network to the corresponding 59 | activation. 60 | """ 61 | end_points = {} 62 | 63 | with tf.variable_scope(scope, 'CifarNet', [images]): 64 | net = slim.conv2d(images, 64, [5, 5], scope='conv1') 65 | end_points['conv1'] = net 66 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') 67 | end_points['pool1'] = net 68 | net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1') 69 | net = slim.conv2d(net, 64, [5, 5], scope='conv2') 70 | end_points['conv2'] = net 71 | net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2') 72 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') 73 | end_points['pool2'] = net 74 | net = slim.flatten(net) 75 | end_points['Flatten'] = net 76 | net = slim.fully_connected(net, 384, scope='fc3') 77 | end_points['fc3'] = net 78 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 79 | scope='dropout3') 80 | net = slim.fully_connected(net, 192, scope='fc4') 81 | end_points['fc4'] = net 82 | if not num_classes: 83 | return net, end_points 84 | logits = slim.fully_connected(net, num_classes, 85 | biases_initializer=tf.zeros_initializer(), 86 | weights_initializer=trunc_normal(1/192.0), 87 | weights_regularizer=None, 88 | activation_fn=None, 89 | scope='logits') 90 | 91 | end_points['Logits'] = logits 92 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 93 | 94 | return logits, end_points 95 | cifarnet.default_image_size = 32 96 | 97 | 98 | def cifarnet_arg_scope(weight_decay=0.004): 99 | """Defines the default cifarnet argument scope. 100 | 101 | Args: 102 | weight_decay: The weight decay to use for regularizing the model. 103 | 104 | Returns: 105 | An `arg_scope` to use for the inception v3 model. 106 | """ 107 | with slim.arg_scope( 108 | [slim.conv2d], 109 | weights_initializer=tf.truncated_normal_initializer(stddev=5e-2), 110 | activation_fn=tf.nn.relu): 111 | with slim.arg_scope( 112 | [slim.fully_connected], 113 | biases_initializer=tf.constant_initializer(0.1), 114 | weights_initializer=trunc_normal(0.04), 115 | weights_regularizer=slim.l2_regularizer(weight_decay), 116 | activation_fn=tf.nn.relu) as sc: 117 | return sc 118 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/mobilenet_v1_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Validate mobilenet_v1 with options for quantization.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import math 22 | import tensorflow as tf 23 | 24 | from datasets import dataset_factory 25 | from nets import mobilenet_v1 26 | from preprocessing import preprocessing_factory 27 | 28 | slim = tf.contrib.slim 29 | 30 | flags = tf.app.flags 31 | 32 | flags.DEFINE_string('master', '', 'Session master') 33 | flags.DEFINE_integer('batch_size', 250, 'Batch size') 34 | flags.DEFINE_integer('num_classes', 1001, 'Number of classes to distinguish') 35 | flags.DEFINE_integer('num_examples', 50000, 'Number of examples to evaluate') 36 | flags.DEFINE_integer('image_size', 224, 'Input image resolution') 37 | flags.DEFINE_float('depth_multiplier', 1.0, 'Depth multiplier for mobilenet') 38 | flags.DEFINE_bool('quantize', False, 'Quantize training') 39 | flags.DEFINE_string('checkpoint_dir', '', 'The directory for checkpoints') 40 | flags.DEFINE_string('eval_dir', '', 'Directory for writing eval event logs') 41 | flags.DEFINE_string('dataset_dir', '', 'Location of dataset') 42 | 43 | FLAGS = flags.FLAGS 44 | 45 | 46 | def imagenet_input(is_training): 47 | """Data reader for imagenet. 48 | 49 | Reads in imagenet data and performs pre-processing on the images. 50 | 51 | Args: 52 | is_training: bool specifying if train or validation dataset is needed. 53 | Returns: 54 | A batch of images and labels. 55 | """ 56 | if is_training: 57 | dataset = dataset_factory.get_dataset('imagenet', 'train', 58 | FLAGS.dataset_dir) 59 | else: 60 | dataset = dataset_factory.get_dataset('imagenet', 'validation', 61 | FLAGS.dataset_dir) 62 | 63 | provider = slim.dataset_data_provider.DatasetDataProvider( 64 | dataset, 65 | shuffle=is_training, 66 | common_queue_capacity=2 * FLAGS.batch_size, 67 | common_queue_min=FLAGS.batch_size) 68 | [image, label] = provider.get(['image', 'label']) 69 | 70 | image_preprocessing_fn = preprocessing_factory.get_preprocessing( 71 | 'mobilenet_v1', is_training=is_training) 72 | 73 | image = image_preprocessing_fn(image, FLAGS.image_size, FLAGS.image_size) 74 | 75 | images, labels = tf.train.batch( 76 | tensors=[image, label], 77 | batch_size=FLAGS.batch_size, 78 | num_threads=4, 79 | capacity=5 * FLAGS.batch_size) 80 | return images, labels 81 | 82 | 83 | def metrics(logits, labels): 84 | """Specify the metrics for eval. 85 | 86 | Args: 87 | logits: Logits output from the graph. 88 | labels: Ground truth labels for inputs. 89 | 90 | Returns: 91 | Eval Op for the graph. 92 | """ 93 | labels = tf.squeeze(labels) 94 | names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({ 95 | 'Accuracy': tf.metrics.accuracy(tf.argmax(logits, 1), labels), 96 | 'Recall_5': tf.metrics.recall_at_k(labels, logits, 5), 97 | }) 98 | for name, value in names_to_values.iteritems(): 99 | slim.summaries.add_scalar_summary( 100 | value, name, prefix='eval', print_summary=True) 101 | return names_to_updates.values() 102 | 103 | 104 | def build_model(): 105 | """Build the mobilenet_v1 model for evaluation. 106 | 107 | Returns: 108 | g: graph with rewrites after insertion of quantization ops and batch norm 109 | folding. 110 | eval_ops: eval ops for inference. 111 | variables_to_restore: List of variables to restore from checkpoint. 
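    (Note: as the body below shows, only `g` and `eval_ops` are actually
    returned; no list of variables to restore is built here.)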
112 | """ 113 | g = tf.Graph() 114 | with g.as_default(): 115 | inputs, labels = imagenet_input(is_training=False) 116 | 117 | scope = mobilenet_v1.mobilenet_v1_arg_scope( 118 | is_training=False, weight_decay=0.0) 119 | with slim.arg_scope(scope): 120 | logits, _ = mobilenet_v1.mobilenet_v1( 121 | inputs, 122 | is_training=False, 123 | depth_multiplier=FLAGS.depth_multiplier, 124 | num_classes=FLAGS.num_classes) 125 | 126 | if FLAGS.quantize: 127 | tf.contrib.quantize.create_eval_graph() 128 | 129 | eval_ops = metrics(logits, labels) 130 | 131 | return g, eval_ops 132 | 133 | 134 | def eval_model(): 135 | """Evaluates mobilenet_v1.""" 136 | g, eval_ops = build_model() 137 | with g.as_default(): 138 | num_batches = math.ceil(FLAGS.num_examples / float(FLAGS.batch_size)) 139 | slim.evaluation.evaluate_once( 140 | FLAGS.master, 141 | FLAGS.checkpoint_dir, 142 | logdir=FLAGS.eval_dir, 143 | num_evals=num_batches, 144 | eval_op=eval_ops) 145 | 146 | 147 | def main(unused_arg): 148 | eval_model() 149 | 150 | 151 | if __name__ == '__main__': 152 | tf.app.run(main) 153 | -------------------------------------------------------------------------------- /augmentation/test.py: -------------------------------------------------------------------------------- 1 | import math 2 | import cv2 3 | import numpy as np 4 | 5 | 6 | def rotate_image(image, angle): 7 | """ 8 | Rotates an OpenCV 2 / NumPy image about it's centre by the given angle 9 | (in degrees). The returned image will be large enough to hold the entire 10 | new image, with a black background 11 | """ 12 | 13 | # Get the image size 14 | # No that's not an error - NumPy stores image matricies backwards 15 | image_size = (image.shape[1], image.shape[0]) 16 | image_center = tuple(np.array(image_size) / 2) 17 | 18 | # Convert the OpenCV 3x2 rotation matrix to 3x3 19 | rot_mat = np.vstack( 20 | [cv2.getRotationMatrix2D(image_center, angle, 1.0), [0, 0, 1]] 21 | ) 22 | 23 | rot_mat_notranslate = np.matrix(rot_mat[0:2, 0:2]) 24 | 25 | # Shorthand for below calcs 26 | image_w2 = image_size[0] * 0.5 27 | image_h2 = image_size[1] * 0.5 28 | 29 | # Obtain the rotated coordinates of the image corners 30 | rotated_coords = [ 31 | (np.array([-image_w2, image_h2]) * rot_mat_notranslate).A[0], 32 | (np.array([ image_w2, image_h2]) * rot_mat_notranslate).A[0], 33 | (np.array([-image_w2, -image_h2]) * rot_mat_notranslate).A[0], 34 | (np.array([ image_w2, -image_h2]) * rot_mat_notranslate).A[0] 35 | ] 36 | 37 | # Find the size of the new image 38 | x_coords = [pt[0] for pt in rotated_coords] 39 | x_pos = [x for x in x_coords if x > 0] 40 | x_neg = [x for x in x_coords if x < 0] 41 | 42 | y_coords = [pt[1] for pt in rotated_coords] 43 | y_pos = [y for y in y_coords if y > 0] 44 | y_neg = [y for y in y_coords if y < 0] 45 | 46 | right_bound = max(x_pos) 47 | left_bound = min(x_neg) 48 | top_bound = max(y_pos) 49 | bot_bound = min(y_neg) 50 | 51 | new_w = int(abs(right_bound - left_bound)) 52 | new_h = int(abs(top_bound - bot_bound)) 53 | 54 | # We require a translation matrix to keep the image centred 55 | trans_mat = np.matrix([ 56 | [1, 0, int(new_w * 0.5 - image_w2)], 57 | [0, 1, int(new_h * 0.5 - image_h2)], 58 | [0, 0, 1] 59 | ]) 60 | 61 | # Compute the tranform for the combined rotation and translation 62 | affine_mat = (np.matrix(trans_mat) * np.matrix(rot_mat))[0:2, :] 63 | 64 | # Apply the transform 65 | result = cv2.warpAffine( 66 | image, 67 | affine_mat, 68 | (new_w, new_h), 69 | flags=cv2.INTER_LINEAR 70 | ) 71 | 72 | return result 73 | 74 | 75 | def 
largest_rotated_rect(w, h, angle):
    """
    Given a rectangle of size wxh that has been rotated by 'angle' (in
    radians), computes the width and height of the largest possible
    axis-aligned rectangle within the rotated rectangle.

    Original JS code by 'Andri' and Magnus Hoff from Stack Overflow

    Converted to Python by Aaron Snoswell

    Note: the original Python port computed gamma as math.atan2(bb_w, bb_w)
    in both branches of a conditional (always pi/4), which gives wrong
    results for non-square inputs. This version uses the closed-form
    solution for the maximal inscribed axis-aligned rectangle instead.
    """

    if w <= 0 or h <= 0:
        return 0, 0

    # Normalise the angle to [0, pi/2]; the problem is symmetric beyond that.
    angle = abs(angle) % math.pi
    if angle > math.pi / 2:
        angle = math.pi - angle

    width_is_longer = w >= h
    side_long, side_short = (w, h) if width_is_longer else (h, w)

    sin_a, cos_a = abs(math.sin(angle)), abs(math.cos(angle))
    if side_short <= 2.0 * sin_a * cos_a * side_long or abs(sin_a - cos_a) < 1e-10:
        # Half-constrained case: two crop corners touch the longer side,
        # the other two lie on a mid-line parallel to it.
        x = 0.5 * side_short
        bb_w, bb_h = (x / sin_a, x / cos_a) if width_is_longer else (x / cos_a, x / sin_a)
    else:
        # Fully constrained case: the crop touches all four sides.
        cos_2a = cos_a * cos_a - sin_a * sin_a
        bb_w = (w * cos_a - h * sin_a) / cos_2a
        bb_h = (h * cos_a - w * sin_a) / cos_2a

    return bb_w, bb_h


def crop_around_center(image, width, height):
    """
    Given a NumPy / OpenCV 2 image, crops it to the given width and height,
    around its centre point
    """

    image_size = (image.shape[1], image.shape[0])
    image_center = (int(image_size[0] * 0.5), int(image_size[1] * 0.5))

    if width > image_size[0]:
        width = image_size[0]

    if height > image_size[1]:
        height = image_size[1]

    x1 = int(image_center[0] - width * 0.5)
    x2 = int(image_center[0] + width * 0.5)
    y1 = int(image_center[1] - height * 0.5)
    y2 = int(image_center[1] + height * 0.5)

    return image[y1:y2, x1:x2]


def demo():
    """
    Demos the largest_rotated_rect function
    """

    image = cv2.imread("/home/give/Game/OCR/data/ICPR/rename/100/image_100/10.png")
    print(np.shape(image))
    image_height, image_width = image.shape[0:2]

    cv2.imshow("Original Image", image)

    print("Press [enter] to begin the demo")
    print("Press [q] or Escape to quit")

    key = cv2.waitKey(0)
    if key == ord("q") or key == 27:
        exit()

    for i in np.arange(0, 360, 0.5):
        image_orig = np.copy(image)
        image_rotated = rotate_image(image, i)
        image_rotated_cropped = crop_around_center(
            image_rotated,
            *largest_rotated_rect(
                image_width,
                image_height,
                math.radians(i)
            )
        )

        key = cv2.waitKey(2)
        if key == ord("q") or key == 27:
            exit()

        cv2.imshow("Original Image", image_orig)
        cv2.imshow("Rotated Image", image_rotated)
        cv2.imshow("Cropped Image", image_rotated_cropped)

    print("Done")


if __name__ == "__main__":
    demo()
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
- Text detection competition held at ICPR 2018; details [here](https://tianchi.aliyun.com/competition/introduction.htm?spm=5176.100066.0.0.6acdd780TvrRix&raceId=231651).
- Task description: detect the locations of text in an image.
- Three approaches were tried, in order:
    - CTPN, built on Faster R-CNN ([code](https://github.com/eragonruan/text-detection-ctpn)). Running the released pretrained model directly on this dataset gives an F1 score on the order of only 0.10. For details on CTPN, see my post [[Paper reading] CTPN](https://blog.csdn.net/liangdong2014/article/details/79690118).
    - EAST, built on U-Net ([code](https://github.com/argman/EAST)). Running the released pretrained model directly on this dataset gives an F1 score on the order of 0.20. For details on EAST, see my post [[Paper reading] EAST](https://blog.csdn.net/liangdong2014/article/details/79857061).
    - PixelLink, also U-Net based; there is no open-source code, the paper is [here](https://arxiv.org/pdf/1801.01315.pdf). It argues that traditional Faster-R-CNN-style methods must choose proposal sizes and therefore cannot handle arbitrary scales, and that EAST's geometry loss, like CTPN's, is a regression on the localization. The authors hold that the text bounding boxes can be obtained directly from the text / non-text prediction, so the regression is unnecessary: they recover boxes from the score prediction with OpenCV's minAreaRect (a small sketch of that step follows this list).
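The minAreaRect step is easy to picture with a short sketch. This is my own illustration, not code from the PixelLink authors; `score_map` is a hypothetical `[H, W]` array of per-pixel text probabilities:

```python
import cv2
import numpy as np


def boxes_from_score_map(score_map, threshold=0.8):
    """Recover rotated bounding boxes from a text / non-text score map."""
    mask = (score_map > threshold).astype(np.uint8)
    # findContours returns 2 or 3 values depending on the OpenCV version.
    res = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = res[-2]
    boxes = []
    for cnt in contours:
        rect = cv2.minAreaRect(cnt)        # ((cx, cy), (w, h), angle)
        boxes.append(cv2.boxPoints(rect))  # the 4 corner points (OpenCV >= 3)
    return boxes
```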
- Choice:
    - I finally went with the second approach, U-Net-based EAST. First, I believe a U-Net-based method can in principle detect objects at arbitrary scales. Second, PixelLink's way of handling boxes with large overlaps does not really suit this dataset (perhaps I misunderstand it); see below.
    - How does PixelLink handle overlap? As the paper describes, PixelLink has two kinds of ground truth: a label map (1 channel) marking whether each pixel is text, and a link map (8 channels) marking, for each pixel, whether each of its 8 neighbours belongs to the same text instance (1 if so, 0 otherwise). Where boxes overlap, the authors zero out both the score map and the link map in the overlapping region. That works on images like Figure 1, but on images like Figure 2 it splits two overlapping bounding boxes into four. On the ICPR dataset this matters a lot.
![image](http://ocnsbpp0d.bkt.clouddn.com/PixelLink.png)

![image](http://ocnsbpp0d.bkt.clouddn.com/14.png)
- Attempted improvements:
    - Eight versions were tried, one after another (a loss sketch follows this list):
    - east_icdar2015_resnet_v1_50_rbox: We first found that deconvolving EAST all the way back to the original image size works better (the original deconvolves to 1/4 or 1/2 of the input size), probably because our dataset has few small text instances.
    - east_icdar2015_resnet_v1_50_rbox_v1: The original code uses only resize for upsampling, presumably to avoid checkerboard artifacts; I added a convolution after each resize. This checkpoint is the previous version with (conv + resize) in place of (resize).
    - east_icdar2015_resnet_v1_50_rbox_v2: The above plus OHEM (applied only to the geometry, not to the score map). The PixelLink paper uses OHEM to select hard negative pixels and counter the imbalance between positive and negative text pixels.
    - east_icdar2015_resnet_v1_50_rbox_v3: v2 with the backbone changed from ResNet-50 to an Inception-ResNet model.
    - east_icdar2015_resnet_v1_50_rbox_v4: v3 plus an instance-balanced cross-entropy loss, also taken from PixelLink. It keeps text instances of different sizes from contributing unequally to the loss (large instances otherwise dominate). I also noticed later that the EAST model is less effective on longer text instances than on shorter ones.
    - east_icdar2015_resnet_v1_50_rbox_v5: v4 plus a BLSTM to extract global features, borrowing from CTPN, which uses a BLSTM for exactly that. The aim is to widen each pixel's receptive field so that large instances are predicted more accurately; concretely, an LSTM extracts features from each feature map that is about to be deconvolved.
    - east_icdar2015_resnet_v1_50_rbox_v6: v4 plus an extra optimization branch, an IoU loss. In EAST, at test time a bounding box's score is the score of a single pixel, which is somewhat arbitrary and unfair to the whole box. We came up with two fixes:
        - At test time, use the mean score inside the bounding box as its score. Experiments show an improvement, but a small one, and test time increases substantially.
        - As above, add a branch to EAST that predicts each bounding box's IoU and train it with a Smooth L1 loss against the IoU computed from the geometry and score predictions. The problem is that training time blows up: for a 512*512 image we must compute 512*512 IoUs per image, and only on the CPU (I could not do it on the GPU), so it is very slow; for lack of time this run was not finished.
    - east_icdar2015_resnet_v1_50_rbox_v7: v4 plus instance-balanced weights.
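For reference, here is a minimal sketch of the class-balanced cross-entropy used for the score map in v4. This is my own illustration of the loss described in the EAST paper, not a copy of the training code; `y_true` and `y_pred` are assumed to be `[N, H, W, 1]` tensors of ground-truth labels and predicted scores:

```python
import tensorflow as tf


def balanced_cross_entropy(y_true, y_pred, eps=1e-5):
    # beta is the fraction of negative (non-text) pixels, so the rare
    # positive class is weighted up and the common class down.
    beta = 1.0 - tf.reduce_mean(y_true)
    loss = -(beta * y_true * tf.log(y_pred + eps)
             + (1.0 - beta) * (1.0 - y_true) * tf.log(1.0 - y_pred + eps))
    return tf.reduce_mean(loss)
```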
- Data augmentation:
    - rotate
    - Flipud (flip up-down)
    - Fliplr (flip left-right)
    - random Dropout
    - random additive noise
- Results:
    - Initial version
        - Evaluation by our own method (153578 steps)
            - Precision is 0.5885
            - Recall is 0.4008
            - F1 score is 0.4769
        - Evaluation by the ICDAR method (153578 steps)
            - "precision": 0.7764084507042254
            - "recall": 0.43192948090107736
            - "hmean": 0.5550660792951542
    - Deconvolution added back to the original image size
        - Evaluation by our own method (408899 steps)
            - Precision is 0.5705
            - Recall is 0.4433
            - F1 score is 0.4989
        - Evaluation by the ICDAR method (408899 steps)
            - "recall": 0.5039177277179236
            - "precision": 0.7516435354273192
            - "hmean": 0.6033421284080915
    - Deconvolution to the original size + conv in place of unpool + score map folded into the geometry
        - Evaluation by our own method (110930 steps)
            - Precision is 0.5343
            - Recall is 0.4330
            - F1 score is 0.4784
    - Deconvolution to the original size + conv in place of unpool + score map folded into the geometry + OHEM for geometry
        - Evaluation by our own method (148415 steps)
            - Precision is 0.5461
            - Recall is 0.4589
            - F1 score is 0.4987
        - Evaluation by the ICDAR method (148415 steps)
            - "recall": 0.5269343780607247
            - "precision": 0.727027027027027
            - "hmean": 0.6110164679159569
    - Inception-ResNet version (v3)
        - Evaluation by our own method (257009 steps)
            - Precision is 0.5315
            - Recall is 0.4306
            - F1 score is 0.4758
        - Evaluation by the ICDAR method (257009 steps)
            - "recall": 0.5259549461312438
            - "precision": 0.7351129363449692
            - "hmean": 0.6131886954039394
    - Inception-ResNet version + balanced cross-entropy loss, without OHEM (v4)
        - Evaluation by our own method (423777 steps)
            - Precision is 0.5633
            - Recall is 0.4601
            - F1 score is 0.5065
            - 1000 version: 0.51
        - Evaluation by the ICDAR method (423777 steps)
            - "recall": 0.539177277179236
            - "precision": 0.7582644628099173
            - "hmean": 0.6302232398397253
- Future
    - Personally I think U-Net-based methods will become the mainstream, mainly because:
        - The code is simple and easy to understand; given two methods that solve the same problem equally well, people prefer the simpler one.
        - U-Net handles object scale more or less arbitrarily.
    - What could still be improved?
        - EAST still localizes long text instances inaccurately, and the methods described above do not solve that.
        - During NMS, a single pixel's score is not a fair score for the whole bounding box.
    - Full code: [UpCoder-EAST](https://github.com/UpCoder/ICPR_TextDection), adapted mainly from the original EAST implementation: [EAST](https://github.com/argman/EAST)
--------------------------------------------------------------------------------
/nets/NASNet/pnasnet_test.py:
--------------------------------------------------------------------------------
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 | # ============================================================================== 15 | """Tests for slim.pnasnet.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets.NASNet import pnasnet 23 | 24 | 25 | 26 | slim = tf.contrib.slim 27 | 28 | 29 | class PNASNetTest(tf.test.TestCase): 30 | 31 | def testBuildLogitsLargeModel(self): 32 | batch_size = 5 33 | height, width = 331, 331 34 | num_classes = 1000 35 | inputs = tf.random_uniform((batch_size, height, width, 3)) 36 | tf.train.create_global_step() 37 | with slim.arg_scope(pnasnet.pnasnet_large_arg_scope()): 38 | logits, end_points = pnasnet.build_pnasnet_large(inputs, num_classes) 39 | auxlogits = end_points['AuxLogits'] 40 | predictions = end_points['Predictions'] 41 | self.assertListEqual(auxlogits.get_shape().as_list(), 42 | [batch_size, num_classes]) 43 | self.assertListEqual(logits.get_shape().as_list(), 44 | [batch_size, num_classes]) 45 | self.assertListEqual(predictions.get_shape().as_list(), 46 | [batch_size, num_classes]) 47 | 48 | def testBuildPreLogitsLargeModel(self): 49 | batch_size = 5 50 | height, width = 331, 331 51 | num_classes = None 52 | inputs = tf.random_uniform((batch_size, height, width, 3)) 53 | tf.train.create_global_step() 54 | with slim.arg_scope(pnasnet.pnasnet_large_arg_scope()): 55 | net, end_points = pnasnet.build_pnasnet_large(inputs, num_classes) 56 | self.assertFalse('AuxLogits' in end_points) 57 | self.assertFalse('Predictions' in end_points) 58 | self.assertTrue(net.op.name.startswith('final_layer/Mean')) 59 | self.assertListEqual(net.get_shape().as_list(), [batch_size, 4320]) 60 | 61 | def testAllEndPointsShapesLargeModel(self): 62 | batch_size = 5 63 | height, width = 331, 331 64 | num_classes = 1000 65 | inputs = tf.random_uniform((batch_size, height, width, 3)) 66 | tf.train.create_global_step() 67 | with slim.arg_scope(pnasnet.pnasnet_large_arg_scope()): 68 | _, end_points = pnasnet.build_pnasnet_large(inputs, num_classes) 69 | 70 | endpoints_shapes = {'Stem': [batch_size, 42, 42, 540], 71 | 'Cell_0': [batch_size, 42, 42, 1080], 72 | 'Cell_1': [batch_size, 42, 42, 1080], 73 | 'Cell_2': [batch_size, 42, 42, 1080], 74 | 'Cell_3': [batch_size, 42, 42, 1080], 75 | 'Cell_4': [batch_size, 21, 21, 2160], 76 | 'Cell_5': [batch_size, 21, 21, 2160], 77 | 'Cell_6': [batch_size, 21, 21, 2160], 78 | 'Cell_7': [batch_size, 21, 21, 2160], 79 | 'Cell_8': [batch_size, 11, 11, 4320], 80 | 'Cell_9': [batch_size, 11, 11, 4320], 81 | 'Cell_10': [batch_size, 11, 11, 4320], 82 | 'Cell_11': [batch_size, 11, 11, 4320], 83 | 'global_pool': [batch_size, 4320], 84 | # Logits and predictions 85 | 'AuxLogits': [batch_size, 1000], 86 | 'Predictions': [batch_size, 1000], 87 | 'Logits': [batch_size, 1000], 88 | } 89 | self.assertEqual(len(end_points), 17) 90 | self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys()) 91 | for endpoint_name in endpoints_shapes: 92 | tf.logging.info('Endpoint name: {}'.format(endpoint_name)) 93 | expected_shape = endpoints_shapes[endpoint_name] 94 | self.assertIn(endpoint_name, end_points) 95 | self.assertListEqual(end_points[endpoint_name].get_shape().as_list(), 96 | expected_shape) 97 | 98 | def testNoAuxHeadLargeModel(self): 99 | batch_size = 5 100 | height, width = 331, 331 101 | num_classes = 1000 102 | for use_aux_head in (True, False): 103 | tf.reset_default_graph() 104 | inputs = tf.random_uniform((batch_size, height, width, 3)) 105 | 
tf.train.create_global_step() 106 | config = pnasnet.large_imagenet_config() 107 | config.set_hparam('use_aux_head', int(use_aux_head)) 108 | with slim.arg_scope(pnasnet.pnasnet_large_arg_scope()): 109 | _, end_points = pnasnet.build_pnasnet_large(inputs, num_classes, 110 | config=config) 111 | self.assertEqual('AuxLogits' in end_points, use_aux_head) 112 | 113 | def testOverrideHParamsLargeModel(self): 114 | batch_size = 5 115 | height, width = 331, 331 116 | num_classes = 1000 117 | inputs = tf.random_uniform((batch_size, height, width, 3)) 118 | tf.train.create_global_step() 119 | config = pnasnet.large_imagenet_config() 120 | config.set_hparam('data_format', 'NCHW') 121 | with slim.arg_scope(pnasnet.pnasnet_large_arg_scope()): 122 | _, end_points = pnasnet.build_pnasnet_large( 123 | inputs, num_classes, config=config) 124 | self.assertListEqual( 125 | end_points['Stem'].shape.as_list(), [batch_size, 540, 42, 42]) 126 | 127 | 128 | if __name__ == '__main__': 129 | tf.test.main() 130 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/overfeat.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains the model definition for the OverFeat network. 16 | 17 | The definition for the network was obtained from: 18 | OverFeat: Integrated Recognition, Localization and Detection using 19 | Convolutional Networks 20 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 21 | Yann LeCun, 2014 22 | http://arxiv.org/abs/1312.6229 23 | 24 | Usage: 25 | with slim.arg_scope(overfeat.overfeat_arg_scope()): 26 | outputs, end_points = overfeat.overfeat(inputs) 27 | 28 | @@overfeat 29 | """ 30 | from __future__ import absolute_import 31 | from __future__ import division 32 | from __future__ import print_function 33 | 34 | import tensorflow as tf 35 | 36 | slim = tf.contrib.slim 37 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 38 | 39 | 40 | def overfeat_arg_scope(weight_decay=0.0005): 41 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 42 | activation_fn=tf.nn.relu, 43 | weights_regularizer=slim.l2_regularizer(weight_decay), 44 | biases_initializer=tf.zeros_initializer()): 45 | with slim.arg_scope([slim.conv2d], padding='SAME'): 46 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: 47 | return arg_sc 48 | 49 | 50 | def overfeat(inputs, 51 | num_classes=1000, 52 | is_training=True, 53 | dropout_keep_prob=0.5, 54 | spatial_squeeze=True, 55 | scope='overfeat', 56 | global_pool=False): 57 | """Contains the model definition for the OverFeat network. 
58 | 59 | The definition for the network was obtained from: 60 | OverFeat: Integrated Recognition, Localization and Detection using 61 | Convolutional Networks 62 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 63 | Yann LeCun, 2014 64 | http://arxiv.org/abs/1312.6229 65 | 66 | Note: All the fully_connected layers have been transformed to conv2d layers. 67 | To use in classification mode, resize input to 231x231. To use in fully 68 | convolutional mode, set spatial_squeeze to false. 69 | 70 | Args: 71 | inputs: a tensor of size [batch_size, height, width, channels]. 72 | num_classes: number of predicted classes. If 0 or None, the logits layer is 73 | omitted and the input features to the logits layer are returned instead. 74 | is_training: whether or not the model is being trained. 75 | dropout_keep_prob: the probability that activations are kept in the dropout 76 | layers during training. 77 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 78 | outputs. Useful to remove unnecessary dimensions for classification. 79 | scope: Optional scope for the variables. 80 | global_pool: Optional boolean flag. If True, the input to the classification 81 | layer is avgpooled to size 1x1, for any input size. (This is not part 82 | of the original OverFeat.) 83 | 84 | Returns: 85 | net: the output of the logits layer (if num_classes is a non-zero integer), 86 | or the non-dropped-out input to the logits layer (if num_classes is 0 or 87 | None). 88 | end_points: a dict of tensors with intermediate activations. 89 | """ 90 | with tf.variable_scope(scope, 'overfeat', [inputs]) as sc: 91 | end_points_collection = sc.original_name_scope + '_end_points' 92 | # Collect outputs for conv2d, fully_connected and max_pool2d 93 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 94 | outputs_collections=end_points_collection): 95 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 96 | scope='conv1') 97 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 98 | net = slim.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2') 99 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 100 | net = slim.conv2d(net, 512, [3, 3], scope='conv3') 101 | net = slim.conv2d(net, 1024, [3, 3], scope='conv4') 102 | net = slim.conv2d(net, 1024, [3, 3], scope='conv5') 103 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 104 | 105 | # Use conv2d instead of fully_connected layers. 106 | with slim.arg_scope([slim.conv2d], 107 | weights_initializer=trunc_normal(0.005), 108 | biases_initializer=tf.constant_initializer(0.1)): 109 | net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6') 110 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 111 | scope='dropout6') 112 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 113 | # Convert end_points_collection into a end_point dict. 
114 | end_points = slim.utils.convert_collection_to_dict( 115 | end_points_collection) 116 | if global_pool: 117 | net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool') 118 | end_points['global_pool'] = net 119 | if num_classes: 120 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 121 | scope='dropout7') 122 | net = slim.conv2d(net, num_classes, [1, 1], 123 | activation_fn=None, 124 | normalizer_fn=None, 125 | biases_initializer=tf.zeros_initializer(), 126 | scope='fc8') 127 | if spatial_squeeze: 128 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 129 | end_points[sc.name + '/fc8'] = net 130 | return net, end_points 131 | overfeat.default_image_size = 231 132 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/alexnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a model definition for AlexNet. 16 | 17 | This work was first described in: 18 | ImageNet Classification with Deep Convolutional Neural Networks 19 | Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton 20 | 21 | and later refined in: 22 | One weird trick for parallelizing convolutional neural networks 23 | Alex Krizhevsky, 2014 24 | 25 | Here we provide the implementation proposed in "One weird trick" and not 26 | "ImageNet Classification", as per the paper, the LRN layers have been removed. 27 | 28 | Usage: 29 | with slim.arg_scope(alexnet.alexnet_v2_arg_scope()): 30 | outputs, end_points = alexnet.alexnet_v2(inputs) 31 | 32 | @@alexnet_v2 33 | """ 34 | 35 | from __future__ import absolute_import 36 | from __future__ import division 37 | from __future__ import print_function 38 | 39 | import tensorflow as tf 40 | 41 | slim = tf.contrib.slim 42 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 43 | 44 | 45 | def alexnet_v2_arg_scope(weight_decay=0.0005): 46 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 47 | activation_fn=tf.nn.relu, 48 | biases_initializer=tf.constant_initializer(0.1), 49 | weights_regularizer=slim.l2_regularizer(weight_decay)): 50 | with slim.arg_scope([slim.conv2d], padding='SAME'): 51 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: 52 | return arg_sc 53 | 54 | 55 | def alexnet_v2(inputs, 56 | num_classes=1000, 57 | is_training=True, 58 | dropout_keep_prob=0.5, 59 | spatial_squeeze=True, 60 | scope='alexnet_v2', 61 | global_pool=False): 62 | """AlexNet version 2. 63 | 64 | Described in: http://arxiv.org/pdf/1404.5997v2.pdf 65 | Parameters from: 66 | github.com/akrizhevsky/cuda-convnet2/blob/master/layers/ 67 | layers-imagenet-1gpu.cfg 68 | 69 | Note: All the fully_connected layers have been transformed to conv2d layers. 
70 | To use in classification mode, resize input to 224x224 or set 71 | global_pool=True. To use in fully convolutional mode, set 72 | spatial_squeeze to false. 73 | The LRN layers have been removed and change the initializers from 74 | random_normal_initializer to xavier_initializer. 75 | 76 | Args: 77 | inputs: a tensor of size [batch_size, height, width, channels]. 78 | num_classes: the number of predicted classes. If 0 or None, the logits layer 79 | is omitted and the input features to the logits layer are returned instead. 80 | is_training: whether or not the model is being trained. 81 | dropout_keep_prob: the probability that activations are kept in the dropout 82 | layers during training. 83 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 84 | logits. Useful to remove unnecessary dimensions for classification. 85 | scope: Optional scope for the variables. 86 | global_pool: Optional boolean flag. If True, the input to the classification 87 | layer is avgpooled to size 1x1, for any input size. (This is not part 88 | of the original AlexNet.) 89 | 90 | Returns: 91 | net: the output of the logits layer (if num_classes is a non-zero integer), 92 | or the non-dropped-out input to the logits layer (if num_classes is 0 93 | or None). 94 | end_points: a dict of tensors with intermediate activations. 95 | """ 96 | with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc: 97 | end_points_collection = sc.original_name_scope + '_end_points' 98 | # Collect outputs for conv2d, fully_connected and max_pool2d. 99 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 100 | outputs_collections=[end_points_collection]): 101 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 102 | scope='conv1') 103 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool1') 104 | net = slim.conv2d(net, 192, [5, 5], scope='conv2') 105 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool2') 106 | net = slim.conv2d(net, 384, [3, 3], scope='conv3') 107 | net = slim.conv2d(net, 384, [3, 3], scope='conv4') 108 | net = slim.conv2d(net, 256, [3, 3], scope='conv5') 109 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool5') 110 | 111 | # Use conv2d instead of fully_connected layers. 112 | with slim.arg_scope([slim.conv2d], 113 | weights_initializer=trunc_normal(0.005), 114 | biases_initializer=tf.constant_initializer(0.1)): 115 | net = slim.conv2d(net, 4096, [5, 5], padding='VALID', 116 | scope='fc6') 117 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 118 | scope='dropout6') 119 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 120 | # Convert end_points_collection into a end_point dict. 
      end_points = slim.utils.convert_collection_to_dict(
          end_points_collection)
      if global_pool:
        net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
        end_points['global_pool'] = net
      if num_classes:
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout7')
        net = slim.conv2d(net, num_classes, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          biases_initializer=tf.zeros_initializer(),
                          scope='fc8')
        if spatial_squeeze:
          net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
alexnet_v2.default_image_size = 224
--------------------------------------------------------------------------------
/utils/tools.py:
--------------------------------------------------------------------------------
# -*- coding=utf-8 -*-
import numpy as np
from PIL import ImageDraw, Image
from scipy.spatial import distance as dist
import cv2


def read_from_gt(gt_file):
    with open(gt_file) as file:
        lines = file.readlines()
    gt_bboxs = []
    txts = []
    for line in lines:
        splited_line = line.split(',')
        splited_line_num = splited_line[:8]
        splited_line_num = [int(float(ele)) for ele in splited_line_num]
        gt_bboxs.append(splited_line_num)
        txts.append(splited_line[8])
    return gt_bboxs, txts


def show_image_from_array(image_arr):
    from PIL import Image
    img = Image.fromarray(image_arr)
    img.show()


def vis_img_bbox(img_file, gt_file):
    img = cv2.imread(img_file)[:, :, ::-1]
    gtbboxes = np.asarray(read_from_gt(gt_file)[0])
    print(np.shape(gtbboxes))
    for box in gtbboxes:
        cv2.polylines(img[:, :, ::-1], [box.astype(np.int32).reshape((-1, 1, 2))], True,
                      color=(255, 255, 0), thickness=1)
    show_image_from_array(img)


def save_gt_file(save_gt_path, coordinations, txts=None):
    with open(save_gt_path, 'wb+') as f:
        strs = []
        start_index = 0
        for idx in range(len(coordinations)):
            cur_str = ','.join([str(element) for element in coordinations[start_index]])
            if txts is None:
                cur_str += ',TXT\n'
            else:
                txts[start_index] = str(txts[start_index]).replace('\n', '')
                cur_str += (',' + txts[start_index] + '\n')
            strs.append(cur_str)
            start_index += 1
        f.writelines(strs)
        f.close()


def cal_TP(overlaps, threshold=0.7):
    shape = list(np.shape(overlaps))
    count = 0
    for i in range(shape[0]):
        max_val = np.max(overlaps[i])
        if max_val >= threshold:
            count += 1
    return count


def cal_FP(overlaps, len_pred, threshold=0.7):
    return len_pred - cal_TP(overlaps, threshold)


def cal_FN(overlaps, len_gt, threshold=0.7):
    '''
    Find the ground-truth boxes that were missed, i.e. boxes that are
    ground truth but were not detected by any predicted bbox.
    :param overlaps:
    :param len_gt:
    :param threshold:
    :return:
    '''
    if (len_gt - cal_TP(overlaps, threshold)) < 0:
        print('Error, FN is negative')
        assert False
    max_value = np.max(overlaps, axis=0)
    return np.sum(max_value < threshold)
    # return len_gt - cal_TP(overlaps, threshold)
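# How the helpers above combine into the precision / recall / F1 numbers
# quoted in the readme -- a minimal sketch (my own illustration; `overlaps`
# comes from bbox_overlaps below, with shape [num_pred, num_gt]):
#
#     tp = cal_TP(overlaps, threshold=0.7)
#     fp = cal_FP(overlaps, len(pred_boxes), threshold=0.7)
#     fn = cal_FN(overlaps, len(gt_boxes), threshold=0.7)
#     precision = tp / float(tp + fp)
#     recall = tp / float(tp + fn)
#     f1 = 2 * precision * recall / (precision + recall)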
img_draw = ImageDraw.Draw(img) 91 | img_draw.polygon(points, fill=128) 92 | return img 93 | def draw_rects(image_arr, boxs): 94 | img = Image.fromarray(image_arr) 95 | img_draw = ImageDraw.Draw(img) 96 | for box in boxs: 97 | points = [[0, 0], [0, 0], [0, 0], [0, 0]] 98 | ind = 0 99 | for i in range(4): 100 | for j in range(2): 101 | points[i][j] = box[ind] 102 | ind += 1 103 | points[i] = tuple(points[i]) 104 | img_draw.polygon(points, fill=128) 105 | return img 106 | def bbox_overlaps(boxes, query_boxes, im_size): 107 | ''' 108 | 109 | :param boxes: (N, 8) ndarray of float, pred 110 | :param query_boxes: (K, 8) ndarray of float, gt 111 | :param im_size: size of the image 112 | the four points are ordered as follows: 113 | 1 4 114 | 2 3 115 | :return: (N, K) ndarray of overlap between boxes and query_boxes 116 | ''' 117 | def cal_overlap(img1, img2): 118 | img1 = np.array(img1) 119 | img2 = np.array(img2) 120 | img1 = (img1 == 128) 121 | img2 = (img2 == 128) 122 | return np.sum(np.logical_and(img1, img2)) 123 | N = boxes.shape[0] 124 | K = query_boxes.shape[0] 125 | overlaps = np.zeros((N, K), dtype=np.float32) 126 | # draw_rects(np.zeros(im_size), query_boxes).show() 127 | # draw_rects(np.zeros(im_size), boxes).show() 128 | for k in range(K): 129 | # compute the area of the ground-truth box by rasterizing it 130 | cur_gt = query_boxes[k] 131 | gt_white_img = np.zeros(im_size, np.uint8) 132 | gt_box_img = draw_rect(gt_white_img, cur_gt) 133 | # gt_box_img.show() 134 | gt_area = np.sum(np.array(gt_box_img) == 128) 135 | for n in range(N): 136 | # rasterize the predicted box and compute its IoU against the ground-truth mask 137 | cur_bbox = boxes[n] 138 | pred_white_img = np.zeros(im_size, np.uint8) 139 | pred_box_img = draw_rect(pred_white_img, cur_bbox) 140 | # pred_box_img.show() 141 | pred_area = np.sum(np.array(pred_box_img) == 128) 142 | overlap_area = cal_overlap(gt_box_img, pred_box_img) 143 | overlaps[n, k] = (overlap_area * 1.0) / ((pred_area + gt_area - overlap_area) * 1.0) 144 | return overlaps 145 | 146 | 147 | def order_points(pts): 148 | # sort the points based on their x-coordinates 149 | xSorted = pts[np.argsort(pts[:, 0]), :] 150 | 151 | # grab the left-most and right-most points from the sorted 152 | # x-coordinate points 153 | leftMost = xSorted[:2, :] 154 | rightMost = xSorted[2:, :] 155 | 156 | # now, sort the left-most coordinates according to their 157 | # y-coordinates so we can grab the top-left and bottom-left 158 | # points, respectively 159 | leftMost = leftMost[np.argsort(leftMost[:, 1]), :] 160 | (tl, bl) = leftMost 161 | 162 | # now that we have the top-left coordinate, use it as an 163 | # anchor to calculate the Euclidean distance between the 164 | # top-left and right-most points; by the Pythagorean 165 | # theorem, the point with the largest distance will be 166 | # our bottom-right point 167 | D = dist.cdist(tl[np.newaxis], rightMost, "euclidean")[0] 168 | (br, tr) = rightMost[np.argsort(D)[::-1], :] 169 | 170 | # return the coordinates in top-left, top-right, 171 | # bottom-right, and bottom-left order 172 | return np.array([tl, tr, br, bl], dtype="float32") 173 | 174 | if __name__ == '__main__': 175 | img_size = [400, 300] 176 | pred_points = np.array( 177 | [ 178 | [0, 0, 0, 100, 100, 100, 100, 0], 179 | [77, 92, 77, 195, 483, 195, 483, 92] 180 | ] 181 | ) 182 | gt_points = np.array( 183 | [ 184 | [0, 0, 0, 50, 50, 50, 50, 0], 185 | [80, 93, 77, 195, 483, 195, 483, 92] 186 | ] 187 | ) 188 | print(bbox_overlaps(pred_points, gt_points, img_size)) --------------------------------------------------------------------------------
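The rasterized IoU above can be sanity-checked against an exact polygon IoU. A minimal cross-check sketch, assuming Shapely (which requirements.txt already pins); the sample pair reuses the first boxes from the __main__ block, whose exact IoU is 2500/10000 = 0.25, so the rasterized estimate should land close to that up to pixel-discretization error:

import numpy as np
from shapely.geometry import Polygon

def shapely_iou(box_a, box_b):
    # each box is 8 floats: x1, y1, x2, y2, x3, y3, x4, y4
    pa = Polygon(np.asarray(box_a, dtype=np.float64).reshape(4, 2))
    pb = Polygon(np.asarray(box_b, dtype=np.float64).reshape(4, 2))
    inter = pa.intersection(pb).area
    union = pa.area + pb.area - inter
    return inter / union if union > 0 else 0.0

print(shapely_iou([0, 0, 0, 100, 100, 100, 100, 0],
                  [0, 0, 0, 50, 50, 50, 50, 0]))  # 0.25

/lanms/lanms.h: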
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "clipper/clipper.hpp" 4 | 5 | // locality-aware NMS 6 | namespace lanms { 7 | 8 | namespace cl = ClipperLib; 9 | 10 | struct Polygon { 11 | cl::Path poly; 12 | float score; 13 | }; 14 | 15 | float paths_area(const ClipperLib::Paths &ps) { 16 | float area = 0; 17 | for (auto &&p: ps) 18 | area += cl::Area(p); 19 | return area; 20 | } 21 | 22 | float poly_iou(const Polygon &a, const Polygon &b) { 23 | cl::Clipper clpr; 24 | clpr.AddPath(a.poly, cl::ptSubject, true); 25 | clpr.AddPath(b.poly, cl::ptClip, true); 26 | 27 | cl::Paths inter, uni; 28 | clpr.Execute(cl::ctIntersection, inter, cl::pftEvenOdd); 29 | clpr.Execute(cl::ctUnion, uni, cl::pftEvenOdd); 30 | 31 | auto inter_area = paths_area(inter), 32 | uni_area = paths_area(uni); 33 | return std::abs(inter_area) / std::max(std::abs(uni_area), 1.0f); 34 | } 35 | 36 | bool should_merge(const Polygon &a, const Polygon &b, float iou_threshold) { 37 | return poly_iou(a, b) > iou_threshold; 38 | } 39 | 40 | /** 41 | * Incrementally merge polygons 42 | */ 43 | class PolyMerger { 44 | public: 45 | PolyMerger(): score(0), nr_polys(0) { 46 | memset(data, 0, sizeof(data)); 47 | } 48 | 49 | /** 50 | * Add a new polygon to be merged. 51 | */ 52 | void add(const Polygon &p_given) { 53 | Polygon p; 54 | if (nr_polys > 0) { 55 | // vertices of two polygons to merge may not be in the same order; 56 | // we match their vertices by choosing the ordering that 57 | // minimizes the total squared distance. 58 | // see function normalize_poly for details. 59 | p = normalize_poly(get(), p_given); 60 | } else { 61 | p = p_given; 62 | } 63 | assert(p.poly.size() == 4); 64 | auto &poly = p.poly; 65 | auto s = p.score; 66 | data[0] += poly[0].X * s; 67 | data[1] += poly[0].Y * s; 68 | 69 | data[2] += poly[1].X * s; 70 | data[3] += poly[1].Y * s; 71 | 72 | data[4] += poly[2].X * s; 73 | data[5] += poly[2].Y * s; 74 | 75 | data[6] += poly[3].X * s; 76 | data[7] += poly[3].Y * s; 77 | 78 | score += p.score; 79 | 80 | nr_polys += 1; 81 | } 82 | 83 | inline std::int64_t sqr(std::int64_t x) { return x * x; } 84 | 85 | Polygon normalize_poly( 86 | const Polygon &ref, 87 | const Polygon &p) { 88 | 89 | std::int64_t min_d = std::numeric_limits<std::int64_t>::max(); 90 | size_t best_start = 0, best_order = 0; 91 | 92 | for (size_t start = 0; start < 4; start ++) { 93 | size_t j = start; 94 | std::int64_t d = ( 95 | sqr(ref.poly[(j + 0) % 4].X - p.poly[(j + 0) % 4].X) 96 | + sqr(ref.poly[(j + 0) % 4].Y - p.poly[(j + 0) % 4].Y) 97 | + sqr(ref.poly[(j + 1) % 4].X - p.poly[(j + 1) % 4].X) 98 | + sqr(ref.poly[(j + 1) % 4].Y - p.poly[(j + 1) % 4].Y) 99 | + sqr(ref.poly[(j + 2) % 4].X - p.poly[(j + 2) % 4].X) 100 | + sqr(ref.poly[(j + 2) % 4].Y - p.poly[(j + 2) % 4].Y) 101 | + sqr(ref.poly[(j + 3) % 4].X - p.poly[(j + 3) % 4].X) 102 | + sqr(ref.poly[(j + 3) % 4].Y - p.poly[(j + 3) % 4].Y) 103 | ); 104 | if (d < min_d) { 105 | min_d = d; 106 | best_start = start; 107 | best_order = 0; 108 | } 109 | 110 | d = ( 111 | sqr(ref.poly[(j + 0) % 4].X - p.poly[(j + 3) % 4].X) 112 | + sqr(ref.poly[(j + 0) % 4].Y - p.poly[(j + 3) % 4].Y) 113 | + sqr(ref.poly[(j + 1) % 4].X - p.poly[(j + 2) % 4].X) 114 | + sqr(ref.poly[(j + 1) % 4].Y - p.poly[(j + 2) % 4].Y) 115 | + sqr(ref.poly[(j + 2) % 4].X - p.poly[(j + 1) % 4].X) 116 | + sqr(ref.poly[(j + 2) % 4].Y - p.poly[(j + 1) % 4].Y) 117 | + sqr(ref.poly[(j + 3) % 4].X - p.poly[(j + 0) % 4].X) 118 | + sqr(ref.poly[(j + 3) % 4].Y - p.poly[(j + 0) % 4].Y) 119 | ); 120 | if (d < min_d) { 121 | min_d = d; 122 | best_start = start; 123 | best_order = 1; 124 | } 125 | } 126 | 127 | Polygon r; 128 | r.poly.resize(4); 129 | auto j = best_start; 130 | if (best_order == 0) { 131 | for (size_t i = 0; i < 4; i ++) 132 | r.poly[i] = p.poly[(j + i) % 4]; 133 | } else { 134 | for (size_t i = 0; i < 4; i ++) 135 | r.poly[i] = p.poly[(j + 4 - i - 1) % 4]; 136 | } 137 | r.score = p.score; 138 | return r; 139 | } 140 | 141 | Polygon get() const { 142 | Polygon p; 143 | 144 | auto &poly = p.poly; 145 | poly.resize(4); 146 | auto score_inv = 1.0f / std::max(1e-8f, score); 147 | poly[0].X = data[0] * score_inv; 148 | poly[0].Y = data[1] * score_inv; 149 | poly[1].X = data[2] * score_inv; 150 | poly[1].Y = data[3] * score_inv; 151 | poly[2].X = data[4] * score_inv; 152 | poly[2].Y = data[5] * score_inv; 153 | poly[3].X = data[6] * score_inv; 154 | poly[3].Y = data[7] * score_inv; 155 | 156 | assert(score > 0); 157 | p.score = score; 158 | 159 | return p; 160 | } 161 | 162 | private: 163 | std::int64_t data[8]; 164 | float score; 165 | std::int32_t nr_polys; 166 | }; 167 | 168 | 169 | /** 170 | * The standard NMS algorithm. 171 | */ 172 | std::vector<Polygon> standard_nms(std::vector<Polygon> &polys, float iou_threshold) { 173 | size_t n = polys.size(); 174 | if (n == 0) 175 | return {}; 176 | std::vector<size_t> indices(n); 177 | std::iota(std::begin(indices), std::end(indices), 0); 178 | std::sort(std::begin(indices), std::end(indices), [&](size_t i, size_t j) { return polys[i].score > polys[j].score; }); 179 | 180 | std::vector<size_t> keep; 181 | while (indices.size()) { 182 | size_t p = 0, cur = indices[0]; 183 | keep.emplace_back(cur); 184 | for (size_t i = 1; i < indices.size(); i ++) { 185 | if (!should_merge(polys[cur], polys[indices[i]], iou_threshold)) { 186 | indices[p ++] = indices[i]; 187 | } 188 | } 189 | indices.resize(p); 190 | } 191 | 192 | std::vector<Polygon> ret; 193 | for (auto &&i: keep) { 194 | ret.emplace_back(polys[i]); 195 | } 196 | return ret; 197 | } 198 | 199 | std::vector<Polygon> 200 | merge_quadrangle_n9(const float *data, size_t n, float iou_threshold) { 201 | using cInt = cl::cInt; 202 | 203 | // first pass 204 | std::vector<Polygon> polys; 205 | for (size_t i = 0; i < n; i ++) { 206 | auto p = data + i * 9; 207 | Polygon poly{ 208 | { 209 | {cInt(p[0]), cInt(p[1])}, 210 | {cInt(p[2]), cInt(p[3])}, 211 | {cInt(p[4]), cInt(p[5])}, 212 | {cInt(p[6]), cInt(p[7])}, 213 | }, 214 | p[8], 215 | }; 216 | 217 | if (polys.size()) { 218 | // merge with the last one 219 | auto &bpoly = polys.back(); 220 | if (should_merge(poly, bpoly, iou_threshold)) { 221 | PolyMerger merger; 222 | merger.add(bpoly); 223 | merger.add(poly); 224 | bpoly = merger.get(); 225 | } else { 226 | polys.emplace_back(poly); 227 | } 228 | } else { 229 | polys.emplace_back(poly); 230 | } 231 | } 232 | return standard_nms(polys, iou_threshold); 233 | } 234 | } 235 | --------------------------------------------------------------------------------
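The algorithm above in one paragraph: merge_quadrangle_n9 exploits the fact that EAST emits geometries roughly row by row, so each incoming quadrangle is compared only against the last kept one and score-weighted-merged (via PolyMerger) when their IoU clears the threshold; standard NMS then runs on the much smaller survivor set. A minimal Python sketch of that first pass, mirroring the repo's locality_aware_nms.py (each poly is 9 floats: 8 coordinates plus a score; iou_fn is any quadrangle IoU, for example the Shapely helper sketched after utils/tools.py):

import numpy as np

def weighted_merge(g, p):
    # average the coordinates, weighted by score; scores accumulate
    q = np.zeros(9)
    q[:8] = (g[8] * g[:8] + p[8] * p[:8]) / (g[8] + p[8])
    q[8] = g[8] + p[8]
    return q

def locality_aware_pass(polys, iou_fn, iou_threshold=0.2):
    kept = []
    for p in polys:
        if kept and iou_fn(kept[-1][:8], p[:8]) > iou_threshold:
            kept[-1] = weighted_merge(kept[-1], p)  # merge with the last one
        else:
            kept.append(p.copy())
    return kept  # standard NMS on this reduced set comes next

/nets/Inception_ResNet_V2/nets/pix2pix_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.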
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================= 15 | """Tests for pix2pix.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | from nets import pix2pix 23 | 24 | 25 | class GeneratorTest(tf.test.TestCase): 26 | 27 | def test_nonsquare_inputs_raise_exception(self): 28 | batch_size = 2 29 | height, width = 240, 320 30 | num_outputs = 4 31 | 32 | images = tf.ones((batch_size, height, width, 3)) 33 | 34 | with self.assertRaises(ValueError): 35 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 36 | pix2pix.pix2pix_generator( 37 | images, num_outputs, upsample_method='nn_upsample_conv') 38 | 39 | def _reduced_default_blocks(self): 40 | """Returns the default blocks, scaled down to make test run faster.""" 41 | return [pix2pix.Block(b.num_filters // 32, b.decoder_keep_prob) 42 | for b in pix2pix._default_generator_blocks()] 43 | 44 | def test_output_size_nn_upsample_conv(self): 45 | batch_size = 2 46 | height, width = 256, 256 47 | num_outputs = 4 48 | 49 | images = tf.ones((batch_size, height, width, 3)) 50 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 51 | logits, _ = pix2pix.pix2pix_generator( 52 | images, num_outputs, blocks=self._reduced_default_blocks(), 53 | upsample_method='nn_upsample_conv') 54 | 55 | with self.test_session() as session: 56 | session.run(tf.global_variables_initializer()) 57 | np_outputs = session.run(logits) 58 | self.assertListEqual([batch_size, height, width, num_outputs], 59 | list(np_outputs.shape)) 60 | 61 | def test_output_size_conv2d_transpose(self): 62 | batch_size = 2 63 | height, width = 256, 256 64 | num_outputs = 4 65 | 66 | images = tf.ones((batch_size, height, width, 3)) 67 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 68 | logits, _ = pix2pix.pix2pix_generator( 69 | images, num_outputs, blocks=self._reduced_default_blocks(), 70 | upsample_method='conv2d_transpose') 71 | 72 | with self.test_session() as session: 73 | session.run(tf.global_variables_initializer()) 74 | np_outputs = session.run(logits) 75 | self.assertListEqual([batch_size, height, width, num_outputs], 76 | list(np_outputs.shape)) 77 | 78 | def test_block_number_dictates_number_of_layers(self): 79 | batch_size = 2 80 | height, width = 256, 256 81 | num_outputs = 4 82 | 83 | images = tf.ones((batch_size, height, width, 3)) 84 | blocks = [ 85 | pix2pix.Block(64, 0.5), 86 | pix2pix.Block(128, 0), 87 | ] 88 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 89 | _, end_points = pix2pix.pix2pix_generator( 90 | images, num_outputs, blocks) 91 | 92 | num_encoder_layers = 0 93 | num_decoder_layers = 0 94 | for end_point in end_points: 95 | if end_point.startswith('encoder'): 96 | num_encoder_layers += 1 97 | elif end_point.startswith('decoder'): 98 | num_decoder_layers += 1 99 | 100 | self.assertEqual(num_encoder_layers, len(blocks)) 101 | self.assertEqual(num_decoder_layers, len(blocks)) 102 | 103 | 104 | class 
DiscriminatorTest(tf.test.TestCase): 105 | 106 | def _layer_output_size(self, input_size, kernel_size=4, stride=2, pad=2): 107 | return (input_size + pad * 2 - kernel_size) // stride + 1 108 | 109 | def test_four_layers(self): 110 | batch_size = 2 111 | input_size = 256 112 | 113 | output_size = self._layer_output_size(input_size) 114 | output_size = self._layer_output_size(output_size) 115 | output_size = self._layer_output_size(output_size) 116 | output_size = self._layer_output_size(output_size, stride=1) 117 | output_size = self._layer_output_size(output_size, stride=1) 118 | 119 | images = tf.ones((batch_size, input_size, input_size, 3)) 120 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 121 | logits, end_points = pix2pix.pix2pix_discriminator( 122 | images, num_filters=[64, 128, 256, 512]) 123 | self.assertListEqual([batch_size, output_size, output_size, 1], 124 | logits.shape.as_list()) 125 | self.assertListEqual([batch_size, output_size, output_size, 1], 126 | end_points['predictions'].shape.as_list()) 127 | 128 | def test_four_layers_no_padding(self): 129 | batch_size = 2 130 | input_size = 256 131 | 132 | output_size = self._layer_output_size(input_size, pad=0) 133 | output_size = self._layer_output_size(output_size, pad=0) 134 | output_size = self._layer_output_size(output_size, pad=0) 135 | output_size = self._layer_output_size(output_size, stride=1, pad=0) 136 | output_size = self._layer_output_size(output_size, stride=1, pad=0) 137 | 138 | images = tf.ones((batch_size, input_size, input_size, 3)) 139 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 140 | logits, end_points = pix2pix.pix2pix_discriminator( 141 | images, num_filters=[64, 128, 256, 512], padding=0) 142 | self.assertListEqual([batch_size, output_size, output_size, 1], 143 | logits.shape.as_list()) 144 | self.assertListEqual([batch_size, output_size, output_size, 1], 145 | end_points['predictions'].shape.as_list()) 146 | 147 | def test_four_layers_wrong_padding(self): 148 | batch_size = 2 149 | input_size = 256 150 | 151 | images = tf.ones((batch_size, input_size, input_size, 3)) 152 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 153 | with self.assertRaises(TypeError): 154 | pix2pix.pix2pix_discriminator( 155 | images, num_filters=[64, 128, 256, 512], padding=1.5) 156 | 157 | def test_four_layers_negative_padding(self): 158 | batch_size = 2 159 | input_size = 256 160 | 161 | images = tf.ones((batch_size, input_size, input_size, 3)) 162 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 163 | with self.assertRaises(ValueError): 164 | pix2pix.pix2pix_discriminator( 165 | images, num_filters=[64, 128, 256, 512], padding=-1) 166 | 167 | if __name__ == '__main__': 168 | tf.test.main() 169 | -------------------------------------------------------------------------------- /lanms/include/pybind11/chrono.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/chrono.h: Transparent conversion between std::chrono and python's datetime 3 | 4 | Copyright (c) 2016 Trent Houliston <trent@houliston.me> and 5 | Wenzel Jakob <wenzel.jakob@epfl.ch> 6 | 7 | All rights reserved. Use of this source code is governed by a 8 | BSD-style license that can be found in the LICENSE file.
9 | */ 10 | 11 | #pragma once 12 | 13 | #include "pybind11.h" 14 | #include <cmath> 15 | #include <ctime> 16 | #include <chrono> 17 | #include <datetime.h> 18 | 19 | // Backport the PyDateTime_DELTA functions from Python3.3 if required 20 | #ifndef PyDateTime_DELTA_GET_DAYS 21 | #define PyDateTime_DELTA_GET_DAYS(o) (((PyDateTime_Delta*)o)->days) 22 | #endif 23 | #ifndef PyDateTime_DELTA_GET_SECONDS 24 | #define PyDateTime_DELTA_GET_SECONDS(o) (((PyDateTime_Delta*)o)->seconds) 25 | #endif 26 | #ifndef PyDateTime_DELTA_GET_MICROSECONDS 27 | #define PyDateTime_DELTA_GET_MICROSECONDS(o) (((PyDateTime_Delta*)o)->microseconds) 28 | #endif 29 | 30 | NAMESPACE_BEGIN(pybind11) 31 | NAMESPACE_BEGIN(detail) 32 | 33 | template <typename type> class duration_caster { 34 | public: 35 | typedef typename type::rep rep; 36 | typedef typename type::period period; 37 | 38 | typedef std::chrono::duration<uint_fast32_t, std::ratio<86400>> days; 39 | 40 | bool load(handle src, bool) { 41 | using namespace std::chrono; 42 | 43 | // Lazy initialise the PyDateTime import 44 | if (!PyDateTimeAPI) { PyDateTime_IMPORT; } 45 | 46 | if (!src) return false; 47 | // If invoked with datetime.delta object 48 | if (PyDelta_Check(src.ptr())) { 49 | value = type(duration_cast<duration<rep, period>>( 50 | days(PyDateTime_DELTA_GET_DAYS(src.ptr())) 51 | + seconds(PyDateTime_DELTA_GET_SECONDS(src.ptr())) 52 | + microseconds(PyDateTime_DELTA_GET_MICROSECONDS(src.ptr())))); 53 | return true; 54 | } 55 | // If invoked with a float we assume it is seconds and convert 56 | else if (PyFloat_Check(src.ptr())) { 57 | value = type(duration_cast<duration<rep, period>>(duration<double>(PyFloat_AsDouble(src.ptr())))); 58 | return true; 59 | } 60 | else return false; 61 | } 62 | 63 | // If this is a duration just return it back 64 | static const std::chrono::duration<rep, period>& get_duration(const std::chrono::duration<rep, period> &src) { 65 | return src; 66 | } 67 | 68 | // If this is a time_point get the time_since_epoch 69 | template <typename Clock> static std::chrono::duration<rep, period> get_duration(const std::chrono::time_point<Clock, std::chrono::duration<rep, period>> &src) { 70 | return src.time_since_epoch(); 71 | } 72 | 73 | static handle cast(const type &src, return_value_policy /* policy */, handle /* parent */) { 74 | using namespace std::chrono; 75 | 76 | // Use overloaded function to get our duration from our source 77 | // Works out if it is a duration or time_point and get the duration 78 | auto d = get_duration(src); 79 | 80 | // Lazy initialise the PyDateTime import 81 | if (!PyDateTimeAPI) { PyDateTime_IMPORT; } 82 | 83 | // Declare these special duration types so the conversions happen with the correct primitive types (int) 84 | using dd_t = duration<int, std::ratio<86400>>; 85 | using ss_t = duration<int, std::ratio<1>>; 86 | using us_t = duration<int, std::micro>; 87 | 88 | auto dd = duration_cast<dd_t>(d); 89 | auto subd = d - dd; 90 | auto ss = duration_cast<ss_t>(subd); 91 | auto us = duration_cast<us_t>(subd - ss); 92 | return PyDelta_FromDSU(dd.count(), ss.count(), us.count()); 93 | } 94 | 95 | PYBIND11_TYPE_CASTER(type, _("datetime.timedelta")); 96 | }; 97 | 98 | // This is for casting times on the system clock into datetime.datetime instances 99 | template <typename Duration> class type_caster<std::chrono::time_point<std::chrono::system_clock, Duration>> { 100 | public: 101 | typedef std::chrono::time_point<std::chrono::system_clock, Duration> type; 102 | bool load(handle src, bool) { 103 | using namespace std::chrono; 104 | 105 | // Lazy initialise the PyDateTime import 106 | if (!PyDateTimeAPI) { PyDateTime_IMPORT; } 107 | 108 | if (!src) return false; 109 | if (PyDateTime_Check(src.ptr())) { 110 | std::tm cal; 111 | cal.tm_sec = PyDateTime_DATE_GET_SECOND(src.ptr()); 112 | cal.tm_min = PyDateTime_DATE_GET_MINUTE(src.ptr()); 113 | cal.tm_hour = PyDateTime_DATE_GET_HOUR(src.ptr()); 114 | cal.tm_mday = PyDateTime_GET_DAY(src.ptr()); 115 | cal.tm_mon = PyDateTime_GET_MONTH(src.ptr()) - 1; 116 | cal.tm_year = PyDateTime_GET_YEAR(src.ptr()) - 1900; 117 | cal.tm_isdst = -1; 118 | 119 | value = system_clock::from_time_t(std::mktime(&cal)) + microseconds(PyDateTime_DATE_GET_MICROSECOND(src.ptr())); 120 | return true; 121 | } 122 | else return false; 123 | } 124 | 125 | static handle cast(const std::chrono::time_point<std::chrono::system_clock, Duration> &src, return_value_policy /* policy */, handle /* parent */) { 126 | using namespace std::chrono; 127 | 128 | // Lazy initialise the PyDateTime import 129 | if (!PyDateTimeAPI) { PyDateTime_IMPORT; } 130 | 131 | std::time_t tt = system_clock::to_time_t(src); 132 | // this function uses static memory so it's best to copy it out asap just in case 133 | // otherwise other code that is using localtime may break this (not just python code) 134 | std::tm localtime = *std::localtime(&tt); 135 | 136 | // Declare these special duration types so the conversions happen with the correct primitive types (int) 137 | using us_t = duration<int, std::micro>; 138 | 139 | return PyDateTime_FromDateAndTime(localtime.tm_year + 1900, 140 | localtime.tm_mon + 1, 141 | localtime.tm_mday, 142 | localtime.tm_hour, 143 | localtime.tm_min, 144 | localtime.tm_sec, 145 | (duration_cast<us_t>(src.time_since_epoch() % seconds(1))).count()); 146 | } 147 | PYBIND11_TYPE_CASTER(type, _("datetime.datetime")); 148 | }; 149 | 150 | // Other clocks that are not the system clock are not measured as datetime.datetime objects 151 | // since they are not measured on calendar time. So instead we just make them timedeltas 152 | // Or if they have passed us a time as a float we convert that 153 | template <typename Clock, typename Duration> class type_caster<std::chrono::time_point<Clock, Duration>> 154 | : public duration_caster<std::chrono::time_point<Clock, Duration>> { 155 | }; 156 | 157 | template <typename Rep, typename Period> class type_caster<std::chrono::duration<Rep, Period>> 158 | : public duration_caster<std::chrono::duration<Rep, Period>> { 159 | }; 160 | 161 | NAMESPACE_END(detail) 162 | NAMESPACE_END(pybind11) 163 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/nets_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Contains a factory for building various models.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | import functools 21 | 22 | import tensorflow as tf 23 | 24 | from nets import alexnet 25 | from nets import cifarnet 26 | from nets import inception 27 | from nets import lenet 28 | from nets import mobilenet_v1 29 | from nets import overfeat 30 | from nets import resnet_v1 31 | from nets import resnet_v2 32 | from nets import vgg 33 | from nets.mobilenet import mobilenet_v2 34 | from nets.nasnet import nasnet 35 | from nets.nasnet import pnasnet 36 | 37 | slim = tf.contrib.slim 38 | 39 | networks_map = {'alexnet_v2': alexnet.alexnet_v2, 40 | 'cifarnet': cifarnet.cifarnet, 41 | 'overfeat': overfeat.overfeat, 42 | 'vgg_a': vgg.vgg_a, 43 | 'vgg_16': vgg.vgg_16, 44 | 'vgg_19': vgg.vgg_19, 45 | 'inception_v1': inception.inception_v1, 46 | 'inception_v2': inception.inception_v2, 47 | 'inception_v3': inception.inception_v3, 48 | 'inception_v4': inception.inception_v4, 49 | 'inception_resnet_v2': inception.inception_resnet_v2, 50 | 'lenet': lenet.lenet, 51 | 'resnet_v1_50': resnet_v1.resnet_v1_50, 52 | 'resnet_v1_101': resnet_v1.resnet_v1_101, 53 | 'resnet_v1_152': resnet_v1.resnet_v1_152, 54 | 'resnet_v1_200': resnet_v1.resnet_v1_200, 55 | 'resnet_v2_50': resnet_v2.resnet_v2_50, 56 | 'resnet_v2_101': resnet_v2.resnet_v2_101, 57 | 'resnet_v2_152': resnet_v2.resnet_v2_152, 58 | 'resnet_v2_200': resnet_v2.resnet_v2_200, 59 | 'mobilenet_v1': mobilenet_v1.mobilenet_v1, 60 | 'mobilenet_v1_075': mobilenet_v1.mobilenet_v1_075, 61 | 'mobilenet_v1_050': mobilenet_v1.mobilenet_v1_050, 62 | 'mobilenet_v1_025': mobilenet_v1.mobilenet_v1_025, 63 | 'mobilenet_v2': mobilenet_v2.mobilenet, 64 | 'nasnet_cifar': nasnet.build_nasnet_cifar, 65 | 'nasnet_mobile': nasnet.build_nasnet_mobile, 66 | 'nasnet_large': nasnet.build_nasnet_large, 67 | 'pnasnet_large': pnasnet.build_pnasnet_large, 68 | } 69 | 70 | arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope, 71 | 'cifarnet': cifarnet.cifarnet_arg_scope, 72 | 'overfeat': overfeat.overfeat_arg_scope, 73 | 'vgg_a': vgg.vgg_arg_scope, 74 | 'vgg_16': vgg.vgg_arg_scope, 75 | 'vgg_19': vgg.vgg_arg_scope, 76 | 'inception_v1': inception.inception_v3_arg_scope, 77 | 'inception_v2': inception.inception_v3_arg_scope, 78 | 'inception_v3': inception.inception_v3_arg_scope, 79 | 'inception_v4': inception.inception_v4_arg_scope, 80 | 'inception_resnet_v2': 81 | inception.inception_resnet_v2_arg_scope, 82 | 'lenet': lenet.lenet_arg_scope, 83 | 'resnet_v1_50': resnet_v1.resnet_arg_scope, 84 | 'resnet_v1_101': resnet_v1.resnet_arg_scope, 85 | 'resnet_v1_152': resnet_v1.resnet_arg_scope, 86 | 'resnet_v1_200': resnet_v1.resnet_arg_scope, 87 | 'resnet_v2_50': resnet_v2.resnet_arg_scope, 88 | 'resnet_v2_101': resnet_v2.resnet_arg_scope, 89 | 'resnet_v2_152': resnet_v2.resnet_arg_scope, 90 | 'resnet_v2_200': resnet_v2.resnet_arg_scope, 91 | 'mobilenet_v1': mobilenet_v1.mobilenet_v1_arg_scope, 92 | 'mobilenet_v1_075': mobilenet_v1.mobilenet_v1_arg_scope, 93 | 'mobilenet_v1_050': mobilenet_v1.mobilenet_v1_arg_scope, 94 | 'mobilenet_v1_025': mobilenet_v1.mobilenet_v1_arg_scope, 95 | 'mobilenet_v2': mobilenet_v2.training_scope, 96 | 'nasnet_cifar': nasnet.nasnet_cifar_arg_scope, 97 | 'nasnet_mobile': nasnet.nasnet_mobile_arg_scope, 98 | 'nasnet_large': nasnet.nasnet_large_arg_scope, 99 | 'pnasnet_large': 
pnasnet.pnasnet_large_arg_scope, 100 | } 101 | 102 | 103 | def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False): 104 | """Returns a network_fn such as `logits, end_points = network_fn(images)`. 105 | 106 | Args: 107 | name: The name of the network. 108 | num_classes: The number of classes to use for classification. If 0 or None, 109 | the logits layer is omitted and its input features are returned instead. 110 | weight_decay: The l2 coefficient for the model weights. 111 | is_training: `True` if the model is being used for training and `False` 112 | otherwise. 113 | 114 | Returns: 115 | network_fn: A function that applies the model to a batch of images. It has 116 | the following signature: 117 | net, end_points = network_fn(images) 118 | The `images` input is a tensor of shape [batch_size, height, width, 3] 119 | with height = width = network_fn.default_image_size. (The permissibility 120 | and treatment of other sizes depends on the network_fn.) 121 | The returned `end_points` are a dictionary of intermediate activations. 122 | The returned `net` is the topmost layer, depending on `num_classes`: 123 | If `num_classes` was a non-zero integer, `net` is a logits tensor 124 | of shape [batch_size, num_classes]. 125 | If `num_classes` was 0 or `None`, `net` is a tensor with the input 126 | to the logits layer of shape [batch_size, 1, 1, num_features] or 127 | [batch_size, num_features]. Dropout has not been applied to this 128 | (even if the network's original classification does); it remains for 129 | the caller to do this or not. 130 | 131 | Raises: 132 | ValueError: If network `name` is not recognized. 133 | """ 134 | if name not in networks_map: 135 | raise ValueError('Name of network unknown %s' % name) 136 | func = networks_map[name] 137 | @functools.wraps(func) 138 | def network_fn(images, **kwargs): 139 | arg_scope = arg_scopes_map[name](weight_decay=weight_decay) 140 | with slim.arg_scope(arg_scope): 141 | return func(images, num_classes, is_training=is_training, **kwargs) 142 | if hasattr(func, 'default_image_size'): 143 | network_fn.default_image_size = func.default_image_size 144 | 145 | return network_fn 146 | -------------------------------------------------------------------------------- /run_demo_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | 5 | import time 6 | import datetime 7 | import cv2 8 | import numpy as np 9 | import uuid 10 | import json 11 | 12 | import functools 13 | import logging 14 | import collections 15 | 16 | logger = logging.getLogger(__name__) 17 | logger.setLevel(logging.INFO) 18 | 19 | 20 | @functools.lru_cache(maxsize=1) 21 | def get_host_info(): 22 | ret = {} 23 | with open('/proc/cpuinfo') as f: 24 | ret['cpuinfo'] = f.read() 25 | 26 | with open('/proc/meminfo') as f: 27 | ret['meminfo'] = f.read() 28 | 29 | with open('/proc/loadavg') as f: 30 | ret['loadavg'] = f.read() 31 | 32 | return ret 33 | 34 | 35 | @functools.lru_cache(maxsize=100) 36 | def get_predictor(checkpoint_path): 37 | logger.info('loading model') 38 | import tensorflow as tf 39 | import model 40 | from icdar import restore_rectangle 41 | import lanms 42 | from eval import resize_image, sort_poly, detect 43 | 44 | input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images') 45 | global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) 46 | 47 | f_score, f_geometry = model.model(input_images, 
is_training=False) 48 | 49 | variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step) 50 | saver = tf.train.Saver(variable_averages.variables_to_restore()) 51 | 52 | sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) 53 | 54 | ckpt_state = tf.train.get_checkpoint_state(checkpoint_path) 55 | model_path = os.path.join(checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path)) 56 | logger.info('Restore from {}'.format(model_path)) 57 | saver.restore(sess, model_path) 58 | 59 | def predictor(img): 60 | """ 61 | :return: { 62 | 'text_lines': [ 63 | { 64 | 'score': , 65 | 'x0': , 66 | 'y0': , 67 | 'x1': , 68 | ... 69 | 'y3': , 70 | } 71 | ], 72 | 'rtparams': { # runtime parameters 73 | 'image_size': , 74 | 'working_size': , 75 | }, 76 | 'timing': { 77 | 'net': , 78 | 'restore': , 79 | 'nms': , 80 | 'cpuinfo': , 81 | 'meminfo': , 82 | 'uptime': , 83 | } 84 | } 85 | """ 86 | start_time = time.time() 87 | rtparams = collections.OrderedDict() 88 | rtparams['start_time'] = datetime.datetime.now().isoformat() 89 | rtparams['image_size'] = '{}x{}'.format(img.shape[1], img.shape[0]) 90 | timer = collections.OrderedDict([ 91 | ('net', 0), 92 | ('restore', 0), 93 | ('nms', 0) 94 | ]) 95 | 96 | im_resized, (ratio_h, ratio_w) = resize_image(img) 97 | rtparams['working_size'] = '{}x{}'.format( 98 | im_resized.shape[1], im_resized.shape[0]) 99 | start = time.time() 100 | score, geometry = sess.run( 101 | [f_score, f_geometry], 102 | feed_dict={input_images: [im_resized[:,:,::-1]]}) 103 | timer['net'] = time.time() - start 104 | 105 | boxes, timer = detect(score_map=score, geo_map=geometry, timer=timer) 106 | logger.info('net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format( 107 | timer['net']*1000, timer['restore']*1000, timer['nms']*1000)) 108 | 109 | if boxes is not None: 110 | scores = boxes[:,8].reshape(-1) 111 | boxes = boxes[:, :8].reshape((-1, 4, 2)) 112 | boxes[:, :, 0] /= ratio_w 113 | boxes[:, :, 1] /= ratio_h 114 | 115 | duration = time.time() - start_time 116 | timer['overall'] = duration 117 | logger.info('[timing] {}'.format(duration)) 118 | 119 | text_lines = [] 120 | if boxes is not None: 121 | text_lines = [] 122 | for box, score in zip(boxes, scores): 123 | box = sort_poly(box.astype(np.int32)) 124 | if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3]-box[0]) < 5: 125 | continue 126 | tl = collections.OrderedDict(zip( 127 | ['x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3'], 128 | map(float, box.flatten()))) 129 | tl['score'] = float(score) 130 | text_lines.append(tl) 131 | ret = { 132 | 'text_lines': text_lines, 133 | 'rtparams': rtparams, 134 | 'timing': timer, 135 | } 136 | ret.update(get_host_info()) 137 | return ret 138 | 139 | 140 | return predictor 141 | 142 | 143 | ### the webserver 144 | from flask import Flask, request, render_template 145 | import argparse 146 | 147 | 148 | class Config: 149 | SAVE_DIR = 'static/results' 150 | 151 | 152 | config = Config() 153 | 154 | 155 | app = Flask(__name__) 156 | 157 | @app.route('/') 158 | def index(): 159 | return render_template('index.html', session_id='dummy_session_id') 160 | 161 | 162 | def draw_illu(illu, rst): 163 | for t in rst['text_lines']: 164 | d = np.array([t['x0'], t['y0'], t['x1'], t['y1'], t['x2'], 165 | t['y2'], t['x3'], t['y3']], dtype='int32') 166 | d = d.reshape(-1, 2) 167 | cv2.polylines(illu, [d], isClosed=True, color=(255, 255, 0)) 168 | return illu 169 | 170 | 171 | def save_result(img, rst): 172 | session_id = str(uuid.uuid1()) 173 | dirpath = 
os.path.join(config.SAVE_DIR, session_id) 174 | os.makedirs(dirpath) 175 | 176 | # save input image 177 | output_path = os.path.join(dirpath, 'input.png') 178 | cv2.imwrite(output_path, img) 179 | 180 | # save illustration 181 | output_path = os.path.join(dirpath, 'output.png') 182 | cv2.imwrite(output_path, draw_illu(img.copy(), rst)) 183 | 184 | # save json data 185 | output_path = os.path.join(dirpath, 'result.json') 186 | with open(output_path, 'w') as f: 187 | json.dump(rst, f) 188 | 189 | rst['session_id'] = session_id 190 | return rst 191 | 192 | 193 | 194 | checkpoint_path = './east_icdar2015_resnet_v1_50_rbox' 195 | 196 | 197 | @app.route('/', methods=['POST']) 198 | def index_post(): 199 | # the predictor itself is cached by get_predictor's lru_cache 200 | import io 201 | bio = io.BytesIO() 202 | request.files['image'].save(bio) 203 | img = cv2.imdecode(np.frombuffer(bio.getvalue(), dtype='uint8'), 1) 204 | rst = get_predictor(checkpoint_path)(img) 205 | 206 | save_result(img, rst) 207 | return render_template('index.html', session_id=rst['session_id']) 208 | 209 | 210 | def main(): 211 | global checkpoint_path 212 | parser = argparse.ArgumentParser() 213 | parser.add_argument('--port', default=8769, type=int) 214 | parser.add_argument('--checkpoint-path', default=checkpoint_path) 215 | parser.add_argument('--debug', action='store_true') 216 | args = parser.parse_args() 217 | checkpoint_path = args.checkpoint_path 218 | 219 | if not os.path.exists(args.checkpoint_path): 220 | raise RuntimeError( 221 | 'Checkpoint `{}` not found'.format(args.checkpoint_path)) 222 | 223 | app.debug = args.debug 224 | app.run('0.0.0.0', args.port) 225 | 226 | if __name__ == '__main__': 227 | main() 228 | 229 | --------------------------------------------------------------------------------
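A minimal client sketch for the POST route above, assuming the server is running on port 8769 (the port deploy.sh binds) and that the requests package is available (it is not in requirements.txt); 'demo.jpg' is a placeholder path. The route responds with the rendered HTML page, while the machine-readable detections land in static/results/<session_id>/result.json:

import requests

with open('demo.jpg', 'rb') as f:
    resp = requests.post('http://localhost:8769/', files={'image': f})
print(resp.status_code)  # 200 on success; results are also saved server-side

/nets/Inception_ResNet_V2/nets/overfeat_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.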
14 | # ============================================================================== 15 | """Tests for slim.nets.overfeat.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets import overfeat 23 | 24 | slim = tf.contrib.slim 25 | 26 | 27 | class OverFeatTest(tf.test.TestCase): 28 | 29 | def testBuild(self): 30 | batch_size = 5 31 | height, width = 231, 231 32 | num_classes = 1000 33 | with self.test_session(): 34 | inputs = tf.random_uniform((batch_size, height, width, 3)) 35 | logits, _ = overfeat.overfeat(inputs, num_classes) 36 | self.assertEquals(logits.op.name, 'overfeat/fc8/squeezed') 37 | self.assertListEqual(logits.get_shape().as_list(), 38 | [batch_size, num_classes]) 39 | 40 | def testFullyConvolutional(self): 41 | batch_size = 1 42 | height, width = 281, 281 43 | num_classes = 1000 44 | with self.test_session(): 45 | inputs = tf.random_uniform((batch_size, height, width, 3)) 46 | logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False) 47 | self.assertEquals(logits.op.name, 'overfeat/fc8/BiasAdd') 48 | self.assertListEqual(logits.get_shape().as_list(), 49 | [batch_size, 2, 2, num_classes]) 50 | 51 | def testGlobalPool(self): 52 | batch_size = 1 53 | height, width = 281, 281 54 | num_classes = 1000 55 | with self.test_session(): 56 | inputs = tf.random_uniform((batch_size, height, width, 3)) 57 | logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False, 58 | global_pool=True) 59 | self.assertEquals(logits.op.name, 'overfeat/fc8/BiasAdd') 60 | self.assertListEqual(logits.get_shape().as_list(), 61 | [batch_size, 1, 1, num_classes]) 62 | 63 | def testEndPoints(self): 64 | batch_size = 5 65 | height, width = 231, 231 66 | num_classes = 1000 67 | with self.test_session(): 68 | inputs = tf.random_uniform((batch_size, height, width, 3)) 69 | _, end_points = overfeat.overfeat(inputs, num_classes) 70 | expected_names = ['overfeat/conv1', 71 | 'overfeat/pool1', 72 | 'overfeat/conv2', 73 | 'overfeat/pool2', 74 | 'overfeat/conv3', 75 | 'overfeat/conv4', 76 | 'overfeat/conv5', 77 | 'overfeat/pool5', 78 | 'overfeat/fc6', 79 | 'overfeat/fc7', 80 | 'overfeat/fc8' 81 | ] 82 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 83 | 84 | def testNoClasses(self): 85 | batch_size = 5 86 | height, width = 231, 231 87 | num_classes = None 88 | with self.test_session(): 89 | inputs = tf.random_uniform((batch_size, height, width, 3)) 90 | net, end_points = overfeat.overfeat(inputs, num_classes) 91 | expected_names = ['overfeat/conv1', 92 | 'overfeat/pool1', 93 | 'overfeat/conv2', 94 | 'overfeat/pool2', 95 | 'overfeat/conv3', 96 | 'overfeat/conv4', 97 | 'overfeat/conv5', 98 | 'overfeat/pool5', 99 | 'overfeat/fc6', 100 | 'overfeat/fc7' 101 | ] 102 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 103 | self.assertTrue(net.op.name.startswith('overfeat/fc7')) 104 | 105 | def testModelVariables(self): 106 | batch_size = 5 107 | height, width = 231, 231 108 | num_classes = 1000 109 | with self.test_session(): 110 | inputs = tf.random_uniform((batch_size, height, width, 3)) 111 | overfeat.overfeat(inputs, num_classes) 112 | expected_names = ['overfeat/conv1/weights', 113 | 'overfeat/conv1/biases', 114 | 'overfeat/conv2/weights', 115 | 'overfeat/conv2/biases', 116 | 'overfeat/conv3/weights', 117 | 'overfeat/conv3/biases', 118 | 'overfeat/conv4/weights', 119 | 'overfeat/conv4/biases', 120 | 'overfeat/conv5/weights', 121 | 
'overfeat/conv5/biases', 122 | 'overfeat/fc6/weights', 123 | 'overfeat/fc6/biases', 124 | 'overfeat/fc7/weights', 125 | 'overfeat/fc7/biases', 126 | 'overfeat/fc8/weights', 127 | 'overfeat/fc8/biases', 128 | ] 129 | model_variables = [v.op.name for v in slim.get_model_variables()] 130 | self.assertSetEqual(set(model_variables), set(expected_names)) 131 | 132 | def testEvaluation(self): 133 | batch_size = 2 134 | height, width = 231, 231 135 | num_classes = 1000 136 | with self.test_session(): 137 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 138 | logits, _ = overfeat.overfeat(eval_inputs, is_training=False) 139 | self.assertListEqual(logits.get_shape().as_list(), 140 | [batch_size, num_classes]) 141 | predictions = tf.argmax(logits, 1) 142 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) 143 | 144 | def testTrainEvalWithReuse(self): 145 | train_batch_size = 2 146 | eval_batch_size = 1 147 | train_height, train_width = 231, 231 148 | eval_height, eval_width = 281, 281 149 | num_classes = 1000 150 | with self.test_session(): 151 | train_inputs = tf.random_uniform( 152 | (train_batch_size, train_height, train_width, 3)) 153 | logits, _ = overfeat.overfeat(train_inputs) 154 | self.assertListEqual(logits.get_shape().as_list(), 155 | [train_batch_size, num_classes]) 156 | tf.get_variable_scope().reuse_variables() 157 | eval_inputs = tf.random_uniform( 158 | (eval_batch_size, eval_height, eval_width, 3)) 159 | logits, _ = overfeat.overfeat(eval_inputs, is_training=False, 160 | spatial_squeeze=False) 161 | self.assertListEqual(logits.get_shape().as_list(), 162 | [eval_batch_size, 2, 2, num_classes]) 163 | logits = tf.reduce_mean(logits, [1, 2]) 164 | predictions = tf.argmax(logits, 1) 165 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) 166 | 167 | def testForward(self): 168 | batch_size = 1 169 | height, width = 231, 231 170 | with self.test_session() as sess: 171 | inputs = tf.random_uniform((batch_size, height, width, 3)) 172 | logits, _ = overfeat.overfeat(inputs) 173 | sess.run(tf.global_variables_initializer()) 174 | output = sess.run(logits) 175 | self.assertTrue(output.any()) 176 | 177 | if __name__ == '__main__': 178 | tf.test.main() 179 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/alexnet_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for slim.nets.alexnet.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets import alexnet 23 | 24 | slim = tf.contrib.slim 25 | 26 | 27 | class AlexnetV2Test(tf.test.TestCase): 28 | 29 | def testBuild(self): 30 | batch_size = 5 31 | height, width = 224, 224 32 | num_classes = 1000 33 | with self.test_session(): 34 | inputs = tf.random_uniform((batch_size, height, width, 3)) 35 | logits, _ = alexnet.alexnet_v2(inputs, num_classes) 36 | self.assertEquals(logits.op.name, 'alexnet_v2/fc8/squeezed') 37 | self.assertListEqual(logits.get_shape().as_list(), 38 | [batch_size, num_classes]) 39 | 40 | def testFullyConvolutional(self): 41 | batch_size = 1 42 | height, width = 300, 400 43 | num_classes = 1000 44 | with self.test_session(): 45 | inputs = tf.random_uniform((batch_size, height, width, 3)) 46 | logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False) 47 | self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd') 48 | self.assertListEqual(logits.get_shape().as_list(), 49 | [batch_size, 4, 7, num_classes]) 50 | 51 | def testGlobalPool(self): 52 | batch_size = 1 53 | height, width = 256, 256 54 | num_classes = 1000 55 | with self.test_session(): 56 | inputs = tf.random_uniform((batch_size, height, width, 3)) 57 | logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False, 58 | global_pool=True) 59 | self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd') 60 | self.assertListEqual(logits.get_shape().as_list(), 61 | [batch_size, 1, 1, num_classes]) 62 | 63 | def testEndPoints(self): 64 | batch_size = 5 65 | height, width = 224, 224 66 | num_classes = 1000 67 | with self.test_session(): 68 | inputs = tf.random_uniform((batch_size, height, width, 3)) 69 | _, end_points = alexnet.alexnet_v2(inputs, num_classes) 70 | expected_names = ['alexnet_v2/conv1', 71 | 'alexnet_v2/pool1', 72 | 'alexnet_v2/conv2', 73 | 'alexnet_v2/pool2', 74 | 'alexnet_v2/conv3', 75 | 'alexnet_v2/conv4', 76 | 'alexnet_v2/conv5', 77 | 'alexnet_v2/pool5', 78 | 'alexnet_v2/fc6', 79 | 'alexnet_v2/fc7', 80 | 'alexnet_v2/fc8' 81 | ] 82 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 83 | 84 | def testNoClasses(self): 85 | batch_size = 5 86 | height, width = 224, 224 87 | num_classes = None 88 | with self.test_session(): 89 | inputs = tf.random_uniform((batch_size, height, width, 3)) 90 | net, end_points = alexnet.alexnet_v2(inputs, num_classes) 91 | expected_names = ['alexnet_v2/conv1', 92 | 'alexnet_v2/pool1', 93 | 'alexnet_v2/conv2', 94 | 'alexnet_v2/pool2', 95 | 'alexnet_v2/conv3', 96 | 'alexnet_v2/conv4', 97 | 'alexnet_v2/conv5', 98 | 'alexnet_v2/pool5', 99 | 'alexnet_v2/fc6', 100 | 'alexnet_v2/fc7' 101 | ] 102 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 103 | self.assertTrue(net.op.name.startswith('alexnet_v2/fc7')) 104 | self.assertListEqual(net.get_shape().as_list(), 105 | [batch_size, 1, 1, 4096]) 106 | 107 | def testModelVariables(self): 108 | batch_size = 5 109 | height, width = 224, 224 110 | num_classes = 1000 111 | with self.test_session(): 112 | inputs = tf.random_uniform((batch_size, height, width, 3)) 113 | alexnet.alexnet_v2(inputs, num_classes) 114 | expected_names = ['alexnet_v2/conv1/weights', 115 | 'alexnet_v2/conv1/biases', 116 | 'alexnet_v2/conv2/weights', 117 | 'alexnet_v2/conv2/biases', 118 | 
'alexnet_v2/conv3/weights', 119 | 'alexnet_v2/conv3/biases', 120 | 'alexnet_v2/conv4/weights', 121 | 'alexnet_v2/conv4/biases', 122 | 'alexnet_v2/conv5/weights', 123 | 'alexnet_v2/conv5/biases', 124 | 'alexnet_v2/fc6/weights', 125 | 'alexnet_v2/fc6/biases', 126 | 'alexnet_v2/fc7/weights', 127 | 'alexnet_v2/fc7/biases', 128 | 'alexnet_v2/fc8/weights', 129 | 'alexnet_v2/fc8/biases', 130 | ] 131 | model_variables = [v.op.name for v in slim.get_model_variables()] 132 | self.assertSetEqual(set(model_variables), set(expected_names)) 133 | 134 | def testEvaluation(self): 135 | batch_size = 2 136 | height, width = 224, 224 137 | num_classes = 1000 138 | with self.test_session(): 139 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 140 | logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False) 141 | self.assertListEqual(logits.get_shape().as_list(), 142 | [batch_size, num_classes]) 143 | predictions = tf.argmax(logits, 1) 144 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) 145 | 146 | def testTrainEvalWithReuse(self): 147 | train_batch_size = 2 148 | eval_batch_size = 1 149 | train_height, train_width = 224, 224 150 | eval_height, eval_width = 300, 400 151 | num_classes = 1000 152 | with self.test_session(): 153 | train_inputs = tf.random_uniform( 154 | (train_batch_size, train_height, train_width, 3)) 155 | logits, _ = alexnet.alexnet_v2(train_inputs) 156 | self.assertListEqual(logits.get_shape().as_list(), 157 | [train_batch_size, num_classes]) 158 | tf.get_variable_scope().reuse_variables() 159 | eval_inputs = tf.random_uniform( 160 | (eval_batch_size, eval_height, eval_width, 3)) 161 | logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False, 162 | spatial_squeeze=False) 163 | self.assertListEqual(logits.get_shape().as_list(), 164 | [eval_batch_size, 4, 7, num_classes]) 165 | logits = tf.reduce_mean(logits, [1, 2]) 166 | predictions = tf.argmax(logits, 1) 167 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) 168 | 169 | def testForward(self): 170 | batch_size = 1 171 | height, width = 224, 224 172 | with self.test_session() as sess: 173 | inputs = tf.random_uniform((batch_size, height, width, 3)) 174 | logits, _ = alexnet.alexnet_v2(inputs) 175 | sess.run(tf.global_variables_initializer()) 176 | output = sess.run(logits) 177 | self.assertTrue(output.any()) 178 | 179 | if __name__ == '__main__': 180 | tf.test.main() 181 | -------------------------------------------------------------------------------- /lanms/include/pybind11/embed.h: -------------------------------------------------------------------------------- 1 | /* 2 | pybind11/embed.h: Support for embedding the interpreter 3 | 4 | Copyright (c) 2017 Wenzel Jakob <wenzel.jakob@epfl.ch> 5 | 6 | All rights reserved. Use of this source code is governed by a 7 | BSD-style license that can be found in the LICENSE file. 8 | */ 9 | 10 | #pragma once 11 | 12 | #include "pybind11.h" 13 | #include "eval.h" 14 | 15 | #if defined(PYPY_VERSION) 16 | # error Embedding the interpreter is not supported with PyPy 17 | #endif 18 | 19 | #if PY_MAJOR_VERSION >= 3 20 | # define PYBIND11_EMBEDDED_MODULE_IMPL(name) \ 21 | extern "C" PyObject *pybind11_init_impl_##name() { \ 22 | return pybind11_init_wrapper_##name(); \ 23 | } 24 | #else 25 | # define PYBIND11_EMBEDDED_MODULE_IMPL(name) \ 26 | extern "C" void pybind11_init_impl_##name() { \ 27 | pybind11_init_wrapper_##name(); \ 28 | } 29 | #endif 30 | 31 | /** \rst 32 | Add a new module to the table of builtins for the interpreter.
Must be 33 | defined in global scope. The first macro parameter is the name of the 34 | module (without quotes). The second parameter is the variable which will 35 | be used as the interface to add functions and classes to the module. 36 | 37 | .. code-block:: cpp 38 | 39 | PYBIND11_EMBEDDED_MODULE(example, m) { 40 | // ... initialize functions and classes here 41 | m.def("foo", []() { 42 | return "Hello, World!"; 43 | }); 44 | } 45 | \endrst */ 46 | #define PYBIND11_EMBEDDED_MODULE(name, variable) \ 47 | static void pybind11_init_##name(pybind11::module &); \ 48 | static PyObject *pybind11_init_wrapper_##name() { \ 49 | auto m = pybind11::module(#name); \ 50 | try { \ 51 | pybind11_init_##name(m); \ 52 | return m.ptr(); \ 53 | } catch (pybind11::error_already_set &e) { \ 54 | PyErr_SetString(PyExc_ImportError, e.what()); \ 55 | return nullptr; \ 56 | } catch (const std::exception &e) { \ 57 | PyErr_SetString(PyExc_ImportError, e.what()); \ 58 | return nullptr; \ 59 | } \ 60 | } \ 61 | PYBIND11_EMBEDDED_MODULE_IMPL(name) \ 62 | pybind11::detail::embedded_module name(#name, pybind11_init_impl_##name); \ 63 | void pybind11_init_##name(pybind11::module &variable) 64 | 65 | 66 | NAMESPACE_BEGIN(pybind11) 67 | NAMESPACE_BEGIN(detail) 68 | 69 | /// Python 2.7/3.x compatible version of `PyImport_AppendInittab` and error checks. 70 | struct embedded_module { 71 | #if PY_MAJOR_VERSION >= 3 72 | using init_t = PyObject *(*)(); 73 | #else 74 | using init_t = void (*)(); 75 | #endif 76 | embedded_module(const char *name, init_t init) { 77 | if (Py_IsInitialized()) 78 | pybind11_fail("Can't add new modules after the interpreter has been initialized"); 79 | 80 | auto result = PyImport_AppendInittab(name, init); 81 | if (result == -1) 82 | pybind11_fail("Insufficient memory to add a new module"); 83 | } 84 | }; 85 | 86 | NAMESPACE_END(detail) 87 | 88 | /** \rst 89 | Initialize the Python interpreter. No other pybind11 or CPython API functions can be 90 | called before this is done, with the exception of `PYBIND11_EMBEDDED_MODULE`. The 91 | optional parameter can be used to skip the registration of signal handlers (see the 92 | Python documentation for details). Calling this function again after the interpreter 93 | has already been initialized is a fatal error. 94 | \endrst */ 95 | inline void initialize_interpreter(bool init_signal_handlers = true) { 96 | if (Py_IsInitialized()) 97 | pybind11_fail("The interpreter is already running"); 98 | 99 | Py_InitializeEx(init_signal_handlers ? 1 : 0); 100 | 101 | // Make .py files in the working directory available by default 102 | auto sys_path = reinterpret_borrow<list>(module::import("sys").attr("path")); 103 | sys_path.append("."); 104 | } 105 | 106 | /** \rst 107 | Shut down the Python interpreter. No pybind11 or CPython API functions can be called 108 | after this. In addition, pybind11 objects must not outlive the interpreter: 109 | 110 | .. code-block:: cpp 111 | 112 | { // BAD 113 | py::initialize_interpreter(); 114 | auto hello = py::str("Hello, World!"); 115 | py::finalize_interpreter(); 116 | } // <-- BOOM, hello's destructor is called after interpreter shutdown 117 | 118 | { // GOOD 119 | py::initialize_interpreter(); 120 | { // scoped 121 | auto hello = py::str("Hello, World!"); 122 | } // <-- OK, hello is cleaned up properly 123 | py::finalize_interpreter(); 124 | } 125 | 126 | { // BETTER 127 | py::scoped_interpreter guard{}; 128 | auto hello = py::str("Hello, World!"); 129 | } 130 | 131 | ..
warning:: 132 | 133 | The interpreter can be restarted by calling `initialize_interpreter` again. 134 | Modules created using pybind11 can be safely re-initialized. However, Python 135 | itself cannot completely unload binary extension modules and there are several 136 | caveats with regard to interpreter restarting. All the details can be found 137 | in the CPython documentation. In short, not all interpreter memory may be 138 | freed, either due to reference cycles or user-created global data. 139 | 140 | \endrst */ 141 | inline void finalize_interpreter() { 142 | handle builtins(PyEval_GetBuiltins()); 143 | const char *id = PYBIND11_INTERNALS_ID; 144 | 145 | // Get the internals pointer (without creating it if it doesn't exist). It's possible for the 146 | // internals to be created during Py_Finalize() (e.g. if a py::capsule calls `get_internals()` 147 | // during destruction), so we get the pointer-pointer here and check it after Py_Finalize(). 148 | detail::internals **internals_ptr_ptr = &detail::get_internals_ptr(); 149 | // It could also be stashed in builtins, so look there too: 150 | if (builtins.contains(id) && isinstance<capsule>(builtins[id])) 151 | internals_ptr_ptr = capsule(builtins[id]); 152 | 153 | Py_Finalize(); 154 | 155 | if (internals_ptr_ptr) { 156 | delete *internals_ptr_ptr; 157 | *internals_ptr_ptr = nullptr; 158 | } 159 | } 160 | 161 | /** \rst 162 | Scope guard version of `initialize_interpreter` and `finalize_interpreter`. 163 | This is a move-only guard and only a single instance can exist. 164 | 165 | .. code-block:: cpp 166 | 167 | #include <pybind11/embed.h> 168 | 169 | int main() { 170 | py::scoped_interpreter guard{}; 171 | py::print("Hello, World!"); 172 | } // <-- interpreter shutdown 173 | \endrst */ 174 | class scoped_interpreter { 175 | public: 176 | scoped_interpreter(bool init_signal_handlers = true) { 177 | initialize_interpreter(init_signal_handlers); 178 | } 179 | 180 | scoped_interpreter(const scoped_interpreter &) = delete; 181 | scoped_interpreter(scoped_interpreter &&other) noexcept { other.is_valid = false; } 182 | scoped_interpreter &operator=(const scoped_interpreter &) = delete; 183 | scoped_interpreter &operator=(scoped_interpreter &&) = delete; 184 | 185 | ~scoped_interpreter() { 186 | if (is_valid) 187 | finalize_interpreter(); 188 | } 189 | 190 | private: 191 | bool is_valid = true; 192 | }; 193 | 194 | NAMESPACE_END(pybind11) 195 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import time 3 | import math 4 | import os 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | import locality_aware_nms as nms_locality 9 | import lanms 10 | 11 | tf.app.flags.DEFINE_string('test_data_path', '/tmp/ch4_test_images/images/', '') 12 | tf.app.flags.DEFINE_string('gpu_list', '0', '') 13 | tf.app.flags.DEFINE_string('checkpoint_path', '/tmp/east_icdar2015_resnet_v1_50_rbox/', '') 14 | tf.app.flags.DEFINE_string('output_dir', '/tmp/ch4_test_images/images/', '') 15 | tf.app.flags.DEFINE_bool('no_write_images', False, 'do not write images') 16 | 17 | import model 18 | from icdar import restore_rectangle 19 | 20 | FLAGS = tf.app.flags.FLAGS 21 | 22 | def get_images(): 23 | ''' 24 | find image files in test data path 25 | :return: list of files found 26 | ''' 27 | files = [] 28 | exts = ['jpg', 'png', 'jpeg', 'JPG'] 29 | for parent, dirnames, filenames in os.walk(FLAGS.test_data_path): 30 | for filename in filenames: 31 | for ext in
exts:
32 |                 if filename.endswith(ext):
33 |                     files.append(os.path.join(parent, filename))
34 |                     break
35 |     print('Found {} images'.format(len(files)))
36 |     return files
37 | 
38 | 
39 | def resize_image(im, max_side_len=2400):
40 |     '''
41 |     resize image to a size multiple of 32, which is required by the network
42 |     :param im: the input image
43 |     :param max_side_len: limit on the max side length, to avoid running out of GPU memory
44 |     :return: the resized image and the resize ratio
45 |     '''
46 |     h, w, _ = im.shape
47 | 
48 |     resize_w = w
49 |     resize_h = h
50 | 
51 |     # limit the max side
52 |     if max(resize_h, resize_w) > max_side_len:
53 |         ratio = float(max_side_len) / resize_h if resize_h > resize_w else float(max_side_len) / resize_w
54 |     else:
55 |         ratio = 1.
56 |     resize_h = int(resize_h * ratio)
57 |     resize_w = int(resize_w * ratio)
58 | 
59 |     resize_h = resize_h if resize_h % 32 == 0 else (resize_h // 32 - 1) * 32
60 |     resize_w = resize_w if resize_w % 32 == 0 else (resize_w // 32 - 1) * 32
61 |     im = cv2.resize(im, (int(resize_w), int(resize_h)))
62 | 
63 |     ratio_h = resize_h / float(h)
64 |     ratio_w = resize_w / float(w)
65 | 
66 |     return im, (ratio_h, ratio_w)
67 | 
68 | 
69 | def detect(score_map, geo_map, timer, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
70 |     '''
71 |     restore text boxes from score map and geo map
72 |     :param score_map:
73 |     :param geo_map:
74 |     :param timer:
75 |     :param score_map_thresh: threshold for score map
76 |     :param box_thresh: threshold for boxes
77 |     :param nms_thres: threshold for nms
78 |     :return:
79 |     '''
80 |     if len(score_map.shape) == 4:
81 |         score_map = score_map[0, :, :, 0]
82 |         geo_map = geo_map[0, :, :, :]
83 |     # filter the score map
84 |     xy_text = np.argwhere(score_map > score_map_thresh)
85 |     # sort the text boxes via the y axis
86 |     xy_text = xy_text[np.argsort(xy_text[:, 0])]
87 |     # restore
88 |     start = time.time()
89 |     text_box_restored = restore_rectangle(xy_text[:, ::-1]*4, geo_map[xy_text[:, 0], xy_text[:, 1], :]) # N*4*2
90 |     print('{} text boxes before nms'.format(text_box_restored.shape[0]))
91 |     boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
92 |     boxes[:, :8] = text_box_restored.reshape((-1, 8))
93 |     boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
94 |     timer['restore'] = time.time() - start
95 |     # nms part
96 |     start = time.time()
97 |     # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
98 |     boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
99 |     timer['nms'] = time.time() - start
100 | 
101 |     if boxes.shape[0] == 0:
102 |         return None, timer
103 | 
104 |     # here we filter some low-score boxes by the average score map; this is different from the original paper
105 |     for i, box in enumerate(boxes):
106 |         mask = np.zeros_like(score_map, dtype=np.uint8)
107 |         cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
108 |         boxes[i, 8] = cv2.mean(score_map, mask)[0]
109 |     boxes = boxes[boxes[:, 8] > box_thresh]
110 | 
111 |     return boxes, timer
112 | 
113 | 
114 | def sort_poly(p):
115 |     min_axis = np.argmin(np.sum(p, axis=1))
116 |     p = p[[min_axis, (min_axis+1)%4, (min_axis+2)%4, (min_axis+3)%4]]
117 |     if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
118 |         return p
119 |     else:
120 |         return p[[0, 3, 2, 1]]
121 | 
122 | 
123 | def main(argv=None):
125 |     os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
126 | 
127 | 
128 |     try:
129 |         os.makedirs(FLAGS.output_dir)
130 |     except OSError as e:
131 |         if e.errno != 17:  # 17 == errno.EEXIST: output_dir already exists
132 |             raise
133 | 
134 |     with
tf.get_default_graph().as_default():
135 |         input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
136 |         global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
137 | 
138 |         f_score, f_geometry = model.model(input_images, is_training=False)
139 | 
140 |         variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
141 |         saver = tf.train.Saver(variable_averages.variables_to_restore())
142 | 
143 |         with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
144 |             ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
145 |             model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
146 |             print('Restore from {}'.format(model_path))
147 |             saver.restore(sess, model_path)
148 | 
149 |             im_fn_list = get_images()
150 |             for im_fn in im_fn_list:
151 |                 im = cv2.imread(im_fn)[:, :, ::-1]
152 |                 start_time = time.time()
153 |                 im_resized, (ratio_h, ratio_w) = resize_image(im)
154 | 
155 |                 timer = {'net': 0, 'restore': 0, 'nms': 0}
156 |                 start = time.time()
157 |                 score, geometry = sess.run([f_score, f_geometry], feed_dict={input_images: [im_resized]})
158 |                 timer['net'] = time.time() - start
159 | 
160 |                 boxes, timer = detect(score_map=score, geo_map=geometry, timer=timer)
161 |                 print('{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
162 |                     im_fn, timer['net']*1000, timer['restore']*1000, timer['nms']*1000))
163 | 
164 |                 if boxes is not None:
165 |                     boxes = boxes[:, :8].reshape((-1, 4, 2))
166 |                     boxes[:, :, 0] /= ratio_w
167 |                     boxes[:, :, 1] /= ratio_h
168 | 
169 |                 duration = time.time() - start_time
170 |                 print('[timing] {}'.format(duration))
171 | 
172 |                 # save to file
173 |                 if boxes is not None:
174 |                     res_file = os.path.join(
175 |                         FLAGS.output_dir,
176 |                         '{}.txt'.format(
177 |                             os.path.basename(im_fn).split('.')[0]))
178 | 
179 |                     with open(res_file, 'w') as f:
180 |                         for box in boxes:
181 |                             # to avoid submitting errors
182 |                             box = sort_poly(box.astype(np.int32))
183 |                             print(np.shape(box))
184 |                             if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3]-box[0]) < 5:
185 |                                 continue
186 |                             f.write('{},{},{},{},{},{},{},{}\r\n'.format(
187 |                                 box[0, 0], box[0, 1], box[1, 0], box[1, 1], box[2, 0], box[2, 1], box[3, 0], box[3, 1],
188 |                             ))
189 |                             cv2.polylines(im[:, :, ::-1], [box.astype(np.int32).reshape((-1, 1, 2))], True, color=(255, 255, 0), thickness=1)
190 |                 if not FLAGS.no_write_images:
191 |                     img_path = os.path.join(FLAGS.output_dir, os.path.basename(im_fn))
192 |                     cv2.imwrite(img_path, im[:, :, ::-1])
193 | 
194 | if __name__ == '__main__':
195 |     tf.app.run()
196 | 
--------------------------------------------------------------------------------
/nets/NASNet/pnasnet.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================== 15 | """Contains the definition for the PNASNet classification networks. 16 | 17 | Paper: https://arxiv.org/abs/1712.00559 18 | """ 19 | 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | 24 | import copy 25 | import tensorflow as tf 26 | 27 | from nets.NASNet import nasnet 28 | from nets.NASNet import nasnet_utils 29 | 30 | arg_scope = tf.contrib.framework.arg_scope 31 | slim = tf.contrib.slim 32 | 33 | 34 | def large_imagenet_config(): 35 | """Large ImageNet configuration based on PNASNet-5.""" 36 | return tf.contrib.training.HParams( 37 | stem_multiplier=3.0, 38 | dense_dropout_keep_prob=0.5, 39 | num_cells=12, 40 | filter_scaling_rate=2.0, 41 | num_conv_filters=216, 42 | drop_path_keep_prob=0.6, 43 | use_aux_head=1, 44 | num_reduction_layers=2, 45 | data_format='NHWC', 46 | total_training_steps=250000, 47 | ) 48 | 49 | 50 | def pnasnet_large_arg_scope(weight_decay=4e-5, batch_norm_decay=0.9997, 51 | batch_norm_epsilon=0.001): 52 | """Default arg scope for the PNASNet Large ImageNet model.""" 53 | return nasnet.nasnet_large_arg_scope( 54 | weight_decay, batch_norm_decay, batch_norm_epsilon) 55 | 56 | 57 | def _build_pnasnet_base(images, 58 | normal_cell, 59 | num_classes, 60 | hparams, 61 | is_training, 62 | final_endpoint=None): 63 | """Constructs a PNASNet image model.""" 64 | 65 | end_points = {} 66 | 67 | def add_and_check_endpoint(endpoint_name, net): 68 | end_points[endpoint_name] = net 69 | return final_endpoint and (endpoint_name == final_endpoint) 70 | 71 | # Find where to place the reduction cells or stride normal cells 72 | reduction_indices = nasnet_utils.calc_reduction_layers( 73 | hparams.num_cells, hparams.num_reduction_layers) 74 | print('reduction_indices: ', reduction_indices) 75 | # pylint: disable=protected-access 76 | stem = lambda: nasnet._imagenet_stem(images, hparams, normal_cell) 77 | # pylint: enable=protected-access 78 | net, cell_outputs = stem() 79 | end_points['scale-1'] = cell_outputs[1] 80 | end_points['scale-2'] = cell_outputs[2] 81 | end_points['scale-3'] = cell_outputs[3] 82 | if add_and_check_endpoint('Stem', net): 83 | return net, end_points 84 | 85 | # Setup for building in the auxiliary head. 
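# By construction below, the auxiliary classifier (when enabled) is attached to
# the cell immediately preceding the second reduction cell.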
86 |   aux_head_cell_idxes = []
87 |   if len(reduction_indices) >= 2:
88 |     aux_head_cell_idxes.append(reduction_indices[1] - 1)
89 | 
90 |   # Run the cells
91 |   filter_scaling = 1.0
92 |   # true_cell_num accounts for the stem cells
93 |   true_cell_num = 2
94 |   print('hparams.num_cells is ', hparams.num_cells)
95 |   for cell_num in range(hparams.num_cells):
96 |     is_reduction = cell_num in reduction_indices
97 |     stride = 2 if is_reduction else 1
98 |     if is_reduction: filter_scaling *= hparams.filter_scaling_rate
99 |     prev_layer = cell_outputs[-2]
100 |     net = normal_cell(
101 |         net,
102 |         scope='cell_{}'.format(cell_num),
103 |         filter_scaling=filter_scaling,
104 |         stride=stride,
105 |         prev_layer=prev_layer,
106 |         cell_num=true_cell_num)
107 |     if add_and_check_endpoint('Cell_{}'.format(cell_num), net):
108 |       return net, end_points
109 |     true_cell_num += 1
110 |     cell_outputs.append(net)
111 | 
112 |     if (hparams.use_aux_head and cell_num in aux_head_cell_idxes and
113 |         num_classes and is_training):
114 |       aux_net = tf.nn.relu(net)
115 |       # pylint: disable=protected-access
116 |       nasnet._build_aux_head(aux_net, end_points, num_classes, hparams,
117 |                              scope='aux_{}'.format(cell_num))
118 |       # pylint: enable=protected-access
119 | 
120 |   # Final softmax layer
121 |   with tf.variable_scope('final_layer'):
122 |     net = tf.nn.relu(net)
123 |     net = nasnet_utils.global_avg_pool(net)
124 |     if add_and_check_endpoint('global_pool', net) or not num_classes:
125 |       return net, end_points
126 |     net = slim.dropout(net, hparams.dense_dropout_keep_prob, scope='dropout')
127 |     logits = slim.fully_connected(net, num_classes)
128 | 
129 |     if add_and_check_endpoint('Logits', logits):
130 |       return net, end_points
131 | 
132 |     predictions = tf.nn.softmax(logits, name='predictions')
133 |     if add_and_check_endpoint('Predictions', predictions):
134 |       return net, end_points
135 |   return logits, end_points
136 | 
137 | 
138 | def build_pnasnet_large(images,
139 |                         num_classes,
140 |                         is_training=True,
141 |                         final_endpoint=None,
142 |                         config=None):
143 |   """Build PNASNet Large model for the ImageNet Dataset."""
144 |   hparams = copy.deepcopy(config) if config else large_imagenet_config()
145 |   # pylint: disable=protected-access
146 |   nasnet._update_hparams(hparams, is_training)
147 |   # pylint: enable=protected-access
148 | 
149 |   if tf.test.is_gpu_available() and hparams.data_format == 'NHWC':
150 |     tf.logging.info('A GPU is available on the machine, consider using NCHW '
151 |                     'data format for increased speed on GPU.')
152 | 
153 |   if hparams.data_format == 'NCHW':
154 |     images = tf.transpose(images, [0, 3, 1, 2])
155 | 
156 |   # Calculate the total number of cells in the network.
157 |   # There is no distinction between reduction and normal cells in PNAS, so the
158 |   # total number of cells is equal to the number of normal cells plus the number
159 |   # of stem cells (two by default).
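  # For example, with the default large_imagenet_config() above (num_cells=12),
  # total_num_cells works out to 12 + 2 = 14.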
160 | total_num_cells = hparams.num_cells + 2 161 | 162 | normal_cell = PNasNetNormalCell(hparams.num_conv_filters, 163 | hparams.drop_path_keep_prob, total_num_cells, 164 | hparams.total_training_steps) 165 | with arg_scope( 166 | [slim.dropout, nasnet_utils.drop_path, slim.batch_norm], 167 | is_training=is_training): 168 | with arg_scope([slim.avg_pool2d, slim.max_pool2d, slim.conv2d, 169 | slim.batch_norm, slim.separable_conv2d, 170 | nasnet_utils.factorized_reduction, 171 | nasnet_utils.global_avg_pool, 172 | nasnet_utils.get_channel_index, 173 | nasnet_utils.get_channel_dim], 174 | data_format=hparams.data_format): 175 | return _build_pnasnet_base( 176 | images, 177 | normal_cell=normal_cell, 178 | num_classes=num_classes, 179 | hparams=hparams, 180 | is_training=is_training, 181 | final_endpoint=final_endpoint) 182 | build_pnasnet_large.default_image_size = 331 183 | 184 | 185 | 186 | class PNasNetNormalCell(nasnet_utils.NasNetABaseCell): 187 | """PNASNet Normal Cell.""" 188 | 189 | def __init__(self, num_conv_filters, drop_path_keep_prob, total_num_cells, 190 | total_training_steps): 191 | # Configuration for the PNASNet-5 model. 192 | operations = [ 193 | 'separable_5x5_2', 'max_pool_3x3', 'separable_7x7_2', 'max_pool_3x3', 194 | 'separable_5x5_2', 'separable_3x3_2', 'separable_3x3_2', 'max_pool_3x3', 195 | 'separable_3x3_2', 'none' 196 | ] 197 | used_hiddenstates = [1, 1, 0, 0, 0, 0, 0] 198 | hiddenstate_indices = [1, 1, 0, 0, 0, 0, 4, 0, 1, 0] 199 | 200 | super(PNasNetNormalCell, self).__init__( 201 | num_conv_filters, operations, used_hiddenstates, hiddenstate_indices, 202 | drop_path_keep_prob, total_num_cells, total_training_steps) 203 | -------------------------------------------------------------------------------- /nets/Inception_ResNet_V2/nets/dcgan.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """DCGAN generator and discriminator from https://arxiv.org/abs/1511.06434.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | from math import log 21 | 22 | from six.moves import xrange # pylint: disable=redefined-builtin 23 | import tensorflow as tf 24 | 25 | slim = tf.contrib.slim 26 | 27 | 28 | def _validate_image_inputs(inputs): 29 | inputs.get_shape().assert_has_rank(4) 30 | inputs.get_shape()[1:3].assert_is_fully_defined() 31 | if inputs.get_shape()[1] != inputs.get_shape()[2]: 32 | raise ValueError('Input tensor does not have equal width and height: ', 33 | inputs.get_shape()[1:3]) 34 | width = inputs.get_shape().as_list()[1] 35 | if log(width, 2) != int(log(width, 2)): 36 | raise ValueError('Input tensor `width` is not a power of 2: ', width) 37 | 38 | 39 | # TODO(joelshor): Use fused batch norm by default. 
Investigate why some GAN
40 | # setups need the gradient of gradient FusedBatchNormGrad.
41 | def discriminator(inputs,
42 |                   depth=64,
43 |                   is_training=True,
44 |                   reuse=None,
45 |                   scope='Discriminator',
46 |                   fused_batch_norm=False):
47 |   """Discriminator network for DCGAN.
48 | 
49 |   Construct discriminator network from inputs to the final endpoint.
50 | 
51 |   Args:
52 |     inputs: A tensor of size [batch_size, height, width, channels]. Must be
53 |       floating point.
54 |     depth: Number of channels in first convolution layer.
55 |     is_training: Whether the network is for training or not.
56 |     reuse: Whether or not the network variables should be reused. `scope`
57 |       must be given to be reused.
58 |     scope: Optional variable_scope.
59 |     fused_batch_norm: If `True`, use a faster, fused implementation of
60 |       batch norm.
61 | 
62 |   Returns:
63 |     logits: The pre-softmax activations, a tensor of size [batch_size, 1]
64 |     end_points: a dictionary from components of the network to their activation.
65 | 
66 |   Raises:
67 |     ValueError: If the input image shape is not 4-dimensional, if the spatial
68 |       dimensions aren't defined at graph construction time, if the spatial
69 |       dimensions aren't square, or if the spatial dimensions aren't a power of
70 |       two.
71 |   """
72 | 
73 |   normalizer_fn = slim.batch_norm
74 |   normalizer_fn_args = {
75 |       'is_training': is_training,
76 |       'zero_debias_moving_mean': True,
77 |       'fused': fused_batch_norm,
78 |   }
79 | 
80 |   _validate_image_inputs(inputs)
81 |   inp_shape = inputs.get_shape().as_list()[1]
82 | 
83 |   end_points = {}
84 |   with tf.variable_scope(scope, values=[inputs], reuse=reuse) as scope:
85 |     with slim.arg_scope([normalizer_fn], **normalizer_fn_args):
86 |       with slim.arg_scope([slim.conv2d],
87 |                           stride=2,
88 |                           kernel_size=4,
89 |                           activation_fn=tf.nn.leaky_relu):
90 |         net = inputs
91 |         for i in xrange(int(log(inp_shape, 2))):
92 |           scope = 'conv%i' % (i + 1)
93 |           current_depth = depth * 2**i
94 |           normalizer_fn_ = None if i == 0 else normalizer_fn
95 |           net = slim.conv2d(
96 |               net, current_depth, normalizer_fn=normalizer_fn_, scope=scope)
97 |           end_points[scope] = net
98 | 
99 |         logits = slim.conv2d(net, 1, kernel_size=1, stride=1, padding='VALID',
100 |                              normalizer_fn=None, activation_fn=None)
101 |         logits = tf.reshape(logits, [-1, 1])
102 |         end_points['logits'] = logits
103 | 
104 |   return logits, end_points
105 | 
106 | 
107 | # TODO(joelshor): Use fused batch norm by default. Investigate why some GAN
108 | # setups need the gradient of gradient FusedBatchNormGrad.
109 | def generator(inputs,
110 |               depth=64,
111 |               final_size=32,
112 |               num_outputs=3,
113 |               is_training=True,
114 |               reuse=None,
115 |               scope='Generator',
116 |               fused_batch_norm=False):
117 |   """Generator network for DCGAN.
118 | 
119 |   Construct generator network from inputs to the final endpoint.
120 | 
121 |   Args:
122 |     inputs: A 2-D tensor of shape [batch_size, N].
123 |     depth: Number of channels in last deconvolution layer.
124 |     final_size: The shape of the final output.
125 |     num_outputs: Number of output features. For images, this is the number of
126 |       channels.
127 |     is_training: Whether the network is for training or not.
128 |     reuse: Whether or not the network variables should be reused. `scope`
129 |       must be given to be reused.
130 |     scope: Optional variable_scope.
131 |     fused_batch_norm: If `True`, use a faster, fused implementation of
132 |       batch norm.
133 | 
134 |   Returns:
135 |     logits: the pre-softmax activations, a tensor of size
136 |       [batch_size, 32, 32, channels]
137 |     end_points: a dictionary from components of the network to their activation.
138 | 
139 |   Raises:
140 |     ValueError: If `inputs` is not 2-dimensional.
141 |     ValueError: If `final_size` isn't a power of 2 or is less than 8.
142 |   """
143 |   normalizer_fn = slim.batch_norm
144 |   normalizer_fn_args = {
145 |       'is_training': is_training,
146 |       'zero_debias_moving_mean': True,
147 |       'fused': fused_batch_norm,
148 |   }
149 | 
150 |   inputs.get_shape().assert_has_rank(2)
151 |   if log(final_size, 2) != int(log(final_size, 2)):
152 |     raise ValueError('`final_size` (%i) must be a power of 2.' % final_size)
153 |   if final_size < 8:
154 |     raise ValueError('`final_size` (%i) must be at least 8.' % final_size)
155 | 
156 |   end_points = {}
157 |   num_layers = int(log(final_size, 2)) - 1
158 |   with tf.variable_scope(scope, values=[inputs], reuse=reuse) as scope:
159 |     with slim.arg_scope([normalizer_fn], **normalizer_fn_args):
160 |       with slim.arg_scope([slim.conv2d_transpose],
161 |                           normalizer_fn=normalizer_fn,
162 |                           stride=2,
163 |                           kernel_size=4):
164 |         net = tf.expand_dims(tf.expand_dims(inputs, 1), 1)
165 | 
166 |         # First upscaling is different because it takes the input vector.
167 |         current_depth = depth * 2 ** (num_layers - 1)
168 |         scope = 'deconv1'
169 |         net = slim.conv2d_transpose(
170 |             net, current_depth, stride=1, padding='VALID', scope=scope)
171 |         end_points[scope] = net
172 | 
173 |         for i in xrange(2, num_layers):
174 |           scope = 'deconv%i' % (i)
175 |           current_depth = depth * 2 ** (num_layers - i)
176 |           net = slim.conv2d_transpose(net, current_depth, scope=scope)
177 |           end_points[scope] = net
178 | 
179 |         # Last layer has different normalizer and activation.
180 |         scope = 'deconv%i' % (num_layers)
181 |         net = slim.conv2d_transpose(
182 |             net, depth, normalizer_fn=None, activation_fn=None, scope=scope)
183 |         end_points[scope] = net
184 | 
185 |         # Convert to proper channels.
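        # (A 1x1, stride-1 convolution with no normalizer or activation maps the
        # remaining feature channels down to `num_outputs`, e.g. 3 for RGB,
        # without changing the spatial dimensions.)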
186 |         scope = 'logits'
187 |         logits = slim.conv2d(
188 |             net,
189 |             num_outputs,
190 |             normalizer_fn=None,
191 |             activation_fn=None,
192 |             kernel_size=1,
193 |             stride=1,
194 |             padding='VALID',
195 |             scope=scope)
196 |         end_points[scope] = logits
197 | 
198 |         logits.get_shape().assert_has_rank(4)
199 |         logits.get_shape().assert_is_compatible_with(
200 |             [None, final_size, final_size, num_outputs])
201 | 
202 |   return logits, end_points
203 | 
--------------------------------------------------------------------------------
/cal_IoU_gt_py.py:
--------------------------------------------------------------------------------
1 | # -*- coding=utf-8 -*-
2 | import numpy as np
3 | import logging
4 | from icdar import restore_rectangle
5 | from multiprocessing import Pool
7 | from shapely.geometry import Polygon
8 | import multiprocessing
9 | 
10 | def cal_IoU_gt_py_multiprocess(pred_geo, pred_cls, gt, threshold=0.8):
11 |     def compute_IoU(polygon1, polygon2):
12 |         '''
13 |         compute the IoU of two quadrilaterals
14 |         :param polygon1: 4, 2
15 |         :param polygon2: 4, 2
16 |         :return: 0~1 value
17 |         '''
18 |         polygon1 = Polygon(polygon1)
19 |         if not polygon1.is_valid:
20 |             polygon1 = polygon1.buffer(0)
21 |         polygon2 = Polygon(polygon2)
22 |         if not polygon2.is_valid:
23 |             polygon2 = polygon2.buffer(0)
24 |         intersection_polygon = polygon1.intersection(polygon2)
25 |         if not intersection_polygon.is_valid:
26 |             return 0.0
27 |         intersection_area = intersection_polygon.area
28 |         union_area = polygon1.area + polygon2.area - intersection_area
29 |         return (1.0 * intersection_area) / (1.0 * union_area)
30 | 
31 |     '''
32 |     given the predicted pred_geo and pred_cls, compute for every pixel the IoU between its restored box and the ground truth
33 |     :param pred_geo: N, W, H, 5
34 |     :param pred_cls: N, W, H, 1
35 |     :param gt: N, M, 4, 2
36 |     :param threshold: 0.8
37 |     :return:
38 |     '''
39 |     # squeeze out dimensions of size 1
40 | 
41 |     print 'hello0'
42 |     pred_cls = np.squeeze(pred_cls)
43 |     shape = np.shape(pred_geo)
44 |     IoU_gt = np.zeros([shape[0], shape[1], shape[2], 1], np.float32)
45 | 
46 |     for batch_id in range(shape[0]):
47 |         process_num = 8
48 |         pool = Pool(processes=process_num)
49 |         print 'hello1'
50 |         score_map = pred_cls[batch_id]
51 |         geo_map = pred_geo[batch_id]
52 |         cur_gt = gt[batch_id]
53 | 
54 |         print 'hello2'
55 |         # print 'the shape of score_map is ', np.shape(score_map)
56 |         # print 'the shape of geo_map is ', np.shape(geo_map)
57 |         if len(np.shape(score_map)) != 2:
58 |             logging.log(logging.ERROR, 'score map shape isn\'t correct!')
59 |             assert False
60 |         xy_text = np.argwhere(score_map > threshold)
61 |         # sort the text boxes via the y axis
62 |         xy_text = xy_text[np.argsort(xy_text[:, 0])]
63 |         # print 'The number of points that satisfy the condition is ', len(xy_text)
64 |         text_box_restored = restore_rectangle(xy_text[:, ::-1], geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
65 |         # print np.shape(text_box_restored)
66 | 
67 |         per_process_num = len(xy_text) / process_num + 1
68 |         xss = {}
69 |         yss = {}
70 |         boxss = {}
71 | 
72 |         print 'hello3'
73 |         for idx, ((x, y), box) in enumerate(zip(xy_text, text_box_restored)):
74 |             process_id = idx / per_process_num
75 |             if process_id not in xss:
76 |                 xss[process_id] = []
77 |                 yss[process_id] = []
78 |                 boxss[process_id] = []
79 |             xss[process_id].append(x)
80 |             yss[process_id].append(y)
81 |             boxss[process_id].append(box)
86 | 
87 |         print 'hello4'
88 | 
89 |         def process_single_test():
90 |             return 1.0
91 |         def process_single(boxs, cur_gt):
92 | 
print 'hello4-0'
93 |                 IoU_values = []
94 |                 print 'hello4-1'
96 |                 for box in boxs:
97 |                     cur_IoU_value = 0.0
98 |                     print 'hello4-2'
99 |                     for gt_id in range(len(cur_gt)):
100 |                         if np.sum(cur_gt[gt_id]) == -8:  # a gt box of all -1s (4*2*(-1) == -8) marks padding
101 |                             break
102 |                         cur_IoU_value = max(cur_IoU_value, compute_IoU(np.asarray(box), np.asarray(cur_gt[gt_id])))
103 |                     IoU_values.append(cur_IoU_value)
104 |                     print 'hello4-3'
106 |                 return IoU_values
107 |             results = []
108 | 
109 |             print 'hello5'
110 |             for process_id in range(process_num):
111 |                 print 'hello6'
112 |                 # results.append(pool.apply_async(func=process_single, args=(boxss[process_id], cur_gt, )))  # nested functions generally can't be pickled by Pool
113 |                 results.append(pool.apply_async(func=process_single_test, args=()))
114 |             print 'hello7'
115 |             pool.close()
116 |             pool.join()
117 | 
118 |             print 'hello8'
119 |             for process_id, res in enumerate(results):
120 |                 xs = xss[process_id]
121 |                 ys = yss[process_id]
122 | 
123 |                 print 'hello9'
124 |                 xs = np.asarray(xs)
125 |                 ys = np.asarray(ys)
126 |                 print np.shape(xs)
127 |                 print np.shape(ys)
128 |                 IoU_values = res.get()
131 |                 print np.shape(IoU_values)
132 |                 print np.shape(xs)
133 |                 print np.shape(ys)
134 |                 IoU_gt[batch_id, xs, ys, 0] = IoU_values
135 | 
136 |             print 'hello10'
137 | 
138 |     print 'hello11'
139 |     return IoU_gt
140 | 
141 | def cal_IoU_gt_py(pred_geo, pred_cls, gt, threshold=0.8):
142 |     def compute_IoU(polygon1, polygon2):
143 |         '''
144 |         compute the IoU of two quadrilaterals
145 |         :param polygon1: 4, 2
146 |         :param polygon2: 4, 2
147 |         :return: 0~1 value
148 |         '''
149 |         polygon1 = Polygon(polygon1)
150 |         if not polygon1.is_valid:
151 |             polygon1 = polygon1.buffer(0)
152 |         polygon2 = Polygon(polygon2)
153 |         if not polygon2.is_valid:
154 |             polygon2 = polygon2.buffer(0)
155 |         intersection_polygon = polygon1.intersection(polygon2)
156 |         if not intersection_polygon.is_valid:
157 |             return 0.0
158 |         intersection_area = intersection_polygon.area
159 |         union_area = polygon1.area + polygon2.area - intersection_area
160 |         return (1.0 * intersection_area) / (1.0 * union_area)
161 | 
162 |     '''
163 |     given the predicted pred_geo and pred_cls, compute for every pixel the IoU between its restored box and the ground truth
164 |     :param pred_geo: N, W, H, 5
165 |     :param pred_cls: N, W, H, 1
166 |     :param gt: N, M, 4, 2
167 |     :param threshold: 0.8
168 |     :return:
169 |     '''
170 |     # squeeze out dimensions of size 1
171 | 
172 |     pred_cls = np.squeeze(pred_cls)
173 |     shape = np.shape(pred_geo)
174 |     IoU_gt = np.zeros([shape[0], shape[1], shape[2], 1], np.float32)
175 | 
176 |     for batch_id in range(shape[0]):
177 |         score_map = pred_cls[batch_id]
178 |         geo_map = pred_geo[batch_id]
179 |         cur_gt = gt[batch_id]
180 | 
181 |         if len(np.shape(score_map)) != 2:
182 |             logging.log(logging.ERROR, 'score map shape isn\'t correct!')
183 |             assert False
184 |         xy_text = np.argwhere(score_map > threshold)
185 |         # sort the text boxes via the y axis
186 |         xy_text = xy_text[np.argsort(xy_text[:, 0])]
187 |         # print 'The number of points that satisfy the condition is ', len(xy_text)
188 |         text_box_restored = restore_rectangle(xy_text[:, ::-1], geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
189 |         # print np.shape(text_box_restored)
190 | 
191 |         for idx, ((x, y), box) in enumerate(zip(xy_text, text_box_restored)):
192 |             cur_IoU_value = 0.0
193 |             for gt_id in range(len(cur_gt)):
194 |                 if np.sum(cur_gt[gt_id]) == -8:  # a gt box of all -1s (4*2*(-1) == -8) marks padding
195 |                     break
196 |                 cur_IoU_value = max(cur_IoU_value, compute_IoU(np.asarray(box), np.asarray(cur_gt[gt_id])))
197 |             IoU_gt[batch_id, x, y, 0] = cur_IoU_value
198 |     return IoU_gt
199 | 
200 | 
201 | if __name__ == 
'__main__': 202 | # pred_geo = np.random.random([2, 512, 512, 5]) 203 | # pred_cls = np.random.random([2, 512, 512, 1]) 204 | # cal_IoU_gt_py(pred_geo, pred_cls, None) 205 | def process_single_test(): 206 | print 'test' 207 | return 1.0 208 | process_num = 8 209 | pool = Pool(processes=process_num) 210 | results = [] 211 | for i in range(process_num): 212 | results.append(pool.apply_async(func=process_single_test, args=())) 213 | pool.close() 214 | pool.join() 215 | for i in range(process_num): 216 | print results[i].get() 217 | 218 | --------------------------------------------------------------------------------
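For reference, a minimal standalone sketch of the quadrilateral-IoU computation that `compute_IoU` above implements (this sketch is not part of the repository; the helper name `iou` and the sample boxes are illustrative only):

import numpy as np
from shapely.geometry import Polygon

def iou(quad1, quad2):
    # Each quad is a (4, 2) array of corner coordinates.
    p1, p2 = Polygon(quad1), Polygon(quad2)
    # buffer(0) repairs self-intersecting polygons, as in compute_IoU above.
    if not p1.is_valid:
        p1 = p1.buffer(0)
    if not p2.is_valid:
        p2 = p2.buffer(0)
    inter = p1.intersection(p2).area
    union = p1.area + p2.area - inter
    return inter / union if union > 0 else 0.0

# Two unit squares overlapping by half: IoU = 0.5 / 1.5 = 1/3.
a = np.array([[0, 0], [1, 0], [1, 1], [0, 1]])
b = np.array([[0.5, 0], [1.5, 0], [1.5, 1], [0.5, 1]])
assert abs(iou(a, b) - 1.0 / 3.0) < 1e-9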