├── .idea
│   └── Unicorn.iml
├── README.md
├── src
│   ├── Application.py
│   ├── DbBuild.py
│   ├── FeatureExtract.py
│   ├── FilesMaker.py
│   ├── ImageDb.py
│   ├── Performance.py
│   ├── Util
│   │   ├── File.py
│   │   └── Path.py
│   ├── config.py
│   ├── inception
│   │   ├── image_processing.py
│   │   ├── inception_model.py
│   │   ├── inception_module.py
│   │   ├── losses.py
│   │   ├── ops.py
│   │   ├── scopes.py
│   │   ├── slim.py
│   │   └── variables.py
│   ├── myComputePrecision.py
│   ├── myGetFeatures.py
│   └── myRetrieval.py
└── test
    └── MultiDbBulidTest.py
/.idea/Unicorn.iml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ImageRetrieval
2 |
3 | ## Requirements
4 | - TensorFlow
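5 |
6 | ## Usage
7 |
8 | A minimal sketch of a retrieval query (the path below is a placeholder; see `src/Application.py` for the full config):
9 |
10 | ```python
11 | from Application import ImageFinder
12 |
13 | finder = ImageFinder(config)  # config dict as in src/Application.py
14 | labels = finder.find(r'D:\path\to\query.jpg')
15 | ```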
--------------------------------------------------------------------------------
/src/Application.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | '''Image retrieval application: look up the label of a query image.'''
3 |
4 |
5 | import ImageDb
6 | import FeatureExtract
7 | import numpy as np
8 |
9 | class ImageFinder(object):
10 | def __init__(self,config):
11 | self.__imageDb = ImageDb.ImageDb(config['db'])
12 | self.__featureExtracter = FeatureExtract.FeatureExtracter(config['extracter'])
13 |
14 |
15 | def find(self, imagePath):
16 |         '''Find the label of an image.
17 | 
18 |         Args
19 |             imagePath: path to the image file
20 |         '''
21 | hashFeature, imageFeature = self.__featureExtracter.extract(imagePath)
22 | # hashFeature = np.zeros(shape=[1, 128], dtype='float32')
23 | # imageFeature = np.zeros(shape=[1, 2048], dtype='float32')
24 | return self.__imageDb.find(hashFeature, imageFeature)
25 |
26 |
27 |
28 | if __name__ == '__main__':
29 |
30 | config = {
31 | 'db': {
32 | 'hashFeatureFilename': r'D:\dev\unicorn\test\data\__NImage\feature\1.bin',
33 | 'imageFeatureFilename': r'D:\dev\unicorn\test\data\__NImage\feature\2.bin',
34 |             'labelsFilename': r'D:\dev\unicorn\test\data\__NImage\feature\generatedLabels.txt',
35 | },
36 | 'extracter': {
37 | 'num_classes': 11,
38 | 'checkpoint_path': 'D:/dev/unicorn/model/model.ckpt-42100'
39 | }
40 | }
41 | imageFinder = ImageFinder(config)
42 |     imagePath = r'D:\dev\unicorn\test\data\__NImage\image\0\0.jpg'
43 | imageFinder.find(imagePath)
44 |
45 |
--------------------------------------------------------------------------------
/src/DbBuild.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | '''Build the image feature database from extracted features.'''
3 |
4 |
5 | import numpy as np
6 | import os
7 | import colorama
8 |
9 | from src import FeatureExtract
10 | from src.Util import Path
11 |
12 |
13 | class DbBuilder(object):
14 | def __init__(self, config, featureExtracter=None):
15 | self.__config = config
16 | self.__count = 0
17 | self.__hashFeatures = None
18 | self.__imageFeatures = None
19 | self.__featureExtracter = featureExtracter if featureExtracter is not None else FeatureExtract.FeatureExtracter(config['extracter'])
20 |
21 |
22 | def __initFeatures(self, totalImages):
23 | self.__hashFeatures = np.zeros(shape=[totalImages, 128], dtype='float32')
24 | self.__imageFeatures = np.zeros(shape=[totalImages, 2048], dtype='float32')
25 |
26 |
27 | def __add(self, hashFeature, imageFeature):
28 | self.__hashFeatures[self.__count] = hashFeature
29 | self.__imageFeatures[self.__count] = imageFeature
30 | self.__count += 1
31 |
32 |
33 | def __save(self):
34 | self.__hashFeatures.tofile(self.__config['db']['hashFeatureFilename'])
35 | self.__imageFeatures.tofile(self.__config['db']['imageFeatureFilename'])
36 | np.savetxt(self.__config['db']['labelsFilename'],self.__labels,fmt='%s',newline='\n')
37 |
38 | def build(self):
39 |         '''Build the image feature database.
40 |         '''
41 | self.__count = 0
42 |         with open(self.__config['imagePathLabelsFilename'], 'r') as file:
43 |             pathLabelPairs = [line.strip().split(' ') for line in file]
44 |         imagePaths = [imagePath for imagePath, label in pathLabelPairs]
45 |         self.__labels = [label for imagePath, label in pathLabelPairs]
46 | #imagePaths = [i for i in Path.ilistFileEx(self.__config['imageRoot'])]
47 | self.__initFeatures(len(imagePaths))
48 | for imagePath in imagePaths:
49 | hashFeature, imageFeature = self.__featureExtracter.extract(imagePath)
50 | self.__add(hashFeature, imageFeature)
51 | self.__save()
52 |
53 |
54 |
55 | if __name__ == '__main__':
56 | # config = {
57 | # #'imageRoot': r'D:\dev\unicorn\test\data\__NImage\image',
58 | # 'db': {
59 | # 'hashFeatureFilename': r'D:\dev\unicorn\test\data\__NImage\feature\1.bin',
60 | # 'imageFeatureFilename': r'D:\dev\unicorn\test\data\__NImage\feature\2.bin',
61 |     #         'labelsFilename': r'D:\dev\unicorn\test\data\__NImage\feature\generatedLabels.txt',
62 | # },
63 | # 'imagePathLabelsFilename': r'D:\dev\unicorn\test\data\__NImage\feature\imagePathLabels.txt',
64 | # 'extracter': {
65 | # 'num_classes': 11,
66 | # 'checkpoint_path': 'D:/dev/unicorn/model/model.ckpt-42100'
67 | # }
68 | # }
69 | # builder = DbBuilder(config)
70 | # builder.build()
71 | pass
72 |
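73 | # Input format note: 'imagePathLabelsFilename' is a plain-text file with one
74 | # "<imagePath> <label>" pair per line, as written by FilesMaker with the
75 | # ['path', 'label'] field list. A hypothetical line:
76 | #   D:\data\image\0\0.jpg 0
77 | # build() extracts features for every listed path and stores them, together
78 | # with the labels, at the filenames given under config['db'].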
--------------------------------------------------------------------------------
/src/FeatureExtract.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | '''Extract hash and image features with the modified Inception-v3 model.'''
3 |
4 |
5 | import tensorflow as tf
6 | import numpy as np
7 | from src.inception import inception_module
8 |
9 | class FeatureExtracter():
10 | def __init__(self,config):
11 | self.__config=config
12 | self.x_image = None
13 | self.__image_buffer = None
14 | self.logits = None
15 | self.highfeatures = None
16 | self.ssss = None
17 | self.__output = None
18 | self.saver = None
19 | self.__sess = None
20 |
21 | self.__buildNet()
22 | self.__loadModel()
23 |
24 | def __loadModel(self):
25 | self.saver.restore(self.__sess, self.__config['checkpoint_path'])
26 |
27 | def __buildInputImagePlaceholder(self):
28 | self.__image_buffer = tf.placeholder("string")
29 | image = tf.image.decode_jpeg(self.__image_buffer, channels=3)
30 | image = tf.image.convert_image_dtype(image, dtype=tf.float32)
31 | image = tf.image.central_crop(image, central_fraction=0.875)
32 | # Resize the image to the original height and width.
33 | image = tf.expand_dims(image, 0)
34 | image = tf.image.resize_bilinear(image, [299, 299], align_corners=False)
35 | image_tensor = tf.squeeze(image, [0])
36 | self.x_image = tf.reshape(image_tensor, [-1, 299, 299, 3])
37 |
38 | def __buildNet(self):
39 | #graph = tf.Graph().as_default()
40 | # Number of classes in the dataset label set plus 1.
41 | # Label 0 is reserved for an (unused) background class.
42 | num_classes = self.__config['num_classes']+1
43 | # setup an input image placeholder to feed image buffer
44 | self.__buildInputImagePlaceholder()
45 | # Build a Graph that computes the logits predictions from the inference model.
46 | # WARNING!!!
47 | self.logits,self.highfeatures, self.ssss = inception_module.inference(self.x_image, num_classes)
48 | # result is the output of the softmax unit
49 | self.__output = tf.nn.softmax(self.ssss, name="result")
50 | # Restore the moving average version of the learned variables for eval.
51 | variable_averages = tf.train.ExponentialMovingAverage(
52 | inception_module.MOVING_AVERAGE_DECAY)
53 | variables_to_restore = variable_averages.variables_to_restore()
54 | self.saver = tf.train.Saver(variables_to_restore)
55 | self.__sess = tf.Session()
56 |
57 | def extract(self, image_path):
58 |         '''Extract features from an image.
59 | 
60 |         Args
61 |             image_path: path of the query image
62 |         '''
63 | image_data = tf.gfile.FastGFile(image_path, 'rb').read()
64 | output, hashFeature, imageFeature = self.__sess.run([self.__output, self.logits, self.highfeatures],
65 | feed_dict={self.__image_buffer: image_data})
66 | hashFeature = np.squeeze(hashFeature)
67 | imageFeature = np.squeeze(imageFeature[0])
68 | imageFeature = np.array([imageFeature])
69 | hashFeature = np.array([hashFeature])
70 |         # Binarize the hash: each activation above 0.5 becomes 1,
71 |         # otherwise 0. This is the vectorized equivalent of the
72 |         # original element-wise loop over hashFeature[0], and keeps
73 |         # the float32 dtype.
74 |         hashFeature = (hashFeature > 0.5).astype(np.float32)
75 | return hashFeature, imageFeature
76 |
77 | if __name__ == '__main__':
78 | pass
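79 |     # A minimal usage sketch (the checkpoint path is a placeholder; the
80 |     # checkpoint must match the modified Inception-v3 in inception_model.py):
81 |     # extracter = FeatureExtracter({'num_classes': 11,
82 |     #                               'checkpoint_path': 'D:/dev/unicorn/model/model.ckpt-42100'})
83 |     # hashFeature, imageFeature = extracter.extract(r'D:\path\to\image.jpg')
84 |     # hashFeature is a 1x128 binary array, imageFeature a 1x2048 float array.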
--------------------------------------------------------------------------------
/src/FilesMaker.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | '''Generate the path/label/alias configuration files.'''
3 |
4 | from Util import Path
5 | from Util import File
6 | from pathlib import PurePath
7 | import itertools
8 | import operator
9 | import numpy as np
10 | import subprocess
11 | import collections
12 | import functools
13 |
14 | ResolveResult = collections.namedtuple('ResolveResult', ['label', 'path', 'alias'])
15 |
16 |
17 | @functools.lru_cache()
18 | def _resolve(path):
19 | _path = PurePath(path)
20 | label = _path.parts[-2]
21 | imgAlias = '_'.join([label, _path.stem])
22 | return ResolveResult(label, path, imgAlias)
23 |
24 |
25 | @functools.lru_cache()
26 | def _extractFields(tp, fields):
27 | return ' '.join([_extractField(field, tp) for field in fields])
28 |
29 |
30 | @functools.singledispatch
31 | def _extractField(field, tp):
32 | pass
33 |
34 |
35 | @_extractField.register(str)
36 | def _(field, tp):
37 | return operator.attrgetter(field)(tp)
38 |
39 |
40 | @_extractField.register(int)
41 | def _(field, tp):
42 | return operator.itemgetter(field)(tp)
43 |
44 |
45 | _RESOLVER_BY_NAME = {
46 | 'default': _resolve
47 | }
48 |
49 |
50 | def makeConfigFile(config, resolver=None, openFileAfterWrite=False):
51 | imageRoot = config['imageRoot']
52 | resolver = resolver or _RESOLVER_BY_NAME[config.get('resolver', 'default')]
53 |
54 | for filename, fields in config['targets']:
55 | _fields = fields if isinstance(fields, tuple) else tuple(fields)
56 |
57 |
58 | seq = (_extractFields(resolver(imagePath), _fields) for imagePath in Path.ilistFileEx(imageRoot))
59 |
60 |         File.writeLines(filename, seq)
61 | if openFileAfterWrite:
62 | subprocess.Popen(f'notepad {filename}', shell=True)
63 |
64 |
65 | if __name__ == '__main__':
66 | # config = {
67 | # 'imageRoot': r'D:\dev\unicorn\test\data\__NImage\image',
68 | # 'targets': [(r'D:\dev\unicorn\test\data\__NImage\feature\imagePathLabels.txt', ['path','label']),
69 | # (r'D:\dev\unicorn\test\data\__NImage\feature\imagePathAlias.txt', ['path', 'alias']),
70 | # (r'D:\dev\unicorn\test\data\__NImage\feature\aliasLabel.txt',['alias','label'])]
71 | # }
72 | # # FM = FileMaker(config)
73 | # # FM.run()
74 | # makeConfigFile(config, openFileAfterWrite=True)
75 |
76 |
77 | pass
78 |
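79 | # Resolver behaviour, for reference: with the default _resolve, a path such
80 | # as r'D:\data\image\0\7.jpg' (hypothetical) yields
81 | # ResolveResult(label='0', path=r'D:\data\image\0\7.jpg', alias='0_7'),
82 | # i.e. the label is the parent directory name and the alias is
83 | # '<label>_<stem>'.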
--------------------------------------------------------------------------------
/src/ImageDb.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | '''Image feature database: coarse hash lookup plus feature re-ranking.'''
3 |
4 |
5 | import cv2
6 | import numpy as np
7 |
8 |
9 | class ImageDb(object):
10 |     '''Image database.
11 |     '''
12 | _FLANN_INDEX_KDTREE = 1
13 | _FLANN_INDEX_PARAM = {'algorithm': _FLANN_INDEX_KDTREE, 'trees': 4}
14 | _K_OF_KNN = 1
15 | _HASH_FEATURE_DIM = 128
16 |     _IMAGE_FEATURE_DIM = 2048
17 |
18 |
19 | def __init__(self, config):
20 |         '''
21 |         Args
22 |             config: configuration dict
23 |         '''
24 |
25 | self.__config = config
26 | self.__hashFeatures = np.fromfile(config['hashFeatureFilename'], dtype=np.float32)
27 | self.__hashFeatures.shape = self.__hashFeatures.size // ImageDb._HASH_FEATURE_DIM, ImageDb._HASH_FEATURE_DIM
28 | self.__imageFeatures = np.fromfile(config['imageFeatureFilename'],dtype=np.float32)
29 |         self.__imageFeatures.shape = self.__imageFeatures.size // ImageDb._IMAGE_FEATURE_DIM, ImageDb._IMAGE_FEATURE_DIM
30 | # self.__imageLabels = np.fromfile(config['imageLabelsFilename'],dtype=np.string_)
31 | imageLabels = np.loadtxt(config['labelsFilename'],dtype=np.string_).astype(str).tolist()
32 | self.__imageLabels = imageLabels if isinstance(imageLabels, list) else [imageLabels]
33 | self.__indexer = cv2.flann_Index(self.__hashFeatures, ImageDb._FLANN_INDEX_PARAM)
34 |
35 |
36 | def find(self, hashFeature, imageFeature):
37 |         '''Find the label of an image.
38 | 
39 |         Args
40 |             hashFeature: hash feature of the image
41 |             imageFeature: image feature of the image
42 |         '''
43 | candidates = self.__findCandidate(hashFeature)
44 | refinedCandidates = self.__refineCandidate(candidates, imageFeature)
45 | label = [self.__imageLabels[refinedCandidate] for refinedCandidate in refinedCandidates]
46 | print(label)
47 | return label
48 |
49 |
50 | def __findCandidate(self, hashFeature):
51 | _seedCandidates, _ = self.__indexer.knnSearch(hashFeature, ImageDb._K_OF_KNN, params={})
52 | seedCandidates = _seedCandidates[0].tolist()
53 | candidates = list(seedCandidates)
54 | for i in seedCandidates:
55 |             # .all(1) reduces each row to a single bool indicating whether every element in that row is True
56 | auxCandidates = np.where((self.__hashFeatures == self.__hashFeatures[i]).all(1))[0]
57 | candidates.extend(auxCandidates)
58 | return list(set(candidates))
59 |
60 |
61 | def __refineCandidate(self, candidates, imageFeature):
62 | distWithQuery = lambda index: np.linalg.norm(self.__imageFeatures[index] - imageFeature[0])
63 | refinedCandidates = min(candidates, key=distWithQuery)
64 | return [refinedCandidates]
65 |
66 |
67 | if __name__ == '__main__':
68 | pass
69 |
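70 | # A minimal usage sketch (filenames are placeholders; the .bin files are the
71 | # raw float32 dumps written by DbBuild.DbBuilder):
72 | # db = ImageDb({'hashFeatureFilename': '1.bin',
73 | #               'imageFeatureFilename': '2.bin',
74 | #               'labelsFilename': 'labels.txt'})
75 | # labels = db.find(hashFeature, imageFeature)  # arrays from FeatureExtracter
76 | # Retrieval is two-stage: a FLANN KNN search over the 128-d hash codes
77 | # proposes candidates, then L2 distance over the 2048-d features keeps the
78 | # single nearest entry.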
--------------------------------------------------------------------------------
/src/Performance.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | '''End-to-end evaluation of retrieval accuracy.'''
3 |
4 | import sys
5 | sys.path.append('d:/dev/unicorn/')
6 | from src import Application
7 | import FilesMaker
8 | import DbBuild
9 | from pathlib import PurePath
10 |
11 |
12 | def fakeIlistFileEx(num,name,_):
13 | for i in range(num):
14 | yield name
15 |
16 |
17 | class PerformanceEvaluater(object):
18 | def __init__(self,config):
19 | self.__actual = None
20 | self.__expected = None
21 | self.__config = config
22 |
23 |
24 | def evaluate(self):
25 | self.__getActual()
26 | self.__loadExpected()
27 | isOK, reason = self.__compare()
28 | print(reason if not isOK else 'ok')
29 |
30 |
31 | def __getActual(self):
32 | imageFinder = Application.ImageFinder(self.__config)
33 | print('Retrieval Start!')
34 | self.__actual ={}
35 | with open(self.__config['imagePathAlias'], 'r') as file:
36 |             # WARNING: imageFinder.find() currently returns only one result
37 | count = 0
38 | for imagePath, alias in (line.strip().split(' ') for line in file):
39 | self.__actual[alias] = imageFinder.find(imagePath)[0]
40 | count += 1
41 | import time
42 | time.sleep(0.0001)
43 | if count%100 == 0:
44 | print(f'Processing Up To {count} Images', end='\r')
45 | print(f'Processed {count} Images Totally')
46 |
47 | #self.__actual = {alias:imageFinder.find(imagePath)[0] for imagePath,alias in (line.strip().split(' ') for line in file)}
48 |
49 |
50 | def __loadExpected(self):
51 | with open(self.__config['groundTruth'], 'r') as file:
52 | self.__expected = dict(line.strip().split(' ') for line in file)
53 |
54 |
55 | def __compare(self):
56 | print('Compute Accuracy Start!')
57 | if set(self.__actual.keys()) != set(self.__expected.keys()):
58 |             return False, 'actual and expected key sets differ!'
59 | # wrong = 0
60 | # for (k,v) in self.__actual.items():
61 | # wrong += 1 if self.__expected[k] != v else 0
62 | # print(1-wrong/len(self.__expected))
63 |         print('Compute Accuracy Completed, accuracy is', len(set(self.__actual.items()) & set(self.__expected.items())) / len(self.__expected))
64 |
65 | return True,'OK'
66 |
67 | from time import perf_counter as clock  # time.clock was removed in Python 3.8
68 | if __name__ == '__main__':
69 | config = {
70 |         # 15 images
71 |         #'imageRoot': r'D:\dev\unicorn\test\data\__NImage\image',
72 |         # 10k images
73 |         #'imageRoot': r'D:\1000000\image\images0\images\0',
74 |         # 100k images
75 |         'imageRoot': r'D:\1000000\image\images0\images',
76 |
77 | 'targets': [(r'D:\dev\unicorn\test\data\__NImage\feature\imagePathLabels.txt', ['path', 'label']),
78 | (r'D:\dev\unicorn\test\data\__NImage\feature\imagePathAlias.txt', ['path', 'alias']),
79 | (r'D:\dev\unicorn\test\data\__NImage\feature\aliasLabel.txt', ['alias', 'label'])],
80 | 'db': {
81 | 'hashFeatureFilename': r'D:\dev\unicorn\test\data\__NImage\feature\1.bin',
82 | 'imageFeatureFilename': r'D:\dev\unicorn\test\data\__NImage\feature\2.bin',
83 | 'labelsFilename': r'D:\dev\unicorn\test\data\__NImage\feature\labels.txt',
84 | },
85 |
86 | 'imagePathLabelsFilename': r'D:\dev\unicorn\test\data\__NImage\feature\imagePathLabels.txt',
87 | 'groundTruth': r'D:\dev\unicorn\test\data\__NImage\feature\aliasLabel.txt',
88 | 'imagePathAlias': r'D:\dev\unicorn\test\data\__NImage\feature\imagePathAlias.txt',
89 | # 'sample': [0, 10, 20]
90 | 'extracter': {
91 | 'num_classes': 11,
92 | 'checkpoint_path': 'D:/dev/unicorn/model/model.ckpt-42100'
93 | }
94 |
95 | }
96 |
97 | start = clock()
98 | # step1
99 | def _resolve2(path):
100 | _path = PurePath(path)
101 | label = '_'.join([_path.parts[-2], _path.stem])
102 | imgAlias = '_'.join([label, _path.stem])
103 | return FilesMaker.ResolveResult(label, path, imgAlias)
104 | FilesMaker.makeConfigFile(config, resolver=_resolve2, openFileAfterWrite=True)
105 |     finish = clock()
106 |     print(finish - start)
107 |
108 |     # WARNING: the ImageFinder object is constructed repeatedly across steps
109 | # step2
110 | builder = DbBuild.DbBuilder(config)
111 | builder.build()
112 |     finish = clock()
113 |     print(finish - start)
114 |
115 | # step3
116 | #
117 | # PE = PerformanceEvaluater(config)
118 | # PE.evaluate()
119 | # finish=clock()
120 | # print (f'Time Consumed {finish-start} s')
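121 | 
122 |     # Pipeline summary: step 1 writes the path/alias/label files with
123 |     # FilesMaker, step 2 extracts features for every listed image and stores
124 |     # the database with DbBuild, and step 3 (commented out above) re-queries
125 |     # every image and compares predicted labels against the alias -> label
126 |     # ground truth.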
--------------------------------------------------------------------------------
/src/Util/File.py:
--------------------------------------------------------------------------------
1 | # -- coding:utf-8 --
2 |
3 | def writeLines(filename:str, content):
4 | with open(filename, 'w') as file:
5 | file.writelines((str(line) + '\n') for line in content)
6 |
7 | if __name__ == '__main__':
8 | pass
--------------------------------------------------------------------------------
/src/Util/Path.py:
--------------------------------------------------------------------------------
1 | # -- coding:utf-8 --
2 |
3 | import os
4 |
5 | # pip install pypiwin32
6 | import pythoncom
7 | from win32com.shell import shell
8 |
9 | def ilistFiles(dir):
10 |     '''List all files in a directory (generator).
11 | 
12 |     Args
13 |         dir : directory name
14 |     '''
15 |
16 | _, _, files = next(os.walk(dir))
17 | return (os.path.join(dir, file) for file in files)
18 |
19 | def listFiles(dir):
20 |     '''List all files in a directory (list).
21 | 
22 |     Args
23 |         dir : directory name
24 |     '''
25 |
26 | _, _, files = next(os.walk(dir))
27 | #print([os.path.join(dir, file) for file in files])
28 | return [os.path.join(dir, file) for file in files]
29 |
30 | def _getShortcutRealPath(path):
31 | try:
32 | pythoncom.CoInitialize()
33 | shortcut = pythoncom.CoCreateInstance(
34 | shell.CLSID_ShellLink,
35 | None,
36 | pythoncom.CLSCTX_INPROC_SERVER,
37 | shell.IID_IShellLink)
38 | shortcut.QueryInterface(pythoncom.IID_IPersistFile).Load(path)
39 | realPath = shortcut.GetPath(shell.SLGP_SHORTPATH)[0]
40 | return realPath
41 | except Exception as err:
42 | return path
43 |
44 | def ilistFileEx(dir, recursive=True):
45 |     '''List all files under a directory (generator), resolving Windows shortcuts.
46 | 
47 |     Args
48 |         dir: directory name
49 |     '''
50 | for root, dirs, files in os.walk(dir):
51 | for file in files:
52 |             # Not a .lnk file: yield the file path
53 | if os.path.splitext(file)[1].lower() != '.lnk':
54 | yield os.path.normpath(os.path.join(root, file))
55 | continue
56 |
57 |             # A .lnk file that is not actually a shortcut: yield the file path
58 |             realName = _getShortcutRealPath(os.path.join(root, file))
59 | if not realName:
60 | yield os.path.normpath(os.path.join(root, file))
61 | continue
62 |
63 |             # Shortcut points to a file: yield the real path
64 | if not os.path.isdir(realName):
65 | yield os.path.normpath(realName)
66 | continue
67 |
68 |             # Shortcut points to a directory: walk that directory
69 | yield from ilistFileEx(realName)
70 |
71 |         # os.walk already descends into subdirectories, so no explicit
72 |         # recursion over `dirs` is needed here.
73 |
74 |
75 | if __name__ == '__main__':
76 | pass
77 | # i1 = [i for i in ilistFileEx(r'D:\dev\unicorn\test\data\__NImage\image')]
78 | # print(i1)
--------------------------------------------------------------------------------
/src/config.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 |
5 | class Config():
6 | instance = None
7 |
8 | def __init__(self):
9 | # self.currenpath = sys.path[0]
10 |
11 | # wu
12 | # self.currenpath = "/home/imc/caffe/poster/0406/tensorflow"
13 | self.currenpath = "e:/caffe_poster_0406_tensorflow"
14 |
15 | # self.library_path = "/home/imc/caffe/poster/0406/library"
16 | # wu
17 | # self.library_path = "/home/imc/models-master/inception/mydata/postertest1"
18 | self.library_path = "e:/models-master/inception/mydata/postertest1"
19 |
20 | self.hashfeature_path = self.currenpath + "/hashfeature.bin"
21 | self.feature_path = self.currenpath + "/feature.bin"
22 | self.groundtruth_path = self.currenpath + "/groundtruth.txt"
23 | self.FEATURE_LENGTH = 2048
24 | self.HASH_LENGTH = 128
25 |
26 | self.caffe_root = '/home/imc/caffe/'
27 | self.net_file = self.caffe_root + 'poster/models/bvlc_alexnet/deploy_hash.prototxt'
28 | self.caffe_model = self.caffe_root + 'poster/models/bvlc_alexnet/caffe_alexnet_train_iter_826.caffemodel'
29 | self.mean_file = self.caffe_root + 'poster/data/all/mean.npy'
30 |
31 | self.khash = 5
32 |
33 | def Getlibrary_path(self):
34 | return self.library_path
35 |
36 | def Getgroundtruth_path(self):
37 | return self.groundtruth_path
38 |
39 | def Gethashfeature_path(self):
40 | return self.hashfeature_path
41 |
42 | def Getfeature_path(self):
43 | return self.feature_path
44 |
45 | def GetFEATURE_LENGTH(self):
46 | return self.FEATURE_LENGTH
47 |
48 | def GetHASH_LENGTH(self):
49 | return self.HASH_LENGTH
50 |
51 | def Getcaffe_root(self):
52 | return self.caffe_root
53 |
54 | def Getnet_file(self):
55 | return self.net_file
56 |
57 | def Getcaffe_model(self):
58 | return self.caffe_model
59 |
60 | def Getmean_file(self):
61 | return self.mean_file
62 |
63 | def Getkhash(self):
64 | return self.khash
65 |
66 |
67 | a = Config()
68 |
69 |
--------------------------------------------------------------------------------
/src/inception/image_processing.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Read and preprocess image data.
16 |
17 | Image processing occurs on a single image at a time. Images are read and
18 | preprocessed in parallel across multiple threads. The resulting images
19 | are concatenated together to form a single batch for training or evaluation.
20 |
21 | -- Provide processed image data for a network:
22 | inputs: Construct batches of evaluation examples of images.
23 | distorted_inputs: Construct batches of training examples of images.
24 | batch_inputs: Construct batches of training or evaluation examples of images.
25 |
26 | -- Data processing:
27 | parse_example_proto: Parses an Example proto containing a training example
28 | of an image.
29 |
30 | -- Image decoding:
31 | decode_jpeg: Decode a JPEG encoded string into a 3-D float32 Tensor.
32 |
33 | -- Image preprocessing:
34 | image_preprocessing: Decode and preprocess one image for evaluation or training
35 | distort_image: Distort one image for training a network.
36 | eval_image: Prepare one image for evaluation.
37 | distort_color: Distort the color in one image for training.
38 | """
39 | from __future__ import absolute_import
40 | from __future__ import division
41 | from __future__ import print_function
42 |
43 | import tensorflow as tf
44 |
45 | FLAGS = tf.app.flags.FLAGS
46 |
47 | tf.app.flags.DEFINE_integer('batch_size', 32,
48 | """Number of images to process in a batch.""")
49 | tf.app.flags.DEFINE_integer('image_size', 299,
50 | """Provide square images of this size.""")
51 | tf.app.flags.DEFINE_integer('num_preprocess_threads', 4,
52 | """Number of preprocessing threads per tower. """
53 | """Please make this a multiple of 4.""")
54 | tf.app.flags.DEFINE_integer('num_readers', 4,
55 | """Number of parallel readers during train.""")
56 |
57 | # Images are preprocessed asynchronously using multiple threads specified by
58 | # --num_preprocess_threads and the resulting processed images are stored in a
59 | # random shuffling queue. The shuffling queue dequeues --batch_size images
60 | # for processing on a given Inception tower. A larger shuffling queue guarantees
61 | # better mixing across examples within a batch and results in slightly higher
62 | # predictive performance in a trained model. Empirically,
63 | # --input_queue_memory_factor=16 works well. A value of 16 implies a queue size
64 | # of 1024*16 images. Assuming RGB 299x299 images, this implies a queue size of
65 | # roughly 17.6GB. If the machine is memory limited, then decrease this factor to
66 | # decrease the CPU memory footprint, accordingly.
67 | tf.app.flags.DEFINE_integer('input_queue_memory_factor', 16,
68 | """Size of the queue of preprocessed images. """
69 | """Default is ideal but try smaller values, e.g. """
70 | """4, 2 or 1, if host memory is constrained. See """
71 | """comments in code for more details.""")
72 |
73 |
74 | def inputs(dataset, batch_size=None, num_preprocess_threads=None):
75 | """Generate batches of ImageNet images for evaluation.
76 |
77 | Use this function as the inputs for evaluating a network.
78 |
79 | Note that some (minimal) image preprocessing occurs during evaluation
80 | including central cropping and resizing of the image to fit the network.
81 |
82 | Args:
83 | dataset: instance of Dataset class specifying the dataset.
84 | batch_size: integer, number of examples in batch
85 | num_preprocess_threads: integer, total number of preprocessing threads but
86 | None defaults to FLAGS.num_preprocess_threads.
87 |
88 | Returns:
89 | images: Images. 4D tensor of size [batch_size, FLAGS.image_size,
90 | image_size, 3].
91 | labels: 1-D integer Tensor of [FLAGS.batch_size].
92 | """
93 | if not batch_size:
94 | batch_size = FLAGS.batch_size
95 |
96 | # Force all input processing onto CPU in order to reserve the GPU for
97 | # the forward inference and back-propagation.
98 | with tf.device('/cpu:0'):
99 | images, labels = batch_inputs(
100 | dataset, batch_size, train=False,
101 | num_preprocess_threads=num_preprocess_threads,
102 | num_readers=1)
103 |
104 | return images, labels
105 |
106 |
107 | def distorted_inputs(dataset, batch_size=None, num_preprocess_threads=None):
108 | """Generate batches of distorted versions of ImageNet images.
109 |
110 | Use this function as the inputs for training a network.
111 |
112 | Distorting images provides a useful technique for augmenting the data
113 | set during training in order to make the network invariant to aspects
114 |   of the image that do not affect the label.
115 |
116 | Args:
117 | dataset: instance of Dataset class specifying the dataset.
118 | batch_size: integer, number of examples in batch
119 | num_preprocess_threads: integer, total number of preprocessing threads but
120 | None defaults to FLAGS.num_preprocess_threads.
121 |
122 | Returns:
123 | images: Images. 4D tensor of size [batch_size, FLAGS.image_size,
124 | FLAGS.image_size, 3].
125 | labels: 1-D integer Tensor of [batch_size].
126 | """
127 | if not batch_size:
128 | batch_size = FLAGS.batch_size
129 |
130 | # Force all input processing onto CPU in order to reserve the GPU for
131 | # the forward inference and back-propagation.
132 | with tf.device('/cpu:0'):
133 | images, labels = batch_inputs(
134 | dataset, batch_size, train=True,
135 | num_preprocess_threads=num_preprocess_threads,
136 | num_readers=FLAGS.num_readers)
137 | return images, labels
138 |
139 |
140 | def decode_jpeg(image_buffer, scope=None):
141 | """Decode a JPEG string into one 3-D float image Tensor.
142 |
143 | Args:
144 | image_buffer: scalar string Tensor.
145 | scope: Optional scope for op_scope.
146 | Returns:
147 | 3-D float Tensor with values ranging from [0, 1).
148 | """
149 | with tf.op_scope([image_buffer], scope, 'decode_jpeg'):
150 | # Decode the string as an RGB JPEG.
151 | # Note that the resulting image contains an unknown height and width
152 | # that is set dynamically by decode_jpeg. In other words, the height
153 | # and width of image is unknown at compile-time.
154 | image = tf.image.decode_jpeg(image_buffer, channels=3)
155 |
156 | # After this point, all image pixels reside in [0,1)
157 | # until the very end, when they're rescaled to (-1, 1). The various
158 | # adjust_* ops all require this range for dtype float.
159 | image = tf.image.convert_image_dtype(image, dtype=tf.float32)
160 | return image
161 |
162 |
163 | def distort_color(image, thread_id=0, scope=None):
164 | """Distort the color of the image.
165 |
166 | Each color distortion is non-commutative and thus ordering of the color ops
167 | matters. Ideally we would randomly permute the ordering of the color ops.
168 |   Rather than adding that level of complication, we select a distinct ordering
169 | of color ops for each preprocessing thread.
170 |
171 | Args:
172 | image: Tensor containing single image.
173 | thread_id: preprocessing thread ID.
174 | scope: Optional scope for op_scope.
175 | Returns:
176 | color-distorted image
177 | """
178 | with tf.op_scope([image], scope, 'distort_color'):
179 | color_ordering = thread_id % 2
180 |
181 | if color_ordering == 0:
182 | image = tf.image.random_brightness(image, max_delta=32. / 255.)
183 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
184 | image = tf.image.random_hue(image, max_delta=0.2)
185 | image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
186 | elif color_ordering == 1:
187 | image = tf.image.random_brightness(image, max_delta=32. / 255.)
188 | image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
189 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
190 | image = tf.image.random_hue(image, max_delta=0.2)
191 |
192 | # The random_* ops do not necessarily clamp.
193 | image = tf.clip_by_value(image, 0.0, 1.0)
194 | return image
195 |
196 |
197 | def distort_image(image, height, width, bbox, thread_id=0, scope=None):
198 | """Distort one image for training a network.
199 |
200 | Distorting images provides a useful technique for augmenting the data
201 | set during training in order to make the network invariant to aspects
202 |   of the image that do not affect the label.
203 |
204 | Args:
205 | image: 3-D float Tensor of image
206 | height: integer
207 | width: integer
208 | bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
209 | where each coordinate is [0, 1) and the coordinates are arranged
210 | as [ymin, xmin, ymax, xmax].
211 | thread_id: integer indicating the preprocessing thread.
212 | scope: Optional scope for op_scope.
213 | Returns:
214 | 3-D float Tensor of distorted image used for training.
215 | """
216 | with tf.op_scope([image, height, width, bbox], scope, 'distort_image'):
217 | # Each bounding box has shape [1, num_boxes, box coords] and
218 | # the coordinates are ordered [ymin, xmin, ymax, xmax].
219 |
220 | # Display the bounding box in the first thread only.
221 | if not thread_id:
222 | image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
223 | bbox)
224 | tf.image_summary('image_with_bounding_boxes', image_with_box)
225 |
226 | # A large fraction of image datasets contain a human-annotated bounding
227 | # box delineating the region of the image containing the object of interest.
228 | # We choose to create a new bounding box for the object which is a randomly
229 | # distorted version of the human-annotated bounding box that obeys an allowed
230 | # range of aspect ratios, sizes and overlap with the human-annotated
231 | # bounding box. If no box is supplied, then we assume the bounding box is
232 | # the entire image.
233 | sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
234 | tf.shape(image),
235 | bounding_boxes=bbox,
236 | min_object_covered=0.1,
237 | aspect_ratio_range=[0.75, 1.33],
238 | area_range=[0.05, 1.0],
239 | max_attempts=100,
240 | use_image_if_no_bounding_boxes=True)
241 | bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box
242 | if not thread_id:
243 | image_with_distorted_box = tf.image.draw_bounding_boxes(
244 | tf.expand_dims(image, 0), distort_bbox)
245 | tf.image_summary('images_with_distorted_bounding_box',
246 | image_with_distorted_box)
247 |
248 | # Crop the image to the specified bounding box.
249 | distorted_image = tf.slice(image, bbox_begin, bbox_size)
250 |
251 | # This resizing operation may distort the images because the aspect
252 | # ratio is not respected. We select a resize method in a round robin
253 | # fashion based on the thread number.
254 | # Note that ResizeMethod contains 4 enumerated resizing methods.
255 | resize_method = thread_id % 4
256 | distorted_image = tf.image.resize_images(distorted_image, height, width,
257 | resize_method)
258 | # Restore the shape since the dynamic slice based upon the bbox_size loses
259 | # the third dimension.
260 | distorted_image.set_shape([height, width, 3])
261 | if not thread_id:
262 | tf.image_summary('cropped_resized_image',
263 | tf.expand_dims(distorted_image, 0))
264 |
265 | # Randomly flip the image horizontally.
266 | distorted_image = tf.image.random_flip_left_right(distorted_image)
267 |
268 | # Randomly distort the colors.
269 | distorted_image = distort_color(distorted_image, thread_id)
270 |
271 | if not thread_id:
272 | tf.image_summary('final_distorted_image',
273 | tf.expand_dims(distorted_image, 0))
274 | return distorted_image
275 |
276 |
277 | def eval_image(image, height, width, scope=None):
278 | """Prepare one image for evaluation.
279 |
280 | Args:
281 | image: 3-D float Tensor
282 | height: integer
283 | width: integer
284 | scope: Optional scope for op_scope.
285 | Returns:
286 | 3-D float Tensor of prepared image.
287 | """
288 | with tf.op_scope([image, height, width], scope, 'eval_image'):
289 | # Crop the central region of the image with an area containing 87.5% of
290 | # the original image.
291 | image = tf.image.central_crop(image, central_fraction=0.875)
292 |
293 | # Resize the image to the original height and width.
294 | image = tf.expand_dims(image, 0)
295 | image = tf.image.resize_bilinear(image, [height, width],
296 | align_corners=False)
297 | image = tf.squeeze(image, [0])
298 | return image
299 |
300 |
301 | def image_preprocessing(image_buffer, bbox, train, thread_id=0):
302 | """Decode and preprocess one image for evaluation or training.
303 |
304 | Args:
305 | image_buffer: JPEG encoded string Tensor
306 | bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
307 | where each coordinate is [0, 1) and the coordinates are arranged as
308 | [ymin, xmin, ymax, xmax].
309 | train: boolean
310 | thread_id: integer indicating preprocessing thread
311 |
312 | Returns:
313 | 3-D float Tensor containing an appropriately scaled image
314 |
315 | Raises:
316 | ValueError: if user does not provide bounding box
317 | """
318 | if bbox is None:
319 | raise ValueError('Please supply a bounding box.')
320 |
321 | image = decode_jpeg(image_buffer)
322 | height = FLAGS.image_size
323 | width = FLAGS.image_size
324 |
325 | if train:
326 | image = distort_image(image, height, width, bbox, thread_id)
327 | else:
328 | image = eval_image(image, height, width)
329 |
330 | # Finally, rescale to [-1,1] instead of [0, 1)
331 | image = tf.sub(image, 0.5)
332 | image = tf.mul(image, 2.0)
333 | return image
334 |
335 |
336 | def parse_example_proto(example_serialized):
337 | """Parses an Example proto containing a training example of an image.
338 |
339 | The output of the build_image_data.py image preprocessing script is a dataset
340 | containing serialized Example protocol buffers. Each Example proto contains
341 | the following fields:
342 |
343 | image/height: 462
344 | image/width: 581
345 | image/colorspace: 'RGB'
346 | image/channels: 3
347 | image/class/label: 615
348 | image/class/synset: 'n03623198'
349 | image/class/text: 'knee pad'
350 | image/object/bbox/xmin: 0.1
351 | image/object/bbox/xmax: 0.9
352 | image/object/bbox/ymin: 0.2
353 | image/object/bbox/ymax: 0.6
354 | image/object/bbox/label: 615
355 | image/format: 'JPEG'
356 | image/filename: 'ILSVRC2012_val_00041207.JPEG'
357 | image/encoded:
358 |
359 | Args:
360 | example_serialized: scalar Tensor tf.string containing a serialized
361 | Example protocol buffer.
362 |
363 | Returns:
364 | image_buffer: Tensor tf.string containing the contents of a JPEG file.
365 | label: Tensor tf.int32 containing the label.
366 | bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
367 | where each coordinate is [0, 1) and the coordinates are arranged as
368 | [ymin, xmin, ymax, xmax].
369 | text: Tensor tf.string containing the human-readable label.
370 | """
371 | # Dense features in Example proto.
372 | feature_map = {
373 | 'image/encoded': tf.FixedLenFeature([], dtype=tf.string,
374 | default_value=''),
375 | 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64,
376 | default_value=-1),
377 | 'image/class/text': tf.FixedLenFeature([], dtype=tf.string,
378 | default_value=''),
379 | }
380 | sparse_float32 = tf.VarLenFeature(dtype=tf.float32)
381 | # Sparse features in Example proto.
382 | feature_map.update(
383 | {k: sparse_float32 for k in ['image/object/bbox/xmin',
384 | 'image/object/bbox/ymin',
385 | 'image/object/bbox/xmax',
386 | 'image/object/bbox/ymax']})
387 |
388 | features = tf.parse_single_example(example_serialized, feature_map)
389 | label = tf.cast(features['image/class/label'], dtype=tf.int32)
390 |
391 | xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0)
392 | ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0)
393 | xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0)
394 | ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0)
395 |
396 | # Note that we impose an ordering of (y, x) just to make life difficult.
397 | bbox = tf.concat(0, [ymin, xmin, ymax, xmax])
398 |
399 | # Force the variable number of bounding boxes into the shape
400 | # [1, num_boxes, coords].
401 | bbox = tf.expand_dims(bbox, 0)
402 | bbox = tf.transpose(bbox, [0, 2, 1])
403 |
404 | return features['image/encoded'], label, bbox, features['image/class/text']
405 |
406 |
407 | def batch_inputs(dataset, batch_size, train, num_preprocess_threads=None,
408 | num_readers=1):
409 |   """Construct batches of training or evaluation examples from the image dataset.
410 |
411 | Args:
412 | dataset: instance of Dataset class specifying the dataset.
413 | See dataset.py for details.
414 | batch_size: integer
415 | train: boolean
416 | num_preprocess_threads: integer, total number of preprocessing threads
417 | num_readers: integer, number of parallel readers
418 |
419 | Returns:
420 | images: 4-D float Tensor of a batch of images
421 | labels: 1-D integer Tensor of [batch_size].
422 |
423 | Raises:
424 | ValueError: if data is not found
425 | """
426 | with tf.name_scope('batch_processing'):
427 | data_files = dataset.data_files()
428 | if data_files is None:
429 | raise ValueError('No data files found for this dataset')
430 | print(type(data_files))
431 | # Create filename_queue
432 | if train:
433 | filename_queue = tf.train.string_input_producer(data_files,
434 | shuffle=True,
435 | capacity=16)
436 | else:
437 | filename_queue = tf.train.string_input_producer(data_files,
438 | shuffle=False,
439 | capacity=1)
440 | if num_preprocess_threads is None:
441 | num_preprocess_threads = FLAGS.num_preprocess_threads
442 |
443 | if num_preprocess_threads % 4:
444 |       raise ValueError('Please make num_preprocess_threads a multiple '
445 |                        'of 4 (%d %% 4 != 0).' % num_preprocess_threads)
446 |
447 | if num_readers is None:
448 | num_readers = FLAGS.num_readers
449 |
450 | if num_readers < 1:
451 | raise ValueError('Please make num_readers at least 1')
452 |
453 | # Approximate number of examples per shard.
454 | examples_per_shard = 1024
455 | # Size the random shuffle queue to balance between good global
456 | # mixing (more examples) and memory use (fewer examples).
457 | # 1 image uses 299*299*3*4 bytes = 1MB
458 | # The default input_queue_memory_factor is 16 implying a shuffling queue
459 | # size: examples_per_shard * 16 * 1MB = 17.6GB
460 | min_queue_examples = examples_per_shard * FLAGS.input_queue_memory_factor
461 | if train:
462 | examples_queue = tf.RandomShuffleQueue(
463 | capacity=min_queue_examples + 3 * batch_size,
464 | min_after_dequeue=min_queue_examples,
465 | dtypes=[tf.string])
466 | else:
467 | examples_queue = tf.FIFOQueue(
468 | capacity=examples_per_shard + 3 * batch_size,
469 | dtypes=[tf.string])
470 |
471 | # Create multiple readers to populate the queue of examples.
472 | if num_readers > 1:
473 | enqueue_ops = []
474 | for _ in range(num_readers):
475 | reader = dataset.reader()
476 | _, value = reader.read(filename_queue)
477 | enqueue_ops.append(examples_queue.enqueue([value]))
478 |
479 | tf.train.queue_runner.add_queue_runner(
480 | tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops))
481 | example_serialized = examples_queue.dequeue()
482 | else:
483 | reader = dataset.reader()
484 | _, example_serialized = reader.read(filename_queue)
485 |
486 | images_and_labels = []
487 | for thread_id in range(num_preprocess_threads):
488 | # Parse a serialized Example proto to extract the image and metadata.
489 | image_buffer, label_index, bbox, _ = parse_example_proto(
490 | example_serialized)
491 | image = image_preprocessing(image_buffer, bbox, train, thread_id)
492 | images_and_labels.append([image, label_index])
493 |
494 | images, label_index_batch = tf.train.batch_join(
495 | images_and_labels,
496 | batch_size=batch_size,
497 | capacity=2 * num_preprocess_threads * batch_size)
498 |
499 | # Reshape images into these desired dimensions.
500 | height = FLAGS.image_size
501 | width = FLAGS.image_size
502 | depth = 3
503 |
504 | images = tf.cast(images, tf.float32)
505 | images = tf.reshape(images, shape=[batch_size, height, width, depth])
506 |
507 | # Display the training images in the visualizer.
508 | tf.image_summary('images', images)
509 |
510 | return images, tf.reshape(label_index_batch, [batch_size])
511 |
512 |
513 |
514 |
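515 | # A minimal eval-path sketch (the JPEG path is a placeholder; this mirrors
516 | # what src/FeatureExtract.py builds inline): decode a JPEG buffer, central
517 | # crop, resize to image_size, and rescale to [-1, 1].
518 | #
519 | # image_buffer = tf.gfile.FastGFile(r'D:\path\to\image.jpg', 'rb').read()
520 | # bbox = tf.constant(0.0, shape=[1, 1, 4])  # dummy whole-image box
521 | # image = image_preprocessing(image_buffer, bbox, train=False)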
--------------------------------------------------------------------------------
/src/inception/inception_model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Inception-v3 expressed in TensorFlow-Slim.
16 |
17 | Usage:
18 |
19 | # Parameters for BatchNorm.
20 | batch_norm_params = {
21 | # Decay for the batch_norm moving averages.
22 | 'decay': BATCHNORM_MOVING_AVERAGE_DECAY,
23 | # epsilon to prevent 0s in variance.
24 | 'epsilon': 0.001,
25 | }
26 | # Set weight_decay for weights in Conv and FC layers.
27 | with slim.arg_scope([slim.ops.conv2d, slim.ops.fc], weight_decay=0.00004):
28 | with slim.arg_scope([slim.ops.conv2d],
29 | stddev=0.1,
30 | activation=tf.nn.relu,
31 | batch_norm_params=batch_norm_params):
32 | # Force all Variables to reside on the CPU.
33 | with slim.arg_scope([slim.variables.variable], device='/cpu:0'):
34 | logits, endpoints = slim.inception.inception_v3(
35 | images,
36 | dropout_keep_prob=0.8,
37 | num_classes=num_classes,
38 | is_training=for_training,
39 | restore_logits=restore_logits,
40 | scope=scope)
41 | """
42 | from __future__ import absolute_import
43 | from __future__ import division
44 | from __future__ import print_function
45 |
46 | import tensorflow as tf
47 |
48 | from src.inception import ops
49 | from src.inception import scopes
50 |
51 |
52 | def inception_v3(inputs,
53 | dropout_keep_prob=0.8,
54 | num_classes=1000,
55 | is_training=True,
56 | restore_logits=True,
57 | scope=''):
58 | """Latest Inception from http://arxiv.org/abs/1512.00567.
59 |
60 | "Rethinking the Inception Architecture for Computer Vision"
61 |
62 | Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
63 | Zbigniew Wojna
64 |
65 | Args:
66 | inputs: a tensor of size [batch_size, height, width, channels].
67 | dropout_keep_prob: dropout keep_prob.
68 | num_classes: number of predicted classes.
69 | is_training: whether is training or not.
70 | restore_logits: whether or not the logits layers should be restored.
71 | Useful for fine-tuning a model with different num_classes.
72 | scope: Optional scope for op_scope.
73 |
74 | Returns:
75 | a list containing 'logits', 'aux_logits' Tensors.
76 | """
77 | # end_points will collect relevant activations for external use, for example
78 | # summaries or losses.
79 | end_points = {}
80 | with tf.op_scope([inputs], scope, 'inception_v3'):
81 | with scopes.arg_scope([ops.conv2d, ops.fc, ops.batch_norm, ops.dropout],
82 | is_training=is_training):
83 | with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool],
84 | stride=1, padding='VALID'):
85 | # 299 x 299 x 3
86 | end_points['conv0'] = ops.conv2d(inputs, 32, [3, 3], stride=2,
87 | scope='conv0')
88 | # 149 x 149 x 32
89 | end_points['conv1'] = ops.conv2d(end_points['conv0'], 32, [3, 3],
90 | scope='conv1')
91 | # 147 x 147 x 32
92 | end_points['conv2'] = ops.conv2d(end_points['conv1'], 64, [3, 3],
93 | padding='SAME', scope='conv2')
94 | # 147 x 147 x 64
95 | end_points['pool1'] = ops.max_pool(end_points['conv2'], [3, 3],
96 | stride=2, scope='pool1')
97 | # 73 x 73 x 64
98 | end_points['conv3'] = ops.conv2d(end_points['pool1'], 80, [1, 1],
99 | scope='conv3')
100 | # 73 x 73 x 80.
101 | end_points['conv4'] = ops.conv2d(end_points['conv3'], 192, [3, 3],
102 | scope='conv4')
103 | # 71 x 71 x 192.
104 | end_points['pool2'] = ops.max_pool(end_points['conv4'], [3, 3],
105 | stride=2, scope='pool2')
106 | # 35 x 35 x 192.
107 | net = end_points['pool2']
108 | # Inception blocks
109 | with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool],
110 | stride=1, padding='SAME'):
111 | # mixed: 35 x 35 x 256.
112 | with tf.variable_scope('mixed_35x35x256a'):
113 | with tf.variable_scope('branch1x1'):
114 | branch1x1 = ops.conv2d(net, 64, [1, 1])
115 | with tf.variable_scope('branch5x5'):
116 | branch5x5 = ops.conv2d(net, 48, [1, 1])
117 | branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
118 | with tf.variable_scope('branch3x3dbl'):
119 | branch3x3dbl = ops.conv2d(net, 64, [1, 1])
120 | branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
121 | branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
122 | with tf.variable_scope('branch_pool'):
123 | branch_pool = ops.avg_pool(net, [3, 3])
124 | branch_pool = ops.conv2d(branch_pool, 32, [1, 1])
125 | net = tf.concat( [branch1x1, branch5x5, branch3x3dbl, branch_pool],3)
126 | end_points['mixed_35x35x256a'] = net
127 | # mixed_1: 35 x 35 x 288.
128 | with tf.variable_scope('mixed_35x35x288a'):
129 | with tf.variable_scope('branch1x1'):
130 | branch1x1 = ops.conv2d(net, 64, [1, 1])
131 | with tf.variable_scope('branch5x5'):
132 | branch5x5 = ops.conv2d(net, 48, [1, 1])
133 | branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
134 | with tf.variable_scope('branch3x3dbl'):
135 | branch3x3dbl = ops.conv2d(net, 64, [1, 1])
136 | branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
137 | branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
138 | with tf.variable_scope('branch_pool'):
139 | branch_pool = ops.avg_pool(net, [3, 3])
140 | branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
141 | net = tf.concat( [branch1x1, branch5x5, branch3x3dbl, branch_pool],3)
142 | end_points['mixed_35x35x288a'] = net
143 | # mixed_2: 35 x 35 x 288.
144 | with tf.variable_scope('mixed_35x35x288b'):
145 | with tf.variable_scope('branch1x1'):
146 | branch1x1 = ops.conv2d(net, 64, [1, 1])
147 | with tf.variable_scope('branch5x5'):
148 | branch5x5 = ops.conv2d(net, 48, [1, 1])
149 | branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
150 | with tf.variable_scope('branch3x3dbl'):
151 | branch3x3dbl = ops.conv2d(net, 64, [1, 1])
152 | branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
153 | branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
154 | with tf.variable_scope('branch_pool'):
155 | branch_pool = ops.avg_pool(net, [3, 3])
156 | branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
157 | net = tf.concat( [branch1x1, branch5x5, branch3x3dbl, branch_pool],3)
158 | end_points['mixed_35x35x288b'] = net
159 | # mixed_3: 17 x 17 x 768.
160 | with tf.variable_scope('mixed_17x17x768a'):
161 | with tf.variable_scope('branch3x3'):
162 | branch3x3 = ops.conv2d(net, 384, [3, 3], stride=2, padding='VALID')
163 | with tf.variable_scope('branch3x3dbl'):
164 | branch3x3dbl = ops.conv2d(net, 64, [1, 1])
165 | branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
166 | branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3],
167 | stride=2, padding='VALID')
168 | with tf.variable_scope('branch_pool'):
169 | branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID')
170 | net = tf.concat( [branch3x3, branch3x3dbl, branch_pool],3)
171 | end_points['mixed_17x17x768a'] = net
172 | # mixed4: 17 x 17 x 768.
173 | with tf.variable_scope('mixed_17x17x768b'):
174 | with tf.variable_scope('branch1x1'):
175 | branch1x1 = ops.conv2d(net, 192, [1, 1])
176 | with tf.variable_scope('branch7x7'):
177 | branch7x7 = ops.conv2d(net, 128, [1, 1])
178 | branch7x7 = ops.conv2d(branch7x7, 128, [1, 7])
179 | branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
180 | with tf.variable_scope('branch7x7dbl'):
181 | branch7x7dbl = ops.conv2d(net, 128, [1, 1])
182 | branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1])
183 | branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [1, 7])
184 | branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1])
185 | branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
186 | with tf.variable_scope('branch_pool'):
187 | branch_pool = ops.avg_pool(net, [3, 3])
188 | branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
189 | net = tf.concat( [branch1x1, branch7x7, branch7x7dbl, branch_pool],3)
190 | end_points['mixed_17x17x768b'] = net
191 | # mixed_5: 17 x 17 x 768.
192 | with tf.variable_scope('mixed_17x17x768c'):
193 | with tf.variable_scope('branch1x1'):
194 | branch1x1 = ops.conv2d(net, 192, [1, 1])
195 | with tf.variable_scope('branch7x7'):
196 | branch7x7 = ops.conv2d(net, 160, [1, 1])
197 | branch7x7 = ops.conv2d(branch7x7, 160, [1, 7])
198 | branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
199 | with tf.variable_scope('branch7x7dbl'):
200 | branch7x7dbl = ops.conv2d(net, 160, [1, 1])
201 | branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
202 | branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7])
203 | branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
204 | branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
205 | with tf.variable_scope('branch_pool'):
206 | branch_pool = ops.avg_pool(net, [3, 3])
207 | branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
208 | net = tf.concat( [branch1x1, branch7x7, branch7x7dbl, branch_pool],3)
209 | end_points['mixed_17x17x768c'] = net
210 | # mixed_6: 17 x 17 x 768.
211 | with tf.variable_scope('mixed_17x17x768d'):
212 | with tf.variable_scope('branch1x1'):
213 | branch1x1 = ops.conv2d(net, 192, [1, 1])
214 | with tf.variable_scope('branch7x7'):
215 | branch7x7 = ops.conv2d(net, 160, [1, 1])
216 | branch7x7 = ops.conv2d(branch7x7, 160, [1, 7])
217 | branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
218 | with tf.variable_scope('branch7x7dbl'):
219 | branch7x7dbl = ops.conv2d(net, 160, [1, 1])
220 | branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
221 | branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7])
222 | branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
223 | branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
224 | with tf.variable_scope('branch_pool'):
225 | branch_pool = ops.avg_pool(net, [3, 3])
226 | branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
227 | net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool],3 )
228 | end_points['mixed_17x17x768d'] = net
229 | # mixed_7: 17 x 17 x 768.
230 | with tf.variable_scope('mixed_17x17x768e'):
231 | with tf.variable_scope('branch1x1'):
232 | branch1x1 = ops.conv2d(net, 192, [1, 1])
233 | with tf.variable_scope('branch7x7'):
234 | branch7x7 = ops.conv2d(net, 192, [1, 1])
235 | branch7x7 = ops.conv2d(branch7x7, 192, [1, 7])
236 | branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
237 | with tf.variable_scope('branch7x7dbl'):
238 | branch7x7dbl = ops.conv2d(net, 192, [1, 1])
239 | branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1])
240 | branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
241 | branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1])
242 | branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
243 | with tf.variable_scope('branch_pool'):
244 | branch_pool = ops.avg_pool(net, [3, 3])
245 | branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
246 | net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
247 | end_points['mixed_17x17x768e'] = net
248 | # Auxiliary Head logits
249 | aux_logits = tf.identity(end_points['mixed_17x17x768e'])
250 | with tf.variable_scope('aux_logits'):
251 | aux_logits = ops.avg_pool(aux_logits, [5, 5], stride=3,
252 | padding='VALID')
253 | aux_logits = ops.conv2d(aux_logits, 128, [1, 1], scope='proj')
254 | # Shape of feature map before the final layer.
255 | shape = aux_logits.get_shape()
256 | aux_logits = ops.conv2d(aux_logits, 768, shape[1:3], stddev=0.01,
257 | padding='VALID')
258 | aux_logits = ops.flatten(aux_logits)
259 | aux_logits = ops.fc(aux_logits, num_classes, activation=None,
260 | stddev=0.001, restore=restore_logits)
261 | end_points['aux_logits'] = aux_logits
262 | # mixed_8: 8 x 8 x 1280.
263 | # Note that the scope below is not changed to not void previous
264 | # checkpoints.
265 | # (TODO) Fix the scope when appropriate.
266 | with tf.variable_scope('mixed_17x17x1280a'):
267 | with tf.variable_scope('branch3x3'):
268 | branch3x3 = ops.conv2d(net, 192, [1, 1])
269 | branch3x3 = ops.conv2d(branch3x3, 320, [3, 3], stride=2,
270 | padding='VALID')
271 | with tf.variable_scope('branch7x7x3'):
272 | branch7x7x3 = ops.conv2d(net, 192, [1, 1])
273 | branch7x7x3 = ops.conv2d(branch7x7x3, 192, [1, 7])
274 | branch7x7x3 = ops.conv2d(branch7x7x3, 192, [7, 1])
275 | branch7x7x3 = ops.conv2d(branch7x7x3, 192, [3, 3],
276 | stride=2, padding='VALID')
277 | with tf.variable_scope('branch_pool'):
278 | branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID')
279 | net = tf.concat( [branch3x3, branch7x7x3, branch_pool], 3)
280 | end_points['mixed_17x17x1280a'] = net
281 | # mixed_9: 8 x 8 x 2048.
282 | with tf.variable_scope('mixed_8x8x2048a'):
283 | with tf.variable_scope('branch1x1'):
284 | branch1x1 = ops.conv2d(net, 320, [1, 1])
285 | with tf.variable_scope('branch3x3'):
286 | branch3x3 = ops.conv2d(net, 384, [1, 1])
287 | branch3x3 = tf.concat( [ops.conv2d(branch3x3, 384, [1, 3]),
288 | ops.conv2d(branch3x3, 384, [3, 1])], 3)
289 | with tf.variable_scope('branch3x3dbl'):
290 | branch3x3dbl = ops.conv2d(net, 448, [1, 1])
291 | branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
292 | branch3x3dbl = tf.concat([ops.conv2d(branch3x3dbl, 384, [1, 3]),
293 | ops.conv2d(branch3x3dbl, 384, [3, 1])], 3)
294 | with tf.variable_scope('branch_pool'):
295 | branch_pool = ops.avg_pool(net, [3, 3])
296 | branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
297 | net = tf.concat( [branch1x1, branch3x3, branch3x3dbl, branch_pool],3)
298 | end_points['mixed_8x8x2048a'] = net
299 | # mixed_10: 8 x 8 x 2048.
300 | with tf.variable_scope('mixed_8x8x2048b'):
301 | with tf.variable_scope('branch1x1'):
302 | branch1x1 = ops.conv2d(net, 320, [1, 1])
303 | with tf.variable_scope('branch3x3'):
304 | branch3x3 = ops.conv2d(net, 384, [1, 1])
305 | branch3x3 = tf.concat([ops.conv2d(branch3x3, 384, [1, 3]),
306 | ops.conv2d(branch3x3, 384, [3, 1])], 3)
307 | with tf.variable_scope('branch3x3dbl'):
308 | branch3x3dbl = ops.conv2d(net, 448, [1, 1])
309 | branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
310 | branch3x3dbl = tf.concat([ops.conv2d(branch3x3dbl, 384, [1, 3]),
311 | ops.conv2d(branch3x3dbl, 384, [3, 1])], 3)
312 | with tf.variable_scope('branch_pool'):
313 | branch_pool = ops.avg_pool(net, [3, 3])
314 | branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
315 | net = tf.concat([branch1x1, branch3x3, branch3x3dbl, branch_pool], 3)
316 | end_points['mixed_8x8x2048b'] = net
317 | # Final pooling and prediction
318 | with tf.variable_scope('logits'):
319 | shape = net.get_shape()
320 | net = ops.avg_pool(net, shape[1:3], padding='VALID', scope='pool')
321 | # 1 x 1 x 2048
322 | net = ops.dropout(net, dropout_keep_prob, scope='dropout')
323 | net = ops.flatten(net, scope='flatten')
324 | highfeatures = net
325 | # 128-d sigmoid layer used as the binary hash feature.
326 | net = ops.fc(net, 128, activation=tf.nn.sigmoid, restore=restore_logits)
327 | # Classification logits over num_classes, computed from the 128-d layer.
328 | logits = ops.fc(net, num_classes, activation=None, scope='logits',
329 | restore=restore_logits)
330 | # Note: the first return value is the 128-d hash layer, not the logits.
331 | end_points['logits'] = logits
332 | end_points['predictions'] = tf.nn.softmax(logits, name='predictions')
333 | return net, highfeatures, end_points
334 |
335 | def inception_v3_parameters(weight_decay=0.00004, stddev=0.1,
336 | batch_norm_decay=0.9997, batch_norm_epsilon=0.001):
337 | """Yields the scope with the default parameters for inception_v3.
338 |
339 | Args:
340 | weight_decay: the weight decay for weights variables.
341 | stddev: standard deviation of the truncated Gaussian weight distribution.
342 | batch_norm_decay: decay for the moving average of batch_norm momentums.
343 | batch_norm_epsilon: small float added to variance to avoid dividing by zero.
344 |
345 | Yields:
346 | an arg_scope with the parameters needed for inception_v3.
347 | """
348 | # Set weight_decay for weights in Conv and FC layers.
349 | with scopes.arg_scope([ops.conv2d, ops.fc],
350 | weight_decay=weight_decay):
351 | # Set stddev, activation and parameters for batch_norm.
352 | with scopes.arg_scope([ops.conv2d],
353 | stddev=stddev,
354 | activation=tf.nn.relu,
355 | batch_norm_params={
356 | 'decay': batch_norm_decay,
357 | 'epsilon': batch_norm_epsilon}) as arg_scope:
358 | yield arg_scope
359 |
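A minimal usage sketch for the two functions above (an added example, not repo source). It assumes inception_v3_parameters is entered as a context manager (upstream TF-Slim wraps it with @contextlib.contextmanager; if that decorator is missing here it would have to be added) and that `images` is a [batch, 299, 299, 3] float tensor:

import tensorflow as tf
from src.inception import inception_model

images = tf.placeholder(tf.float32, [None, 299, 299, 3])
# Entering the yielded arg_scope applies the conv2d/fc defaults
# (weight decay, stddev, ReLU activation, batch norm) to every layer.
with inception_model.inception_v3_parameters():
    hash_layer, highfeatures, end_points = inception_model.inception_v3(
        images, dropout_keep_prob=0.8, num_classes=12, is_training=False)
# hash_layer:   [batch, 128] sigmoid outputs used as the hash feature.
# highfeatures: [batch, 2048] pooled features feeding the hash layer.
# end_points['predictions']: [batch, num_classes] softmax probabilities.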
--------------------------------------------------------------------------------
/src/inception/inception_module.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Build the Inception v3 network on ImageNet data set.
16 |
17 | The Inception v3 architecture is described in http://arxiv.org/abs/1512.00567
18 |
19 | Summary of available functions:
20 | inference: Compute inference on the model inputs to make a prediction
21 | loss: Compute the loss of the prediction with respect to the labels
22 | """
23 | from __future__ import absolute_import
24 | from __future__ import division
25 | from __future__ import print_function
26 |
27 | import re
28 |
29 | import tensorflow as tf
30 |
31 | from src.inception import slim
32 |
33 | FLAGS = tf.app.flags.FLAGS
34 |
35 | # If a model is trained using multiple GPUs, prefix all Op names with tower_name
36 | # to differentiate the operations. Note that this prefix is removed from the
37 | # names of the summaries when visualizing a model.
38 | TOWER_NAME = 'tower'
39 |
40 | # Batch normalization. Constant governing the exponential moving average of
41 | # the 'global' mean and variance for all activations.
42 | BATCHNORM_MOVING_AVERAGE_DECAY = 0.9997
43 |
44 | # The decay to use for the moving average.
45 | MOVING_AVERAGE_DECAY = 0.9999
46 |
47 |
48 | def inference(images, num_classes, for_training=False, restore_logits=True,
49 | scope=None):
50 | """Build Inception v3 model architecture.
51 |
52 | See here for reference: http://arxiv.org/abs/1512.00567
53 |
54 | Args:
55 | images: Images returned from inputs() or distorted_inputs().
56 | num_classes: number of classes
57 | for_training: If set to `True`, build the inference model for training.
58 | Kernels that operate differently for inference during training
59 | e.g. dropout, are appropriately configured.
60 | restore_logits: whether or not the logits layers should be restored.
61 | Useful for fine-tuning a model with different num_classes.
62 | scope: optional prefix string identifying the ImageNet tower.
63 |
64 | Returns:
65 | 128-d hash features and 2048-d high-level features. 2-D float Tensors.
66 | Auxiliary Logits. 2-D float Tensor of side-head. Used for training only.
67 | """
68 | # Parameters for BatchNorm.
69 | batch_norm_params = {
70 | # Decay for the moving averages.
71 | 'decay': BATCHNORM_MOVING_AVERAGE_DECAY,
72 | # epsilon to prevent 0s in variance.
73 | 'epsilon': 0.001,
74 | }
75 | # Set weight_decay for weights in Conv and FC layers.
76 | with slim.arg_scope([slim.ops.conv2d, slim.ops.fc], weight_decay=0.00004):
77 | with slim.arg_scope([slim.ops.conv2d],
78 | stddev=0.1,
79 | activation=tf.nn.relu,
80 | batch_norm_params=batch_norm_params):
81 | logits, highfeatures, endpoints = slim.inception.inception_v3(
82 | images,
83 | dropout_keep_prob=0.8,
84 | num_classes=num_classes,
85 | is_training=for_training,
86 | restore_logits=restore_logits,
87 | scope=scope)
88 |
89 | # Add summaries for viewing model statistics on TensorBoard.
90 | _activation_summaries(endpoints)
91 |
92 | # Grab the logits associated with the side head. Employed during training.
93 | auxiliary_logits = endpoints['aux_logits']
94 |
95 | return logits, highfeatures, auxiliary_logits
96 |
97 |
98 | def loss(logits, labels, batch_size=None):
99 | """Adds all losses for the model.
100 |
101 | Note the final loss is not returned. Instead, the losses are collected
102 | by slim.losses. The losses are accumulated in tower_loss() and summed to
103 | calculate the total loss.
104 |
105 | Args:
106 | logits: List of logits from inference(). Each entry is a 2-D float Tensor.
107 | labels: Labels from distorted_inputs or inputs(). 1-D tensor
108 | of shape [batch_size]
109 | batch_size: integer
110 | """
111 | if not batch_size:
112 | batch_size = FLAGS.batch_size
113 |
114 | # Reshape the labels into a dense Tensor of
115 | # shape [FLAGS.batch_size, num_classes].
116 | sparse_labels = tf.reshape(labels, [batch_size, 1])
117 | indices = tf.reshape(tf.range(batch_size), [batch_size, 1])
118 | concated = tf.concat([indices, sparse_labels], 1)
119 | num_classes = logits[0].get_shape()[-1].value
120 | dense_labels = tf.sparse_to_dense(concated,
121 | [batch_size, num_classes],
122 | 1.0, 0.0)
123 |
124 | # Cross entropy loss for the main softmax prediction.
125 | slim.losses.cross_entropy_loss(logits[0],
126 | dense_labels,
127 | label_smoothing=0.1,
128 | weight=1.0)
129 |
130 | # Cross entropy loss for the auxiliary softmax head.
131 | slim.losses.cross_entropy_loss(logits[1],
132 | dense_labels,
133 | label_smoothing=0.1,
134 | weight=0.4,
135 | scope='aux_loss')
136 |
137 |
138 | def _activation_summary(x):
139 | """Helper to create summaries for activations.
140 |
141 | Creates a summary that provides a histogram of activations.
142 | Creates a summary that measures the sparsity of activations.
143 |
144 | Args:
145 | x: Tensor
146 | """
147 | # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
148 | # session. This helps the clarity of presentation on tensorboard.
149 | tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
150 | tf.summary.histogram(tensor_name + '/activations', x)
151 | tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
152 |
153 |
154 | def _activation_summaries(endpoints):
155 | with tf.name_scope('summaries'):
156 | for act in endpoints.values():
157 | _activation_summary(act)
158 |
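A rough sketch of the loss() contract (an added example; the tensors are hypothetical placeholders). In a real driver the two list entries would be the main and auxiliary class logits; note that inference() returns the 128-d hash layer as its first value, so genuine class logits would have to come from the network's logits end point:

import tensorflow as tf
from src.inception import inception_module, losses

main_logits = tf.placeholder(tf.float32, [32, 12])  # main classification head
aux_head = tf.placeholder(tf.float32, [32, 12])     # auxiliary head
labels = tf.placeholder(tf.int32, [32])             # integer class ids

# loss() densifies the labels to one-hot and registers a smoothed
# cross-entropy term per head (weight 1.0 main, 0.4 auxiliary).
inception_module.loss([main_logits, aux_head], labels, batch_size=32)
total_loss = tf.add_n(tf.get_collection(losses.LOSSES_COLLECTION),
                      name='total_loss')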
--------------------------------------------------------------------------------
/src/inception/losses.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains convenience wrappers for various Neural Network TensorFlow losses.
16 |
17 | All the losses defined here add themselves to the LOSSES_COLLECTION
18 | collection.
19 |
20 | l1_loss: Define a L1 Loss, useful for regularization, i.e. lasso.
21 | l2_loss: Define a L2 Loss, useful for regularization, i.e. weight decay.
22 | cross_entropy_loss: Define a cross entropy loss using
23 | softmax_cross_entropy_with_logits. Useful for classification.
24 | """
25 | from __future__ import absolute_import
26 | from __future__ import division
27 | from __future__ import print_function
28 |
29 | import tensorflow as tf
30 |
31 | # In order to gather all losses in a network, the user should use this
32 | # key for get_collection, i.e:
33 | # losses = tf.get_collection(slim.losses.LOSSES_COLLECTION)
34 | LOSSES_COLLECTION = '_losses'
35 |
36 |
37 | def l1_regularizer(weight=1.0, scope=None):
38 | """Define a L1 regularizer.
39 |
40 | Args:
41 | weight: scale the loss by this factor.
42 | scope: Optional scope for op_scope.
43 |
44 | Returns:
45 | a regularizer function.
46 | """
47 | def regularizer(tensor):
48 | with tf.op_scope([tensor], scope, 'L1Regularizer'):
49 | l1_weight = tf.convert_to_tensor(weight,
50 | dtype=tensor.dtype.base_dtype,
51 | name='weight')
52 | return tf.multiply(l1_weight, tf.reduce_sum(tf.abs(tensor)), name='value')
53 | return regularizer
54 |
55 |
56 | def l2_regularizer(weight=1.0, scope=None):
57 | """Define a L2 regularizer.
58 |
59 | Args:
60 | weight: scale the loss by this factor.
61 | scope: Optional scope for op_scope.
62 |
63 | Returns:
64 | a regularizer function.
65 | """
66 | def regularizer(tensor):
67 | with tf.op_scope([tensor], scope, 'L2Regularizer'):
68 | l2_weight = tf.convert_to_tensor(weight,
69 | dtype=tensor.dtype.base_dtype,
70 | name='weight')
71 | return tf.multiply(l2_weight, tf.nn.l2_loss(tensor), name='value')
72 | return regularizer
73 |
74 |
75 | def l1_l2_regularizer(weight_l1=1.0, weight_l2=1.0, scope=None):
76 | """Define a L1L2 regularizer.
77 |
78 | Args:
79 | weight_l1: scale the L1 loss by this factor.
80 | weight_l2: scale the L2 loss by this factor.
81 | scope: Optional scope for op_scope.
82 |
83 | Returns:
84 | a regularizer function.
85 | """
86 | def regularizer(tensor):
87 | with tf.op_scope([tensor], scope, 'L1L2Regularizer'):
88 | weight_l1_t = tf.convert_to_tensor(weight_l1,
89 | dtype=tensor.dtype.base_dtype,
90 | name='weight_l1')
91 | weight_l2_t = tf.convert_to_tensor(weight_l2,
92 | dtype=tensor.dtype.base_dtype,
93 | name='weight_l2')
94 | reg_l1 = tf.multiply(weight_l1_t, tf.reduce_sum(tf.abs(tensor)),
95 | name='value_l1')
96 | reg_l2 = tf.multiply(weight_l2_t, tf.nn.l2_loss(tensor),
97 | name='value_l2')
98 | return tf.add(reg_l1, reg_l2, name='value')
99 | return regularizer
100 |
101 |
102 | def l1_loss(tensor, weight=1.0, scope=None):
103 | """Define a L1Loss, useful for regularize, i.e. lasso.
104 |
105 | Args:
106 | tensor: tensor to regularize.
107 | weight: scale the loss by this factor.
108 | scope: Optional scope for op_scope.
109 |
110 | Returns:
111 | the L1 loss op.
112 | """
113 | with tf.op_scope([tensor], scope, 'L1Loss'):
114 | weight = tf.convert_to_tensor(weight,
115 | dtype=tensor.dtype.base_dtype,
116 | name='loss_weight')
117 | loss = tf.multiply(weight, tf.reduce_sum(tf.abs(tensor)), name='value')
118 | tf.add_to_collection(LOSSES_COLLECTION, loss)
119 | return loss
120 |
121 |
122 | def l2_loss(tensor, weight=1.0, scope=None):
123 | """Define a L2Loss, useful for regularize, i.e. weight decay.
124 |
125 | Args:
126 | tensor: tensor to regularize.
127 | weight: an optional weight to modulate the loss.
128 | scope: Optional scope for op_scope.
129 |
130 | Returns:
131 | the L2 loss op.
132 | """
133 | with tf.op_scope([tensor], scope, 'L2Loss'):
134 | weight = tf.convert_to_tensor(weight,
135 | dtype=tensor.dtype.base_dtype,
136 | name='loss_weight')
137 | loss = tf.multiply(weight, tf.nn.l2_loss(tensor), name='value')
138 | tf.add_to_collection(LOSSES_COLLECTION, loss)
139 | return loss
140 |
141 |
142 | def cross_entropy_loss(logits, one_hot_labels, label_smoothing=0,
143 | weight=1.0, scope=None):
144 | """Define a Cross Entropy loss using softmax_cross_entropy_with_logits.
145 |
146 | It can scale the loss by weight factor, and smooth the labels.
147 |
148 | Args:
149 | logits: [batch_size, num_classes] logits outputs of the network.
150 | one_hot_labels: [batch_size, num_classes] target one_hot_encoded labels.
151 | label_smoothing: if greater than 0 then smooth the labels.
152 | weight: scale the loss by this factor.
153 | scope: Optional scope for op_scope.
154 |
155 | Returns:
156 | A tensor with the softmax_cross_entropy loss.
157 | """
158 | logits.get_shape().assert_is_compatible_with(one_hot_labels.get_shape())
159 | with tf.op_scope([logits, one_hot_labels], scope, 'CrossEntropyLoss'):
160 | num_classes = one_hot_labels.get_shape()[-1].value
161 | one_hot_labels = tf.cast(one_hot_labels, logits.dtype)
162 | if label_smoothing > 0:
163 | smooth_positives = 1.0 - label_smoothing
164 | smooth_negatives = label_smoothing / num_classes
165 | one_hot_labels = one_hot_labels * smooth_positives + smooth_negatives
166 | cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
167 | logits=logits, labels=one_hot_labels,
168 | name='xentropy')
169 | weight = tf.convert_to_tensor(weight,
170 | dtype=logits.dtype.base_dtype,
171 | name='loss_weight')
172 | loss = tf.multiply(weight, tf.reduce_mean(cross_entropy), name='value')
173 | tf.add_to_collection(LOSSES_COLLECTION, loss)
174 | return loss
175 |
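A small composition sketch (an added example; shapes are arbitrary). The regularizers return closures suitable for variable constructors, while the *_loss helpers both return the loss tensor and register it in LOSSES_COLLECTION:

import tensorflow as tf
from src.inception import losses

weights = tf.Variable(tf.truncated_normal([100, 10], stddev=0.01))
wd = losses.l2_loss(weights, weight=0.0004)  # weight-decay term

logits = tf.placeholder(tf.float32, [8, 10])
one_hot = tf.placeholder(tf.float32, [8, 10])
# With label_smoothing=0.1 and 10 classes, positive targets become
# 0.9 + 0.1/10 = 0.91 and negative targets 0.1/10 = 0.01.
xent = losses.cross_entropy_loss(logits, one_hot, label_smoothing=0.1)

all_losses = tf.get_collection(losses.LOSSES_COLLECTION)  # [wd, xent]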
--------------------------------------------------------------------------------
/src/inception/ops.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains convenience wrappers for typical Neural Network TensorFlow layers.
16 |
17 | Additionally it maintains a collection with update_ops that need to be
18 | updated after the ops have been computed, for example to update moving means
19 | and moving variances of batch_norm.
20 |
21 | Ops that have different behavior during training or eval have an is_training
22 | parameter. Additionally Ops that contain variables.variable have a trainable
23 | parameter, which controls whether the op's variables are trainable or not.
24 | """
25 | from __future__ import absolute_import
26 | from __future__ import division
27 | from __future__ import print_function
28 |
29 |
30 | import tensorflow as tf
31 |
32 | from tensorflow.python.training import moving_averages
33 |
34 | from src.inception import losses
35 | from src.inception import scopes
36 | from src.inception import variables
37 |
38 | # Used to keep the update ops done by batch_norm.
39 | UPDATE_OPS_COLLECTION = '_update_ops_'
40 |
41 |
42 | @scopes.add_arg_scope
43 | def batch_norm(inputs,
44 | decay=0.999,
45 | center=True,
46 | scale=False,
47 | epsilon=0.001,
48 | moving_vars='moving_vars',
49 | activation=None,
50 | is_training=True,
51 | trainable=True,
52 | restore=True,
53 | scope=None,
54 | reuse=None):
55 | """Adds a Batch Normalization layer.
56 |
57 | Args:
58 | inputs: a tensor of size [batch_size, height, width, channels]
59 | or [batch_size, channels].
60 | decay: decay for the moving average.
61 | center: If True, subtract beta. If False, beta is not created and ignored.
62 | scale: If True, multiply by gamma. If False, gamma is
63 | not used. When the next layer is linear (also e.g. ReLU), this can be
64 | disabled since the scaling can be done by the next layer.
65 | epsilon: small float added to variance to avoid dividing by zero.
66 | moving_vars: collection to store the moving_mean and moving_variance.
67 | activation: activation function.
68 | is_training: whether or not the model is in training mode.
69 | trainable: whether or not the variables should be trainable or not.
70 | restore: whether or not the variables should be marked for restore.
71 | scope: Optional scope for variable_op_scope.
72 | reuse: whether or not the layer and its variables should be reused. To be
73 | able to reuse the layer scope must be given.
74 |
75 | Returns:
76 | a tensor representing the output of the operation.
77 |
78 | """
79 | inputs_shape = inputs.get_shape()
80 | with tf.variable_op_scope([inputs], scope, 'BatchNorm', reuse=reuse):
81 | axis = list(range(len(inputs_shape) - 1))
82 | params_shape = inputs_shape[-1:]
83 | # Allocate parameters for the beta and gamma of the normalization.
84 | beta, gamma = None, None
85 | if center:
86 | beta = variables.variable('beta',
87 | params_shape,
88 | initializer=tf.zeros_initializer,
89 | trainable=trainable,
90 | restore=restore)
91 | if scale:
92 | gamma = variables.variable('gamma',
93 | params_shape,
94 | initializer=tf.ones_initializer,
95 | trainable=trainable,
96 | restore=restore)
97 | # Create moving_mean and moving_variance add them to
98 | # GraphKeys.MOVING_AVERAGE_VARIABLES collections.
99 | moving_collections = [moving_vars, tf.GraphKeys.MOVING_AVERAGE_VARIABLES]
100 | moving_mean = variables.variable('moving_mean',
101 | params_shape,
102 | initializer=tf.zeros_initializer,
103 | trainable=False,
104 | restore=restore,
105 | collections=moving_collections)
106 | moving_variance = variables.variable('moving_variance',
107 | params_shape,
108 | initializer=tf.ones_initializer,
109 | trainable=False,
110 | restore=restore,
111 | collections=moving_collections)
112 | if is_training:
113 | # Calculate the moments based on the individual batch.
114 | mean, variance = tf.nn.moments(inputs, axis)
115 |
116 | update_moving_mean = moving_averages.assign_moving_average(
117 | moving_mean, mean, decay)
118 | tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
119 | update_moving_variance = moving_averages.assign_moving_average(
120 | moving_variance, variance, decay)
121 | tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)
122 | else:
123 | # Just use the moving_mean and moving_variance.
124 | mean = moving_mean
125 | variance = moving_variance
126 | # Normalize the activations.
127 | outputs = tf.nn.batch_normalization(
128 | inputs, mean, variance, beta, gamma, epsilon)
129 | outputs.set_shape(inputs.get_shape())
130 | if activation:
131 | outputs = activation(outputs)
132 | return outputs
133 |
134 |
135 | def _two_element_tuple(int_or_tuple):
136 | """Converts `int_or_tuple` to height, width.
137 |
138 | Several of the functions that follow accept arguments as either
139 | a tuple of 2 integers or a single integer. A single integer
140 | indicates that the 2 values of the tuple are the same.
141 |
142 | This function normalizes the input value by always returning a tuple.
143 |
144 | Args:
145 | int_or_tuple: A list of 2 ints, a single int or a tf.TensorShape.
146 |
147 | Returns:
148 | A tuple with 2 values.
149 |
150 | Raises:
151 | ValueError: If `int_or_tuple` is not well formed.
152 | """
153 | if isinstance(int_or_tuple, (list, tuple)):
154 | if len(int_or_tuple) != 2:
155 | raise ValueError('Must be a list with 2 elements: %s' % int_or_tuple)
156 | return int(int_or_tuple[0]), int(int_or_tuple[1])
157 | if isinstance(int_or_tuple, int):
158 | return int(int_or_tuple), int(int_or_tuple)
159 | if isinstance(int_or_tuple, tf.TensorShape):
160 | if len(int_or_tuple) == 2:
161 | return int_or_tuple[0], int_or_tuple[1]
162 | raise ValueError('Must be an int, a list with 2 elements or a TensorShape of '
163 | 'length 2')
164 |
165 |
166 | @scopes.add_arg_scope
167 | def conv2d(inputs,
168 | num_filters_out,
169 | kernel_size,
170 | stride=1,
171 | padding='SAME',
172 | activation=tf.nn.relu,
173 | stddev=0.01,
174 | bias=0.0,
175 | weight_decay=0,
176 | batch_norm_params=None,
177 | is_training=True,
178 | trainable=True,
179 | restore=True,
180 | scope=None,
181 | reuse=None):
182 | """Adds a 2D convolution followed by an optional batch_norm layer.
183 |
184 | conv2d creates a variable called 'weights', representing the convolutional
185 | kernel, that is convolved with the input. If `batch_norm_params` is None, a
186 | second variable called 'biases' is added to the result of the convolution
187 | operation.
188 |
189 | Args:
190 | inputs: a tensor of size [batch_size, height, width, channels].
191 | num_filters_out: the number of output filters.
192 | kernel_size: a list of length 2: [kernel_height, kernel_width]
193 | of the filters. Can be an int if both values are the same.
194 | stride: a list of length 2: [stride_height, stride_width].
195 | Can be an int if both strides are the same. Note that presently
196 | both strides must have the same value.
197 | padding: one of 'VALID' or 'SAME'.
198 | activation: activation function.
199 | stddev: standard deviation of the truncated Gaussian weight distribution.
200 | bias: the initial value of the biases.
201 | weight_decay: the weight decay.
202 | batch_norm_params: parameters for the batch_norm. If None, it is not used.
203 | is_training: whether or not the model is in training mode.
204 | trainable: whether or not the variables should be trainable or not.
205 | restore: whether or not the variables should be marked for restore.
206 | scope: Optional scope for variable_op_scope.
207 | reuse: whether or not the layer and its variables should be reused. To be
208 | able to reuse the layer scope must be given.
209 | Returns:
210 | a tensor representing the output of the operation.
211 |
212 | """
213 | with tf.variable_op_scope([inputs], scope, 'Conv', reuse=reuse):
214 | kernel_h, kernel_w = _two_element_tuple(kernel_size)
215 | stride_h, stride_w = _two_element_tuple(stride)
216 | num_filters_in = inputs.get_shape()[-1]
217 | weights_shape = [kernel_h, kernel_w,
218 | num_filters_in, num_filters_out]
219 | weights_initializer = tf.truncated_normal_initializer(stddev=stddev)
220 | l2_regularizer = None
221 | if weight_decay and weight_decay > 0:
222 | l2_regularizer = losses.l2_regularizer(weight_decay)
223 | weights = variables.variable('weights',
224 | shape=weights_shape,
225 | initializer=weights_initializer,
226 | regularizer=l2_regularizer,
227 | trainable=trainable,
228 | restore=restore)
229 | conv = tf.nn.conv2d(inputs, weights, [1, stride_h, stride_w, 1],
230 | padding=padding)
231 | if batch_norm_params is not None:
232 | with scopes.arg_scope([batch_norm], is_training=is_training,
233 | trainable=trainable, restore=restore):
234 | outputs = batch_norm(conv, **batch_norm_params)
235 | else:
236 | bias_shape = [num_filters_out,]
237 | bias_initializer = tf.constant_initializer(bias)
238 | biases = variables.variable('biases',
239 | shape=bias_shape,
240 | initializer=bias_initializer,
241 | trainable=trainable,
242 | restore=restore)
243 | outputs = tf.nn.bias_add(conv, biases)
244 | if activation:
245 | outputs = activation(outputs)
246 | return outputs
247 |
248 |
249 | @scopes.add_arg_scope
250 | def fc(inputs,
251 | num_units_out,
252 | activation=tf.nn.relu,
253 | stddev=0.01,
254 | bias=0.0,
255 | weight_decay=0,
256 | batch_norm_params=None,
257 | is_training=True,
258 | trainable=True,
259 | restore=True,
260 | scope=None,
261 | reuse=None):
262 | """Adds a fully connected layer followed by an optional batch_norm layer.
263 |
264 | FC creates a variable called 'weights', representing the fully connected
265 | weight matrix, that is multiplied by the input. If `batch_norm` is None, a
266 | second variable called 'biases' is added to the result of the initial
267 | vector-matrix multiplication.
268 |
269 | Args:
270 | inputs: a [B x N] tensor where B is the batch size and N is the number of
271 | input units in the layer.
272 | num_units_out: the number of output units in the layer.
273 | activation: activation function.
274 | stddev: the standard deviation for the weights.
275 | bias: the initial value of the biases.
276 | weight_decay: the weight decay.
277 | batch_norm_params: parameters for the batch_norm. If None, it is not used.
278 | is_training: whether or not the model is in training mode.
279 | trainable: whether or not the variables should be trainable or not.
280 | restore: whether or not the variables should be marked for restore.
281 | scope: Optional scope for variable_op_scope.
282 | reuse: whether or not the layer and its variables should be reused. To be
283 | able to reuse the layer scope must be given.
284 |
285 | Returns:
286 | the tensor variable representing the result of the series of operations.
287 | """
288 | with tf.variable_op_scope([inputs], scope, 'FC', reuse=reuse):
289 | num_units_in = inputs.get_shape()[1]
290 | weights_shape = [num_units_in, num_units_out]
291 | weights_initializer = tf.truncated_normal_initializer(stddev=stddev)
292 | l2_regularizer = None
293 | if weight_decay and weight_decay > 0:
294 | l2_regularizer = losses.l2_regularizer(weight_decay)
295 | weights = variables.variable('weights',
296 | shape=weights_shape,
297 | initializer=weights_initializer,
298 | regularizer=l2_regularizer,
299 | trainable=trainable,
300 | restore=restore)
301 | if batch_norm_params is not None:
302 | outputs = tf.matmul(inputs, weights)
303 | with scopes.arg_scope([batch_norm], is_training=is_training,
304 | trainable=trainable, restore=restore):
305 | outputs = batch_norm(outputs, **batch_norm_params)
306 | else:
307 | bias_shape = [num_units_out,]
308 | bias_initializer = tf.constant_initializer(bias)
309 | biases = variables.variable('biases',
310 | shape=bias_shape,
311 | initializer=bias_initializer,
312 | trainable=trainable,
313 | restore=restore)
314 | outputs = tf.nn.xw_plus_b(inputs, weights, biases)
315 | if activation:
316 | outputs = activation(outputs)
317 | return outputs
318 |
319 |
320 | def one_hot_encoding(labels, num_classes, scope=None):
321 | """Transform numeric labels into onehot_labels.
322 |
323 | Args:
324 | labels: [batch_size] target labels.
325 | num_classes: total number of classes.
326 | scope: Optional scope for op_scope.
327 | Returns:
328 | one hot encoding of the labels.
329 | """
330 | with tf.op_scope([labels], scope, 'OneHotEncoding'):
331 | batch_size = labels.get_shape()[0]
332 | indices = tf.expand_dims(tf.range(0, batch_size), 1)
333 | labels = tf.cast(tf.expand_dims(labels, 1), indices.dtype)
334 | concated = tf.concat([indices, labels], 1)
335 | onehot_labels = tf.sparse_to_dense(
336 | concated, tf.stack([batch_size, num_classes]), 1.0, 0.0)
337 | onehot_labels.set_shape([batch_size, num_classes])
338 | return onehot_labels
339 |
340 |
341 | @scopes.add_arg_scope
342 | def max_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None):
343 | """Adds a Max Pooling layer.
344 |
345 | It is assumed by the wrapper that the pooling is only done per image and not
346 | in depth or batch.
347 |
348 | Args:
349 | inputs: a tensor of size [batch_size, height, width, depth].
350 | kernel_size: a list of length 2: [kernel_height, kernel_width] of the
351 | pooling kernel over which the op is computed. Can be an int if both
352 | values are the same.
353 | stride: a list of length 2: [stride_height, stride_width].
354 | Can be an int if both strides are the same. Note that presently
355 | both strides must have the same value.
356 | padding: the padding method, either 'VALID' or 'SAME'.
357 | scope: Optional scope for op_scope.
358 |
359 | Returns:
360 | a tensor representing the results of the pooling operation.
361 | Raises:
362 | ValueError: if 'kernel_size' is not a 2-D list
363 | """
364 | with tf.op_scope([inputs], scope, 'MaxPool'):
365 | kernel_h, kernel_w = _two_element_tuple(kernel_size)
366 | stride_h, stride_w = _two_element_tuple(stride)
367 | return tf.nn.max_pool(inputs,
368 | ksize=[1, kernel_h, kernel_w, 1],
369 | strides=[1, stride_h, stride_w, 1],
370 | padding=padding)
371 |
372 |
373 | @scopes.add_arg_scope
374 | def avg_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None):
375 | """Adds a Avg Pooling layer.
376 |
377 | It is assumed by the wrapper that the pooling is only done per image and not
378 | in depth or batch.
379 |
380 | Args:
381 | inputs: a tensor of size [batch_size, height, width, depth].
382 | kernel_size: a list of length 2: [kernel_height, kernel_width] of the
383 | pooling kernel over which the op is computed. Can be an int if both
384 | values are the same.
385 | stride: a list of length 2: [stride_height, stride_width].
386 | Can be an int if both strides are the same. Note that presently
387 | both strides must have the same value.
388 | padding: the padding method, either 'VALID' or 'SAME'.
389 | scope: Optional scope for op_scope.
390 |
391 | Returns:
392 | a tensor representing the results of the pooling operation.
393 | """
394 | with tf.op_scope([inputs], scope, 'AvgPool'):
395 | kernel_h, kernel_w = _two_element_tuple(kernel_size)
396 | stride_h, stride_w = _two_element_tuple(stride)
397 | return tf.nn.avg_pool(inputs,
398 | ksize=[1, kernel_h, kernel_w, 1],
399 | strides=[1, stride_h, stride_w, 1],
400 | padding=padding)
401 |
402 |
403 | @scopes.add_arg_scope
404 | def dropout(inputs, keep_prob=0.5, is_training=True, scope=None):
405 | """Returns a dropout layer applied to the input.
406 |
407 | Args:
408 | inputs: the tensor to pass to the Dropout layer.
409 | keep_prob: the probability of keeping each input unit.
410 | is_training: whether or not the model is in training mode. If so, dropout is
411 | applied and values scaled. Otherwise, inputs is returned.
412 | scope: Optional scope for op_scope.
413 |
414 | Returns:
415 | a tensor representing the output of the operation.
416 | """
417 | if is_training and keep_prob > 0:
418 | with tf.op_scope([inputs], scope, 'Dropout'):
419 | return tf.nn.dropout(inputs, keep_prob)
420 | else:
421 | return inputs
422 |
423 |
424 | def flatten(inputs, scope=None):
425 | """Flattens the input while maintaining the batch_size.
426 |
427 | Assumes that the first dimension represents the batch.
428 |
429 | Args:
430 | inputs: a tensor of size [batch_size, ...].
431 | scope: Optional scope for op_scope.
432 |
433 | Returns:
434 | a flattened tensor with shape [batch_size, k].
435 | Raises:
436 | ValueError: if inputs.shape is wrong.
437 | """
438 | if len(inputs.get_shape()) < 2:
439 | raise ValueError('Inputs must have at least 2 dimensions')
440 | dims = inputs.get_shape()[1:]
441 | k = dims.num_elements()
442 | with tf.op_scope([inputs], scope, 'Flatten'):
443 | return tf.reshape(inputs, [-1, k])
444 |
445 |
446 | def repeat_op(repetitions, inputs, op, *args, **kwargs):
447 | """Build a sequential Tower starting from inputs by using an op repeatedly.
448 |
449 | It creates new scopes for each operation by increasing the counter.
450 | Example: given repeat_op(3, _, ops.conv2d, 64, [3, 3], scope='conv1')
451 | it will repeat the given op under the following variable_scopes:
452 | conv1/Conv
453 | conv1/Conv_1
454 | conv1/Conv_2
455 |
456 | Args:
457 | repetitions: number of repetitions.
458 | inputs: a tensor of size [batch_size, height, width, channels].
459 | op: an operation.
460 | *args: args for the op.
461 | **kwargs: kwargs for the op.
462 |
463 | Returns:
464 | a tensor result of applying the operation op, num times.
465 | Raises:
466 | ValueError: if the op is unknown or wrong.
467 | """
468 | scope = kwargs.pop('scope', None)
469 | with tf.variable_op_scope([inputs], scope, 'RepeatOp'):
470 | tower = inputs
471 | for _ in range(repetitions):
472 | tower = op(tower, *args, **kwargs)
473 | return tower
474 |
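A compact sketch of the wrappers above (an added example; shapes are arbitrary). With batch_norm_params set, conv2d adds batch_norm instead of biases, and the moving mean/variance updates are queued in UPDATE_OPS_COLLECTION, which a training loop must run alongside its train op:

import tensorflow as tf
from src.inception import ops, scopes

inputs = tf.placeholder(tf.float32, [None, 32, 32, 3])
with scopes.arg_scope([ops.conv2d], stddev=0.1, weight_decay=0.0004,
                      batch_norm_params={'decay': 0.9997, 'epsilon': 0.001}):
    net = ops.conv2d(inputs, 64, [3, 3], scope='conv1')  # conv + BN + ReLU
    net = ops.max_pool(net, [2, 2], scope='pool1')       # stride defaults to 2
    net = ops.flatten(net)
    net = ops.fc(net, 10, activation=None, scope='logits')

# Batch-norm statistics updates to run at every training step.
update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION)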
--------------------------------------------------------------------------------
/src/inception/scopes.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains the new arg_scope used for TF-Slim ops.
16 |
17 | Allows one to define models much more compactly by eliminating boilerplate
18 | code. This is accomplished through the use of argument scoping (arg_scope).
19 |
20 | Example of how to use scopes.arg_scope:
21 |
22 | with scopes.arg_scope([ops.conv2d], padding='SAME',
23 | stddev=0.01, weight_decay=0.0005):
24 | net = ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
25 | net = ops.conv2d(net, 256, [5, 5], scope='conv2')
26 |
27 | The first call to conv2d will use predefined args:
28 | ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
29 | stddev=0.01, weight_decay=0.0005, scope='conv1')
30 |
31 | The second call to conv2d will use the scope's padding:
32 | ops.conv2d(net, 256, [5, 5], padding='SAME',
33 | stddev=0.01, weight_decay=0.0005, scope='conv2')
34 |
35 | Example of how to reuse an arg_scope:
36 | with scopes.arg_scope([ops.conv2d], padding='SAME',
37 | stddev=0.01, weight_decay=0.0005) as conv2d_arg_scope:
38 | net = ops.conv2d(net, 256, [5, 5], scope='conv1')
39 | ....
40 |
41 | with scopes.arg_scope(conv2d_arg_scope):
42 | net = ops.conv2d(net, 256, [5, 5], scope='conv2')
43 |
44 | Example of how to use scopes.add_arg_scope:
45 |
46 | @scopes.add_arg_scope
47 | def conv2d(*args, **kwargs):
48 | """
49 | from __future__ import absolute_import
50 | from __future__ import division
51 | from __future__ import print_function
52 |
53 | import contextlib
54 | import functools
55 |
56 | from tensorflow.python.framework import ops
57 |
58 | _ARGSTACK_KEY = ("__arg_stack",)
59 |
60 | _DECORATED_OPS = set()
61 |
62 |
63 | def _get_arg_stack():
64 | stack = ops.get_collection(_ARGSTACK_KEY)
65 | if stack:
66 | return stack[0]
67 | else:
68 | stack = [{}]
69 | ops.add_to_collection(_ARGSTACK_KEY, stack)
70 | return stack
71 |
72 |
73 | def _current_arg_scope():
74 | stack = _get_arg_stack()
75 | return stack[-1]
76 |
77 |
78 | def _add_op(op):
79 | key_op = (op.__module__, op.__name__)
80 | if key_op not in _DECORATED_OPS:
81 | _DECORATED_OPS.add(key_op)
82 |
83 |
84 | @contextlib.contextmanager
85 | def arg_scope(list_ops_or_scope, **kwargs):
86 | """Stores the default arguments for the given set of list_ops.
87 |
88 | For usage, please see examples at top of the file.
89 |
90 | Args:
91 | list_ops_or_scope: List or tuple of operations to set argument scope for or
92 | a dictionary containing the current scope. When list_ops_or_scope is a dict,
93 | kwargs must be empty. When list_ops_or_scope is a list or tuple, then
94 | every op in it needs to be decorated with @add_arg_scope to work.
95 | **kwargs: keyword=value that will define the defaults for each op in
96 | list_ops. All the ops need to accept the given set of arguments.
97 |
98 | Yields:
99 | the current_scope, which is a dictionary of {op: {arg: value}}
100 | Raises:
101 | TypeError: if list_ops is not a list or a tuple.
102 | ValueError: if any op in list_ops has not been decorated with @add_arg_scope.
103 | """
104 | if isinstance(list_ops_or_scope, dict):
105 | # Assumes that list_ops_or_scope is a scope that is being reused.
106 | if kwargs:
107 | raise ValueError("When attempting to re-use a scope by supplying a "
108 | "dictionary, kwargs must be empty.")
109 | current_scope = list_ops_or_scope.copy()
110 | try:
111 | _get_arg_stack().append(current_scope)
112 | yield current_scope
113 | finally:
114 | _get_arg_stack().pop()
115 | else:
116 | # Assumes that list_ops_or_scope is a list/tuple of ops with kwargs.
117 | if not isinstance(list_ops_or_scope, (list, tuple)):
118 | raise TypeError("list_ops_or_scope must either be a list/tuple or a reused "
119 | "scope (i.e. dict)")
120 | try:
121 | current_scope = _current_arg_scope().copy()
122 | for op in list_ops_or_scope:
123 | key_op = (op.__module__, op.__name__)
124 | if not has_arg_scope(op):
125 | raise ValueError("%s is not decorated with @add_arg_scope" % (key_op,))
126 | if key_op in current_scope:
127 | current_kwargs = current_scope[key_op].copy()
128 | current_kwargs.update(kwargs)
129 | current_scope[key_op] = current_kwargs
130 | else:
131 | current_scope[key_op] = kwargs.copy()
132 | _get_arg_stack().append(current_scope)
133 | yield current_scope
134 | finally:
135 | _get_arg_stack().pop()
136 |
137 |
138 | def add_arg_scope(func):
139 | """Decorates a function with args so it can be used within an arg_scope.
140 |
141 | Args:
142 | func: function to decorate.
143 |
144 | Returns:
145 | The decorated function func_with_args().
146 | """
147 | @functools.wraps(func)
148 | def func_with_args(*args, **kwargs):
149 | current_scope = _current_arg_scope()
150 | current_args = kwargs
151 | key_func = (func.__module__, func.__name__)
152 | if key_func in current_scope:
153 | current_args = current_scope[key_func].copy()
154 | current_args.update(kwargs)
155 | return func(*args, **current_args)
156 | _add_op(func)
157 | return func_with_args
158 |
159 |
160 | def has_arg_scope(func):
161 | """Checks whether a func has been decorated with @add_arg_scope or not.
162 |
163 | Args:
164 | func: function to check.
165 |
166 | Returns:
167 | a boolean.
168 | """
169 | key_op = (func.__module__, func.__name__)
170 | return key_op in _DECORATED_OPS
171 |
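A toy demonstration of the argument-stack semantics (an added example; `emit` is a hypothetical function, not repo code). Nested scopes merge per-op kwargs; inner scopes and explicit call-site arguments take precedence:

from src.inception import scopes

@scopes.add_arg_scope
def emit(x, pad='SAME', stddev=0.01):
    return (x, pad, stddev)

with scopes.arg_scope([emit], pad='VALID'):
    print(emit(1))                  # (1, 'VALID', 0.01)
    with scopes.arg_scope([emit], stddev=0.1):
        print(emit(2))              # (2, 'VALID', 0.1)  -- scopes merged
        print(emit(3, pad='SAME'))  # (3, 'SAME', 0.1)   -- call site wins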
--------------------------------------------------------------------------------
/src/inception/slim.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """TF-Slim grouped API. Please see README.md for details and usage."""
16 | # pylint: disable=unused-import
17 |
18 | # Collapse tf-slim into a single namespace.
19 | from src.inception import inception_model as inception
20 | from src.inception import losses
21 | from src.inception import ops
22 | from src.inception import scopes
23 | from src.inception import variables
24 | from src.inception.scopes import arg_scope
25 |
--------------------------------------------------------------------------------
/src/inception/variables.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains convenience wrappers for creating variables in TF-Slim.
16 |
17 | The variables module is typically used for defining model variables from the
18 | ops routines (see slim.ops). Such variables are used for training, evaluation
19 | and inference of models.
20 |
21 | All the variables created through this module would be added to the
22 | MODEL_VARIABLES collection. If you create a model variable outside slim, it can
23 | be added with slim.variables.add_variable(external_variable, reuse).
24 |
25 | Usage:
26 | weights_initializer = tf.truncated_normal_initializer(stddev=0.01)
27 | l2_regularizer = lambda t: losses.l2_loss(t, weight=0.0005)
28 | weights = variables.variable('weights',
29 | shape=[100, 100],
30 | initializer=weights_initializer,
31 | regularizer=l2_regularizer,
32 | device='/cpu:0')
33 |
34 | biases = variables.variable('biases',
35 | shape=[100],
36 | initializer=tf.zeros_initializer,
37 | device='/cpu:0')
38 |
39 | # More complex example.
40 |
41 | net = slim.ops.conv2d(input, 32, [3, 3], scope='conv1')
42 | net = slim.ops.conv2d(net, 64, [3, 3], scope='conv2')
43 | with slim.arg_scope([variables.variable], restore=False):
44 | net = slim.ops.conv2d(net, 64, [3, 3], scope='conv3')
45 |
46 | # Get all model variables from all the layers.
47 | model_variables = slim.variables.get_variables()
48 |
49 | # Get all model variables from a specific layer, e.g. 'conv1'.
50 | conv1_variables = slim.variables.get_variables('conv1')
51 |
52 | # Get all weights from all the layers.
53 | weights = slim.variables.get_variables_by_name('weights')
54 |
55 | # Get all biases from all the layers.
56 | biases = slim.variables.get_variables_by_name('biases')
57 |
58 | # Get all variables to restore.
59 | # (i.e. only those created by 'conv1' and 'conv2')
60 | variables_to_restore = slim.variables.get_variables_to_restore()
61 |
62 | ************************************************
63 | * Initializing model variables from a checkpoint
64 | ************************************************
65 |
66 | # Create some variables.
67 | v1 = slim.variables.variable(name="v1", ..., restore=False)
68 | v2 = slim.variables.variable(name="v2", ...) # By default restore=True
69 | ...
70 | # The list of variables to restore should only contain 'v2'.
71 | variables_to_restore = slim.variables.get_variables_to_restore()
72 | restorer = tf.train.Saver(variables_to_restore)
73 | with tf.Session() as sess:
74 | # Restore variables from disk.
75 | restorer.restore(sess, "/tmp/model.ckpt")
76 | print("Model restored.")
77 | # Do some work with the model
78 | ...
79 |
80 | """
81 | from __future__ import absolute_import
82 | from __future__ import division
83 | from __future__ import print_function
84 |
85 | import tensorflow as tf
86 |
87 | from tensorflow.core.framework import graph_pb2
88 | from src.inception import scopes
89 |
90 | # Collection containing all the variables created using slim.variables
91 | MODEL_VARIABLES = '_model_variables_'
92 |
93 | # Collection containing the slim.variables that are created with restore=True.
94 | VARIABLES_TO_RESTORE = '_variables_to_restore_'
95 |
96 |
97 | def add_variable(var, restore=True):
98 | """Adds a variable to the MODEL_VARIABLES collection.
99 |
100 | Optionally it will add the variable to the VARIABLES_TO_RESTORE collection.
101 | Args:
102 | var: a variable.
103 | restore: whether the variable should be added to the
104 | VARIABLES_TO_RESTORE collection.
105 |
106 | """
107 | collections = [MODEL_VARIABLES]
108 | if restore:
109 | collections.append(VARIABLES_TO_RESTORE)
110 | for collection in collections:
111 | if var not in tf.get_collection(collection):
112 | tf.add_to_collection(collection, var)
113 |
114 |
115 | def get_variables(scope=None, suffix=None):
116 | """Gets the list of variables, filtered by scope and/or suffix.
117 |
118 | Args:
119 | scope: an optional scope for filtering the variables to return.
120 | suffix: an optional suffix for filtering the variables to return.
121 |
122 | Returns:
123 | a copied list of variables with scope and suffix.
124 | """
125 | candidates = tf.get_collection(MODEL_VARIABLES, scope)[:]
126 | if suffix is not None:
127 | candidates = [var for var in candidates if var.op.name.endswith(suffix)]
128 | return candidates
129 |
130 |
131 | def get_variables_to_restore():
132 | """Gets the list of variables to restore.
133 |
134 | Returns:
135 | a copied list of variables.
136 | """
137 | return tf.get_collection(VARIABLES_TO_RESTORE)[:]
138 |
139 |
140 | def get_variables_by_name(given_name, scope=None):
141 | """Gets the list of variables that were given that name.
142 |
143 | Args:
144 | given_name: name given to the variable without scope.
145 | scope: an optional scope for filtering the variables to return.
146 |
147 | Returns:
148 | a copied list of variables with the given name and prefix.
149 | """
150 | return get_variables(scope=scope, suffix=given_name)
151 |
152 |
153 | def get_unique_variable(name):
154 | """Gets the variable uniquely identified by that name.
155 |
156 | Args:
157 | name: a name that uniquely identifies the variable.
158 |
159 | Returns:
160 | a tensorflow variable.
161 |
162 | Raises:
163 | ValueError: if no variable uniquely identified by the name exists.
164 | """
165 | candidates = tf.get_collection(tf.GraphKeys.VARIABLES, name)
166 | if not candidates:
167 | raise ValueError('Could not find variable %s' % name)
168 |
169 | for candidate in candidates:
170 | if candidate.op.name == name:
171 | return candidate
172 | raise ValueError('Variable %s does not uniquely identify a variable' % name)
173 |
174 |
175 | class VariableDeviceChooser(object):
176 | """Slim device chooser for variables.
177 |
178 | When using a parameter server it will assign them in a round-robin fashion.
179 | When not using a parameter server it allows GPU:0 placement otherwise CPU:0.
180 | """
181 |
182 | def __init__(self,
183 | num_parameter_servers=0,
184 | ps_device='/job:ps',
185 | placement='CPU:0'):
186 | """Initialize VariableDeviceChooser.
187 |
188 | Args:
189 | num_parameter_servers: number of parameter servers.
190 | ps_device: string representing the parameter server device.
191 | placement: string representing the placement of the variable either CPU:0
192 | or GPU:0. When using parameter servers forced to CPU:0.
193 | """
194 | self._num_ps = num_parameter_servers
195 | self._ps_device = ps_device
196 | self._placement = placement if num_parameter_servers == 0 else 'CPU:0'
197 | self._next_task_id = 0
198 |
199 | def __call__(self, op):
200 | device_string = ''
201 | if self._num_ps > 0:
202 | task_id = self._next_task_id
203 | self._next_task_id = (self._next_task_id + 1) % self._num_ps
204 | device_string = '%s/task:%d' % (self._ps_device, task_id)
205 | device_string += '/%s' % self._placement
206 | return device_string
207 |
208 |
209 | # TODO(sguada) Remove once get_variable is able to colocate op.devices.
210 | def variable_device(device, name):
211 | """Fix the variable device to colocate its ops."""
212 | if callable(device):
213 | var_name = tf.get_variable_scope().name + '/' + name
214 | var_def = graph_pb2.NodeDef(name=var_name, op='Variable')
215 | device = device(var_def)
216 | if device is None:
217 | device = ''
218 | return device
219 |
220 |
221 | @scopes.add_arg_scope
222 | def global_step(device=''):
223 | """Returns the global step variable.
224 |
225 | Args:
226 | device: Optional device to place the variable. It can be a string or a
227 | function that is called to get the device for the variable.
228 |
229 | Returns:
230 | the tensor representing the global step variable.
231 | """
232 | global_step_ref = tf.get_collection(tf.GraphKeys.GLOBAL_STEP)
233 | if global_step_ref:
234 | return global_step_ref[0]
235 | else:
236 | collections = [
237 | VARIABLES_TO_RESTORE,
238 | tf.GraphKeys.VARIABLES,
239 | tf.GraphKeys.GLOBAL_STEP,
240 | ]
241 | # Get the device for the variable.
242 | with tf.device(variable_device(device, 'global_step')):
243 | return tf.get_variable('global_step', shape=[], dtype=tf.int64,
244 | initializer=tf.zeros_initializer,
245 | trainable=False, collections=collections)
246 |
247 |
248 | @scopes.add_arg_scope
249 | def variable(name, shape=None, dtype=tf.float32, initializer=None,
250 | regularizer=None, trainable=True, collections=None, device='',
251 | restore=True):
252 | """Gets an existing variable with these parameters or creates a new one.
253 |
254 | It also adds itself to a group with its name.
255 |
256 | Args:
257 | name: the name of the new or existing variable.
258 | shape: shape of the new or existing variable.
259 | dtype: type of the new or existing variable (defaults to `DT_FLOAT`).
260 | initializer: initializer for the variable if one is created.
261 | regularizer: a (Tensor -> Tensor or None) function; the result of
262 | applying it on a newly created variable will be added to the collection
263 | GraphKeys.REGULARIZATION_LOSSES and can be used for regularization.
264 | trainable: If `True` also add the variable to the graph collection
265 | `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
266 | collections: A list of collection names to which the Variable will be added.
267 | Note that the variable is always also added to the tf.GraphKeys.VARIABLES
268 | and MODEL_VARIABLES collections.
269 | device: Optional device to place the variable. It can be a string or a
270 | function that is called to get the device for the variable.
271 | restore: whether the variable should be added to the
272 | VARIABLES_TO_RESTORE collection.
273 |
274 | Returns:
275 | The created or existing variable.
276 | """
277 | collections = list(collections or [])
278 |
279 | # Make sure variables are added to tf.GraphKeys.VARIABLES and MODEL_VARIABLES
280 | collections += [tf.GraphKeys.VARIABLES, MODEL_VARIABLES]
281 | # Add to VARIABLES_TO_RESTORE if necessary
282 | if restore:
283 | collections.append(VARIABLES_TO_RESTORE)
284 | # Remove duplicates
285 | collections = set(collections)
286 | # Get the device for the variable.
287 | with tf.device(variable_device(device, name)):
288 | return tf.get_variable(name, shape=shape, dtype=dtype,
289 | initializer=initializer, regularizer=regularizer,
290 | trainable=trainable, collections=collections)
291 |
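A short sketch of the restore workflow these collections enable (an added example; the checkpoint path is a placeholder). Variables created with restore=False stay out of VARIABLES_TO_RESTORE, so a new head can be re-initialized while the backbone is loaded from a checkpoint:

import tensorflow as tf
from src.inception import variables

# Backbone weight: restore defaults to True, so it joins VARIABLES_TO_RESTORE.
w = variables.variable('w', shape=[2048, 128],
                       initializer=tf.truncated_normal_initializer(stddev=0.01))
# New head: freshly initialized, excluded from the restore list.
head = variables.variable('head', shape=[128, 11],
                          initializer=tf.truncated_normal_initializer(stddev=0.001),
                          restore=False)

saver = tf.train.Saver(variables.get_variables_to_restore())  # only 'w'
with tf.Session() as sess:
    sess.run(tf.variables_initializer([head]))
    saver.restore(sess, '/tmp/model.ckpt')  # placeholder checkpoint path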
--------------------------------------------------------------------------------
/src/myComputePrecision.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import math
6 | import os.path
7 | import time
8 | import sys, os
9 | import numpy as np
10 | import tensorflow as tf
11 |
12 | from inception import image_processing
13 | from inception import inception_module
14 | FLAGS = tf.app.flags.FLAGS
15 |
16 | #checkpoint_path=E:\老电脑\实验室\海报检索1\海报检索\inceptionmodel.ckpt-42100
17 | #/home/imc/caffe/poster/mytensor0417/model11/model.ckpt-42100
18 | tf.app.flags.DEFINE_string('checkpoint_path', 'D:/dev/unicorn/model/model.ckpt-42100',
19 | """Directory where to read model checkpoints.""")
20 | # num_classes=11: number of classes in the training data.
21 | tf.app.flags.DEFINE_integer('num_classes', 11,
22 | """Number of classes.""")
23 |
24 |
25 | class NetSaver():
26 | def __init__(self):
27 | self.loadNet()
28 | self.loadModel()
29 |
30 | def loadNet(self):
31 | self.__buildNet()
32 |
33 | def loadModel(self, model=FLAGS.checkpoint_path):
34 | self.saver.restore(self.__sess, model)
35 |
36 | # @property exposes the private member as a read-only attribute.
37 | @property
38 | def classes(self):
39 | return self.__classes
40 |
41 | @property
42 | def sess(self):
43 | return self.__sess
44 |
45 | @property
46 | def output(self):
47 | return self.__output
48 |
49 | @property
50 | def image_buffer(self):
51 | return self.__image_buffer
52 |
53 | def __load_model(self):  # unused; note that self.model is never assigned
54 | #self.__sess = tf.Session()
55 | self.saver.restore(self.__sess, self.model)
56 |
57 | def __buildInputImagePlaceholder(self):
58 | self.__image_buffer = tf.placeholder("string")
59 | image = tf.image.decode_jpeg(self.image_buffer, channels=3)
60 | image = tf.image.convert_image_dtype(image, dtype=tf.float32)
61 | image = tf.image.central_crop(image, central_fraction=0.875)
62 | # Resize the image to the original height and width.
63 | image = tf.expand_dims(image, 0)
64 | image = tf.image.resize_bilinear(image, [299, 299],align_corners=False)
65 | image_tensor = tf.squeeze(image, [0])
66 | self.x_image = tf.reshape(image_tensor, [-1, 299, 299, 3])
67 |
68 | def __buildNet(self):
69 | # Build in the default graph; calling tf.Graph().as_default() without 'with' has no effect.
70 | # Number of classes in the dataset label set plus 1.
71 | # Label 0 is reserved for an (unused) background class.
72 | num_classes = FLAGS.num_classes + 1
73 | #setup an input image placeholder to feed image buffer
74 | self.__buildInputImagePlaceholder()
75 | # Build a Graph that computes the logits predictions from the inference model.
76 | self.logits, self.highfeatures, self.aux_logits = inception_module.inference(self.x_image, num_classes)
77 | # result is the softmax over the auxiliary logits
78 | self.__output = tf.nn.softmax(self.aux_logits, name="result")
79 | # Restore the moving average version of the learned variables for eval.
80 | variable_averages = tf.train.ExponentialMovingAverage(
81 | inception_module.MOVING_AVERAGE_DECAY)
82 | variables_to_restore = variable_averages.variables_to_restore()
83 | self.saver = tf.train.Saver(variables_to_restore)
84 | self.__sess = tf.Session()
85 |
86 | def classify(self, image_path):
87 | image_data = tf.gfile.FastGFile(image_path, 'rb').read()
88 | # output: probability assigned to each class
89 | output, predictions, high = self.sess.run(
90 | [self.output, self.logits, self.highfeatures],
91 | feed_dict={self.image_buffer: image_data})
92 | # Pick the index of the highest-probability class
93 | # (equivalent to the original manual max-scan loop).
94 | index = int(np.argmax(output[0]))
95 |
96 | return index
97 |
98 | # def predict(self):
99 | # img1 = "E:/老电脑/实验室/海报检索1/测试视频/10.jpg"
100 | # feat1 = self.getOneFeatures(img1)
101 | # ss=self.classify(img1)
102 | # #print(len(feat1))
103 | # img2 = "/home/imc/caffe/data/package/500-999/highlight_resize150/614.2.jpg"
104 | # feat2 = self.getOneFeatures(img2)
105 | # #print(len(feat2))
106 |
107 | # def computeprecision(self):
108 | # roots=[]
109 | # imgsroot0='D:/dev/unicorn/data/testVideo/video0/'
110 | # imgsroot1='D:/dev/unicorn/data/testVideo/video1/'
111 | # imgsroot2='D:/dev/unicorn/data/testVideo/video2/'
112 | # #roots.append(imgsroot0)
113 | # roots.append(imgsroot1)
114 | # #roots.append(imgsroot2)
115 | # count=0
116 | # lab=1
117 | # for imgroot in roots:
118 | # for root, dirs, files in os.walk(imgroot):
119 | # for i in range(len(files)):
120 | # img=imgroot+files[i]
121 | # index=self.classify(img)
122 | # if(index!=2):
123 | # count=count+1
124 | # #print (index)
125 | # lab=lab+1
126 |
127 | if __name__ == '__main__':
128 |     a = NetSaver()
129 | print(a.classify('D:/dev/unicorn/data/testVideo/10.jpg'))
130 |
--------------------------------------------------------------------------------
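NetSaver.classify above reduces the softmax output to a class index with a manual max-scan. A minimal NumPy sketch of the equivalent argmax step (probs is a stand-in for output[0]; this helper is not part of the repo):

import numpy as np

def argmax_class(probs):
    # Index of the largest probability -- same result as the
    # manual index/pro scan inside NetSaver.classify.
    return int(np.argmax(probs))

probs = np.array([0.05, 0.7, 0.25], dtype='float32')  # e.g. one softmax row
assert argmax_class(probs) == 1
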
/src/myGetFeatures.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import numpy as np
4 | import cv2
5 |
6 | from myComputePrecision import NetSaver
7 | from config import Config
--------------------------------------------------------------------------------
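Only the import list of myGetFeatures.py survives in this dump. As a hedged sketch (not the author's implementation) of how the imported NetSaver could be driven over a folder of images using only the classify method shown above; imageRoot and the loop are illustrative assumptions:

import os
from myComputePrecision import NetSaver

net = NetSaver()
imageRoot = 'D:/dev/unicorn/data/testVideo/video1/'  # illustrative path
for root, dirs, files in os.walk(imageRoot):
    for name in files:
        # classify() returns the predicted class index for one image file
        print(name, net.classify(os.path.join(root, name)))
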
/src/myRetrieval.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import cv2
4 | import heapq
8 |
9 | HASH_LENGTH = 1     # test-scale stub; the real hash features are 128-d (see DbBuild)
10 | FEATURE_LENGTH = 3  # test-scale stub; the real image features are 2048-d
11 | K = 2               # number of nearest neighbours to fetch from the index
12 |
13 | class Retrieval(object):
14 |     """Two-stage retrieval: Hamming-space candidate search, then Euclidean re-ranking."""
15 | def __init__(self):
16 | self.buildHashFeatureIndex()
17 | self.buildEFeatureIndex()
18 |
19 | def buildHashFeatureIndex(self):
20 | pass
21 |
22 | def buildEFeatureIndex(self):
23 | pass
24 |
25 | def readImg(self,imgPath):
26 | image = tf.gfile.FastGFile(imgPath, 'rb').read()
27 | return image
28 |
29 |     # Extract the features of the query image
30 | def getFeatures(self,img):
31 | #hashfeature,feature,high = self.sess.run([self.output,self.logits,self.highfeatures],feed_dict={self.image_buffer: image_data})
32 |         hashfeature = np.zeros(shape=[1, HASH_LENGTH], dtype='float32')  # stub features for testing
33 |         feature = np.zeros(shape=[1, FEATURE_LENGTH], dtype='float32')
34 | #feature = np.squeeze(feature)
35 | #hashfeature = np.squeeze(high[0])
36 | # hashfeature = np.array([hashfeature])
37 | # feature = np.array([feature])
38 | # for i in range(len(feature[0])):
39 | # if feature[0][i] > 0.5:
40 | # feature[0][i] = 1
41 | # else:
42 | # feature[0][i] = 0
43 | return hashfeature, feature
44 |
45 | def search(self,feature1,feature2):
46 | candidate=self.findByHamming(feature1)
47 | label=self.findByE(candidate,feature2)
48 | return label
49 |
50 | def findByHamming(self,feature1):
51 |         # Build a kd-tree index over the hash features
52 |         FLANN_INDEX_KDTREE = 1
53 |         self.flann_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=4)
54 | self.hashfeatureTest = np.zeros(shape=[3, 1], dtype='float32')
55 | self.hashfeatureTest[1][0] = 1
56 | self.flann = cv2.flann.Index(self.hashfeatureTest, self.flann_params)
57 |         np.save("D://devPy/hashfeature1", self.hashfeatureTest)  # debug: persist the stub features
58 |         print(np.load("D://devPy/hashfeature1.npy"))
59 |
60 |         # kNN search for the nearest neighbours; returns the ids of the candidate set
61 |         idx, _ = self.flann.knnSearch(feature1, K, params={})
62 |         print(idx, _)
63 |         # Flatten idx: strip the outer [] and take every element from the inner []
64 |         candidate = idx[0].tolist()
65 |         print(candidate)
66 |
67 | for i in range(K):
76 |             same = np.where((feature1 == self.hashfeatureTest[candidate[i]]).all(1))[0]  # yields [0] when the query hash equals this candidate's hash, else []
77 | candidate.extend(same)
78 | candidate = list(set(candidate))
79 | print(candidate)
80 | return candidate
81 |
82 | def findByE(self,candidate,feature2):
83 | self.featureTest = np.zeros(shape=[3, 3], dtype='float32')
84 | self.featureTest[1][0] = 1
85 | self.featureTest[2][1] = 1
86 | #minDist=np.sqrt(np.sum(np.square(self.featureTest[candidate[0]] - feature2[0])))
87 | # label=candidate[0]
88 | # tag = candidate[0]
89 | # for i in range(1,len(candidate)):
90 |         # # feature2[0] is the feature of the query image
91 | # dist=np.sqrt(np.sum(np.square(self.featureTest[candidate[i]] - feature2[0])))
92 | # if minDist>dist:
93 | # minDist=dist
94 | # label=tag
95 | # tag=candidate[i]
96 |         # If the query image is not in the database we cannot report "not found", because the distance itself is not kept
97 | dist=lambda x: np.sum(np.square(self.featureTest[x] - feature2[0]))
98 | #dist = lambda x: np.linalg.norm(self.featureTest[x] - feature2[0])
99 |         label = heapq.nsmallest(1, candidate, key=dist)
100 |         print("label:", label)
101 | return label
102 |
103 |     def find(self):
104 |         img = self.readImg("C://Users/Mozhouting/Desktop/1.jpg")
105 |         f1, f2 = self.getFeatures(img)
106 |         label = self.search(f1, f2)
107 |         return label
108 |
109 | if __name__ == '__main__':
110 |     aa = Retrieval()
111 |     aa.find()
114 |
--------------------------------------------------------------------------------
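Retrieval.search implements a two-stage lookup: findByHamming narrows the database to K FLANN nearest neighbours on the short hash codes, then findByE re-ranks those candidates by squared Euclidean distance on the full features via heapq.nsmallest. A self-contained NumPy-only sketch of the same two-stage idea, with a brute-force scan standing in for the FLANN index and the same 3-row stub data the file builds:

import heapq
import numpy as np

def two_stage_search(query_hash, query_feat, db_hash, db_feat, k=2):
    # Stage 1: Hamming distance on the short hash codes picks k candidates.
    hamming = np.sum(db_hash != query_hash, axis=1)
    candidates = heapq.nsmallest(k, range(len(db_hash)), key=lambda i: hamming[i])
    # Stage 2: squared Euclidean distance on the full features re-ranks them.
    dist = lambda i: np.sum(np.square(db_feat[i] - query_feat))
    return heapq.nsmallest(1, candidates, key=dist)[0]

# Same 3-row stub data that findByHamming / findByE construct.
db_hash = np.zeros((3, 1), dtype='float32'); db_hash[1][0] = 1
db_feat = np.zeros((3, 3), dtype='float32'); db_feat[1][0] = 1; db_feat[2][1] = 1
print(two_stage_search(np.zeros(1, dtype='float32'), np.zeros(3, dtype='float32'), db_hash, db_feat))  # -> 0
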
/test/MultiDbBulidTest.py:
--------------------------------------------------------------------------------
1 | '''
2 | '''
3 | # coding = 'utf-8'
4 | import numpy as np
5 | import os
6 | import random
7 | from src.Util import Path
8 | from src import DbBuild
9 | from src import FeatureExtract
10 |
11 | class AbnormalDbBuilder(object):
12 |
13 | def __init__(self, config, featureExtracter=None):
14 | pass
15 |
16 | def build(self):
17 | imageFeatures = np.zeros(shape=[1, 128], dtype='float32')
18 | imageFeatures.tofile("D:/3.bin")
19 | #pass
20 |
21 | # class DbBulidTester(object):
22 | # def __init__(self):
23 | # self.__config = None
24 | # self.__hashFeature = None
25 | # self.__imageFeature = None
26 | # self.__featureExtracter = None
27 | #
28 | # def __setup(self):
29 | # self.__config = {
30 | # 'imageRoot': 'D:/dev/unicorn/test/__1SingleImage/image/',
31 | # 'hashFeatureFilename': r'D:\dev\unicorn\test\__1SingleImage\feature\1.bin',
32 | # 'imageFeatureFilename': r'D:\dev\unicorn\test\__1SingleImage\feature\2.bin',
33 | # #'sample':[],
34 | # 'extracter': {
35 | # 'num_classes': 11,
36 | # 'checkpoint_path': 'D:/dev/unicorn/model/model.ckpt-42100'
37 | # }
38 | # }
39 | # self.__featureExtracter = FeatureExtract.FeatureExtracter(self.__config['extracter'])
40 | # self.__hashFeature, self.__imageFeature = self.__featureExtracter.extract(r'D:\dev\unicorn\test\__1SingleImage\image\10.jpg')
41 | #
42 | #
43 | # def __run(self):
44 | # builder = DbBuild.DbBuilder(self.__config, self.__featureExtracter)
45 | # #builder = AbnormalDbBuilder(self.__config, self.__featureExtracter)
46 | # builder.build()
47 | # isOK, reason= self.__compare()
48 | # print(reason if not isOK else 'OK')
49 | #
50 | #
51 | # def __compare(self):
52 | #
53 | # isOK, reason = self.__compareFeature(self.__config['imageFeatureFilename'], self.__imageFeature)
54 | # if not isOK:
55 | # return False, 'image feature ... , {}'.format(reason)
56 | #
57 | # isOK, reason = self.__compareFeature(self.__config['hashFeatureFilename'], self.__hashFeature)
58 | # if not isOK:
59 | # return False, 'hash feature ... , {}'.format(reason)
60 | #
61 | # return True, ''
62 | #
63 | #
64 | # def __compareFeature(self, filename, expectedFeature):
65 | #
66 | # try:
67 | # actualFeature = np.fromfile(filename, dtype=np.float32)
68 | # except Exception:
69 | # return False, "read file error!"
70 | #
71 | # if (expectedFeature.size != actualFeature.size):
72 | # return False, "size not match!"
73 | #
74 | # if np.linalg.norm(actualFeature - expectedFeature) >= (1e-6):
75 | # return False, "feature is wrong!"
76 | #
77 | #         return True, "all good"
78 | #
79 | # def __cleanUp(self):
80 | # try:
81 | # os.remove(self.__config['imageFeatureFilename'])
82 | # os.remove(self.__config['hashFeatureFilename'])
83 | # except Exception as err:
84 | # print(err)
85 | #
86 | # def test(self):
87 | # self.__setup()
88 | # self.__run()
89 | # self.__cleanUp()
90 |
91 | class MultiDbBuildTester(object):
92 |
93 | _OK = (True, '')
94 | _ERROR_BAD_FILE = (False, 'read file error')
95 | _ERROR_INCORRECT_DBSIZE = (False, 'size not match')
96 |     _ERROR_INCORRECT_FEATURE = (False, 'feature is wrong')
97 |
98 | def __init__(self):
99 | self.__config = None
100 | self.__featureExtracter = None
101 | self.__totalImages = -1
102 | self.__checkCandidates = None
103 | self.__hashFeatures = None
104 | self.__imageFeatures = None
105 |
106 | def __setup(self):
107 | self.__config = {
108 | 'imageRoot': r'D:\dev\unicorn\test\data\__NImage\image',
109 | 'hashFeatureFilename': r'D:\dev\unicorn\test\data\__NImage\feature\1.bin',
110 | 'imageFeatureFilename': r'D:\dev\unicorn\test\data\__NImage\feature\2.bin',
111 | # 'sample': [0, 10, 20]
112 | 'extracter': {
113 | 'num_classes': 11,
114 | 'checkpoint_path': 'D:/dev/unicorn/model/model.ckpt-42100'
115 | }
116 | }
117 | self.__featureExtracter = FeatureExtract.FeatureExtracter(self.__config['extracter'])
118 | self.__setupFeatures()
119 |
120 |
121 | def __setupFeatures(self):
122 | imagePaths = Path.listFiles(self.__config['imageRoot'])
123 | self.__totalImages = len(imagePaths)
124 | self.__checkCandidates = self.__config.get('sample', MultiDbBuildTester._createCheckCandidates(self.__totalImages))
125 | testImageNum = len(self.__checkCandidates)
126 | self.__hashFeatures = np.zeros(shape=[testImageNum, 128], dtype='float32')
127 | self.__imageFeatures = np.zeros(shape=[testImageNum, 2048], dtype='float32')
128 | for i, j in enumerate(self.__checkCandidates):
129 | self.__hashFeatures[i], self.__imageFeatures[i] = self.__featureExtracter.extract(imagePaths[j])
130 |
131 | @staticmethod
132 | def _createCheckCandidates(totalImages):
133 | seq = range(totalImages)
134 |         n = (totalImages + 9) // 10  # roughly 10% of the images, at least one
135 | return random.sample(seq, n)
136 |
137 | def __run(self):
138 | builder = DbBuild.DbBuilder(self.__config, self.__featureExtracter)
139 | #builder = AbnormalDbBuilder(self.__config, self.__featureExtracter)
140 | builder.build()
141 | isOK, reason = self.__compare()
142 | print((reason, self.__checkCandidates) if not isOK else 'OK')
143 |
144 |
145 | def __compare(self):
146 | isOK, reason = self.__compareFeatures(self.__config['imageFeatureFilename'],
147 | self.__imageFeatures,
148 | self.__totalImages,
149 | 2048)
150 | if not isOK:
151 | return False, 'image feature ... , {}'.format(reason)
152 |
153 | isOK, reason = self.__compareFeatures(self.__config['hashFeatureFilename'],
154 | self.__hashFeatures,
155 | self.__totalImages,
156 | 128)
157 | if not isOK:
158 | return False, 'hash feature ... , {}'.format(reason)
159 |
160 | return MultiDbBuildTester._OK
161 |
162 | def __compareFeatures(self, filename, expectedFeatures, totalFeatures, featureDim):
163 | try:
164 | actualFeatures = np.fromfile(filename, dtype=np.float32)
165 | except Exception:
166 | return MultiDbBuildTester._ERROR_BAD_FILE
167 |
168 | if totalFeatures * featureDim != actualFeatures.size:
169 | return MultiDbBuildTester._ERROR_INCORRECT_DBSIZE
170 |
171 | actualFeatures.shape = totalFeatures, featureDim
172 |
173 | isOK = all([self.__compareFeature(actualFeatures[j], expectedFeatures[i])
174 | for i, j in enumerate(self.__checkCandidates)])
175 |         return MultiDbBuildTester._OK if isOK else MultiDbBuildTester._ERROR_INCORRECT_FEATURE
176 |
177 |
178 | def __compareFeature(self, feature0, feature1):
179 | return np.linalg.norm(feature0 - feature1) < 1e-6
180 |
181 |
182 | def __cleanUp(self):
183 | try:
184 | os.remove(self.__config['imageFeatureFilename'])
185 | os.remove(self.__config['hashFeatureFilename'])
186 | except Exception as err:
187 | print(err)
188 |
189 |
190 | def test(self):
191 | self.__setup()
192 | self.__run()
193 | self.__cleanUp()
194 |
195 |
196 |
197 | if __name__ == '__main__':
198 |     dbTester = MultiDbBuildTester()
199 |     dbTester.test()
201 |
--------------------------------------------------------------------------------
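The tester's file checks mirror how DbBuilder persists features: tofile writes a headerless flat float32 block, so fromfile must yield exactly rows * dim values before the array can be reshaped and compared row by row against freshly extracted features. A minimal round-trip sketch under those assumptions (the temp path is illustrative):

import os
import tempfile
import numpy as np

rows, dim = 4, 128
expected = np.random.rand(rows, dim).astype('float32')

path = os.path.join(tempfile.gettempdir(), 'features.bin')  # illustrative path
expected.tofile(path)                      # flat float32 dump, no header

actual = np.fromfile(path, dtype=np.float32)
assert actual.size == rows * dim           # the _ERROR_INCORRECT_DBSIZE check
actual.shape = rows, dim                   # same reshape the tester uses
assert all(np.linalg.norm(actual[i] - expected[i]) < 1e-6 for i in range(rows))
os.remove(path)
print('OK')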