├── .idea
│   └── Unicorn.iml
├── README.md
├── src
│   ├── Application.py
│   ├── DbBuild.py
│   ├── FeatureExtract.py
│   ├── FilesMaker.py
│   ├── ImageDb.py
│   ├── Performance.py
│   ├── Util
│   │   ├── File.py
│   │   └── Path.py
│   ├── config.py
│   ├── inception
│   │   ├── image_processing.py
│   │   ├── inception_model.py
│   │   ├── inception_module.py
│   │   ├── losses.py
│   │   ├── ops.py
│   │   ├── scopes.py
│   │   ├── slim.py
│   │   └── variables.py
│   ├── myComputePrecision.py
│   ├── myGetFeatures.py
│   └── myRetrieval.py
└── test
    └── MultiDbBulidTest.py

-------------------------------------------------------------------------------- /.idea/Unicorn.iml: --------------------------------------------------------------------------------

(IntelliJ IDEA module file; its XML content did not survive extraction.)

-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------

# ImageRetrieval

## Requirements
- TensorFlow

-------------------------------------------------------------------------------- /src/Application.py: --------------------------------------------------------------------------------

'''
'''
# coding = 'utf-8'

import ImageDb
import FeatureExtract
import numpy as np

class ImageFinder(object):
    def __init__(self, config):
        self.__imageDb = ImageDb.ImageDb(config['db'])
        self.__featureExtracter = FeatureExtract.FeatureExtracter(config['extracter'])

    def find(self, imagePath):
        '''Look up the label of an image.

        Args:
            imagePath: path to the image file
        '''
        hashFeature, imageFeature = self.__featureExtracter.extract(imagePath)
        # hashFeature = np.zeros(shape=[1, 128], dtype='float32')
        # imageFeature = np.zeros(shape=[1, 2048], dtype='float32')
        return self.__imageDb.find(hashFeature, imageFeature)


if __name__ == '__main__':
    config = {
        'db': {
            'hashFeatureFilename': r'D:\dev\unicorn\test\data\__NImage\feature\1.bin',
            'imageFeatureFilename': r'D:\dev\unicorn\test\data\__NImage\feature\2.bin',
            # ImageDb reads the key 'labelsFilename'; the original key
            # 'generatedLabelsFilename' was never looked up and raised KeyError.
            'labelsFilename': r'D:\dev\unicorn\test\data\__NImage\feature\generatedLabels.txt',
        },
        'extracter': {
            'num_classes': 11,
            'checkpoint_path': 'D:/dev/unicorn/model/model.ckpt-42100'
        }
    }
    imageFinder = ImageFinder(config)
    # The original literal ended with a stray trailing space, which breaks the file lookup.
    imagePath = r'D:\dev\unicorn\test\data\__NImage\image\0\0.jpg'
    imageFinder.find(imagePath)
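A minimal sketch of the intended three-step pipeline (generate the file lists, build the feature database, query it); all paths are hypothetical placeholders, and the import style follows Performance.py below:

# Hypothetical end-to-end usage of this repo's API; requires a trained
# checkpoint and an image tree laid out as <root>/<label>/<image>.
from src import Application
import FilesMaker
import DbBuild

config = {
    'imageRoot': r'D:\data\images',                                   # hypothetical
    'targets': [(r'D:\data\imagePathLabels.txt', ('path', 'label'))],
    'imagePathLabelsFilename': r'D:\data\imagePathLabels.txt',
    'db': {
        'hashFeatureFilename': r'D:\data\hash.bin',
        'imageFeatureFilename': r'D:\data\feature.bin',
        'labelsFilename': r'D:\data\labels.txt',
    },
    'extracter': {'num_classes': 11, 'checkpoint_path': r'D:\model\model.ckpt'},
}

FilesMaker.makeConfigFile(config)       # step 1: write "<path> <label>" lists
DbBuild.DbBuilder(config).build()       # step 2: extract and persist features
print(Application.ImageFinder(config).find(r'D:\data\query.jpg'))  # step 3: query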
-------------------------------------------------------------------------------- /src/DbBuild.py: --------------------------------------------------------------------------------

'''
'''
# coding = 'utf-8'

import numpy as np

from src import FeatureExtract
from src.Util import Path


class DbBuilder(object):
    def __init__(self, config, featureExtracter=None):
        self.__config = config
        self.__count = 0
        self.__labels = None
        self.__hashFeatures = None
        self.__imageFeatures = None
        self.__featureExtracter = featureExtracter if featureExtracter is not None \
            else FeatureExtract.FeatureExtracter(config['extracter'])

    def __initFeatures(self, totalImages):
        self.__hashFeatures = np.zeros(shape=[totalImages, 128], dtype='float32')
        self.__imageFeatures = np.zeros(shape=[totalImages, 2048], dtype='float32')

    def __add(self, hashFeature, imageFeature):
        self.__hashFeatures[self.__count] = hashFeature
        self.__imageFeatures[self.__count] = imageFeature
        self.__count += 1

    def __save(self):
        self.__hashFeatures.tofile(self.__config['db']['hashFeatureFilename'])
        self.__imageFeatures.tofile(self.__config['db']['imageFeatureFilename'])
        np.savetxt(self.__config['db']['labelsFilename'], self.__labels, fmt='%s', newline='\n')

    def build(self):
        '''Build the image feature database.
        '''
        self.__count = 0
        # Read the "<imagePath> <label>" list once (the original opened and
        # parsed the same file twice, once for paths and once for labels).
        with open(self.__config['imagePathLabelsFilename'], 'r') as file:
            pairs = [line.strip().split(' ') for line in file]
        imagePaths = [imagePath for imagePath, label in pairs]
        self.__labels = [label for imagePath, label in pairs]
        # imagePaths = [i for i in Path.ilistFileEx(self.__config['imageRoot'])]
        self.__initFeatures(len(imagePaths))
        for imagePath in imagePaths:
            hashFeature, imageFeature = self.__featureExtracter.extract(imagePath)
            self.__add(hashFeature, imageFeature)
        self.__save()


if __name__ == '__main__':
    # config = {
    #     # 'imageRoot': r'D:\dev\unicorn\test\data\__NImage\image',
    #     'db': {
    #         'hashFeatureFilename': r'D:\dev\unicorn\test\data\__NImage\feature\1.bin',
    #         'imageFeatureFilename': r'D:\dev\unicorn\test\data\__NImage\feature\2.bin',
    #         # renamed from 'generatedLabelsFilename' to match __save()
    #         'labelsFilename': r'D:\dev\unicorn\test\data\__NImage\feature\generatedLabels.txt',
    #     },
    #     'imagePathLabelsFilename': r'D:\dev\unicorn\test\data\__NImage\feature\imagePathLabels.txt',
    #     'extracter': {
    #         'num_classes': 11,
    #         'checkpoint_path': 'D:/dev/unicorn/model/model.ckpt-42100'
    #     }
    # }
    # builder = DbBuilder(config)
    # builder.build()
    pass
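Why ImageDb (below) can reshape the raw .bin files: np.tofile() writes a flat, headerless float32 buffer, so the shape must be reconstructed from the known feature dimensions. A self-contained round-trip sketch using a temp file:

import numpy as np, tempfile, os

dim = 128
features = np.random.rand(10, dim).astype(np.float32)
path = os.path.join(tempfile.gettempdir(), 'hash_demo.bin')
features.tofile(path)                       # flat dump, no shape metadata

loaded = np.fromfile(path, dtype=np.float32)
loaded.shape = loaded.size // dim, dim      # recover (N, 128), as ImageDb does
assert (loaded == features).all()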
-------------------------------------------------------------------------------- /src/FeatureExtract.py: --------------------------------------------------------------------------------

'''
'''
# coding = 'utf-8'

import tensorflow as tf
import numpy as np
from src.inception import inception_module

class FeatureExtracter():
    def __init__(self, config):
        self.__config = config
        self.x_image = None
        self.__image_buffer = None
        self.logits = None
        self.highfeatures = None
        self.auxLogits = None
        self.__output = None
        self.saver = None
        self.__sess = None
        self.__buildNet()
        self.__loadModel()

    def __loadModel(self):
        self.saver.restore(self.__sess, self.__config['checkpoint_path'])

    def __buildInputImagePlaceholder(self):
        self.__image_buffer = tf.placeholder("string")
        image = tf.image.decode_jpeg(self.__image_buffer, channels=3)
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        image = tf.image.central_crop(image, central_fraction=0.875)
        # Resize the image to the network input size.
        image = tf.expand_dims(image, 0)
        image = tf.image.resize_bilinear(image, [299, 299], align_corners=False)
        image_tensor = tf.squeeze(image, [0])
        self.x_image = tf.reshape(image_tensor, [-1, 299, 299, 3])

    def __buildNet(self):
        # Number of classes in the dataset label set plus 1.
        # Label 0 is reserved for an (unused) background class.
        num_classes = self.__config['num_classes'] + 1
        # Set up an input image placeholder to feed the image buffer.
        self.__buildInputImagePlaceholder()
        # Build a Graph that computes the logits predictions from the inference model.
        # WARNING: this repo's modified inference() returns (128-d hash layer,
        # 2048-d pool features, auxiliary logits), not the stock (logits, aux_logits).
        # The attribute originally named 'ssss' is renamed auxLogits for clarity.
        self.logits, self.highfeatures, self.auxLogits = inception_module.inference(self.x_image, num_classes)
        # result is the output of the softmax unit
        self.__output = tf.nn.softmax(self.auxLogits, name="result")
        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception_module.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        self.saver = tf.train.Saver(variables_to_restore)
        self.__sess = tf.Session()

    def extract(self, image_path):
        '''Extract the features of an image.

        Args:
            image_path: path of the image to extract features from
        '''
        image_data = tf.gfile.FastGFile(image_path, 'rb').read()
        output, hashFeature, imageFeature = self.__sess.run(
            [self.__output, self.logits, self.highfeatures],
            feed_dict={self.__image_buffer: image_data})
        hashFeature = np.squeeze(hashFeature)
        imageFeature = np.squeeze(imageFeature[0])
        imageFeature = np.array([imageFeature])
        hashFeature = np.array([hashFeature])
        # Binarize the sigmoid activations into a 0/1 hash code.
        for i in range(len(hashFeature[0])):
            if hashFeature[0][i] > 0.5:
                hashFeature[0][i] = 1
            else:
                hashFeature[0][i] = 0
        return hashFeature, imageFeature

if __name__ == '__main__':
    pass
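The element-wise thresholding loop in extract() is equivalent to one vectorized numpy expression, which would be the idiomatic (and faster) form:

import numpy as np

hashFeature = np.array([[0.1, 0.7, 0.5, 0.9]], dtype='float32')
# Strict '>' matches the loop: 0.5 maps to 0.
hashFeature = (hashFeature > 0.5).astype('float32')   # -> [[0., 1., 0., 1.]]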
-------------------------------------------------------------------------------- /src/FilesMaker.py: --------------------------------------------------------------------------------

'''
'''
# coding = 'utf-8'
from Util import Path
from Util import File
from pathlib import PurePath
import operator
import subprocess
import collections
import functools

ResolveResult = collections.namedtuple('ResolveResult', ['label', 'path', 'alias'])


@functools.lru_cache()
def _resolve(path):
    _path = PurePath(path)
    label = _path.parts[-2]
    imgAlias = '_'.join([label, _path.stem])
    return ResolveResult(label, path, imgAlias)


@functools.lru_cache()
def _extractFields(tp, fields):
    return ' '.join([_extractField(field, tp) for field in fields])


@functools.singledispatch
def _extractField(field, tp):
    pass


@_extractField.register(str)
def _(field, tp):
    return operator.attrgetter(field)(tp)


@_extractField.register(int)
def _(field, tp):
    return operator.itemgetter(field)(tp)


_RESOLVER_BY_NAME = {
    'default': _resolve
}


def makeConfigFile(config, resolver=None, openFileAfterWrite=False):
    imageRoot = config['imageRoot']
    resolver = resolver or _RESOLVER_BY_NAME[config.get('resolver', 'default')]

    for filename, fields in config['targets']:
        _fields = fields if isinstance(fields, tuple) else tuple(fields)
        seq = (_extractFields(resolver(imagePath), _fields) for imagePath in Path.ilistFileEx(imageRoot))
        File.writeLines(filename, seq)
        if openFileAfterWrite:
            subprocess.Popen(f'notepad {filename}', shell=True)


if __name__ == '__main__':
    # config = {
    #     'imageRoot': r'D:\dev\unicorn\test\data\__NImage\image',
    #     'targets': [(r'D:\dev\unicorn\test\data\__NImage\feature\imagePathLabels.txt', ['path', 'label']),
    #                 (r'D:\dev\unicorn\test\data\__NImage\feature\imagePathAlias.txt', ['path', 'alias']),
    #                 (r'D:\dev\unicorn\test\data\__NImage\feature\aliasLabel.txt', ['alias', 'label'])]
    # }
    # makeConfigFile(config, openFileAfterWrite=True)
    pass

-------------------------------------------------------------------------------- /src/ImageDb.py: --------------------------------------------------------------------------------

'''
'''
# coding = 'utf-8'

import cv2
import numpy as np


class ImageDb(object):
    '''Image database.
    '''
    _FLANN_INDEX_KDTREE = 1
    _FLANN_INDEX_PARAM = {'algorithm': _FLANN_INDEX_KDTREE, 'trees': 4}
    _K_OF_KNN = 1
    _HASH_FEATURE_DIM = 128
    _IMAGE_FEATURE_DIM = 2048

    def __init__(self, config):
        '''
        Args:
            config: configuration dict
        '''
        self.__config = config
        self.__hashFeatures = np.fromfile(config['hashFeatureFilename'], dtype=np.float32)
        self.__hashFeatures.shape = self.__hashFeatures.size // ImageDb._HASH_FEATURE_DIM, ImageDb._HASH_FEATURE_DIM
        self.__imageFeatures = np.fromfile(config['imageFeatureFilename'], dtype=np.float32)
        self.__imageFeatures.shape = self.__imageFeatures.size // ImageDb._IMAGE_FEATURE_DIM, ImageDb._IMAGE_FEATURE_DIM
        imageLabels = np.loadtxt(config['labelsFilename'], dtype=np.string_).astype(str).tolist()
        self.__imageLabels = imageLabels if isinstance(imageLabels, list) else [imageLabels]
        self.__indexer = cv2.flann_Index(self.__hashFeatures, ImageDb._FLANN_INDEX_PARAM)

    def find(self, hashFeature, imageFeature):
        '''Look up image labels.

        Args:
            hashFeature: the image's hash feature
            imageFeature: the image's feature vector
        '''
        candidates = self.__findCandidate(hashFeature)
        refinedCandidates = self.__refineCandidate(candidates, imageFeature)
        label = [self.__imageLabels[refinedCandidate] for refinedCandidate in refinedCandidates]
        print(label)
        return label

    def __findCandidate(self, hashFeature):
        _seedCandidates, _ = self.__indexer.knnSearch(hashFeature, ImageDb._K_OF_KNN, params={})
        seedCandidates = _seedCandidates[0].tolist()
        candidates = list(seedCandidates)
        for i in seedCandidates:
            # all(1) reduces each row to a single bool saying whether every
            # element in that row is True, i.e. it finds every database entry
            # whose hash code is identical to the seed candidate's.
            auxCandidates = np.where((self.__hashFeatures == self.__hashFeatures[i]).all(1))[0]
            candidates.extend(auxCandidates)
        return list(set(candidates))

    def __refineCandidate(self, candidates, imageFeature):
        distWithQuery = lambda index: np.linalg.norm(self.__imageFeatures[index] - imageFeature[0])
        refinedCandidate = min(candidates, key=distWithQuery)
        return [refinedCandidate]


if __name__ == '__main__':
    pass
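The coarse-to-fine search in ImageDb, reduced to plain numpy for illustration (no FLANN): stage 1 shortlists by Hamming distance on the binary hash codes, stage 2 re-ranks the shortlist by L2 distance on the 2048-d features. Names and sizes here are illustrative only:

import numpy as np

def two_stage_search(query_hash, query_feat, db_hash, db_feat, shortlist=5):
    # Stage 1: Hamming distance on binary codes, keep the closest few.
    hamming = np.count_nonzero(db_hash != query_hash, axis=1)
    candidates = np.argsort(hamming)[:shortlist]
    # Stage 2: exact L2 re-ranking on the high-dimensional features.
    dists = np.linalg.norm(db_feat[candidates] - query_feat, axis=1)
    return candidates[int(np.argmin(dists))]

db_hash = np.random.rand(100, 128) > 0.5
db_feat = np.random.rand(100, 2048).astype(np.float32)
best = two_stage_search(db_hash[7], db_feat[7], db_hash, db_feat)
assert best == 7  # querying with a database entry returns that entry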
-------------------------------------------------------------------------------- /src/Performance.py: --------------------------------------------------------------------------------

'''
'''
# coding = 'utf-8'
import sys
sys.path.append('d:/dev/unicorn/')
import time
# time.clock() was removed in Python 3.8; perf_counter() is the replacement.
from time import perf_counter
from pathlib import PurePath

from src import Application
import FilesMaker
import DbBuild


def fakeIlistFileEx(num, name, _):
    for i in range(num):
        yield name


class PerformanceEvaluater(object):
    def __init__(self, config):
        self.__actual = None
        self.__expected = None
        self.__config = config

    def evaluate(self):
        self.__getActual()
        self.__loadExpected()
        isOK, reason = self.__compare()
        print(reason if not isOK else 'ok')

    def __getActual(self):
        imageFinder = Application.ImageFinder(self.__config)
        print('Retrieval Start!')
        self.__actual = {}
        with open(self.__config['imagePathAlias'], 'r') as file:
            # WARNING: imageFinder.find() currently returns only one result.
            count = 0
            for imagePath, alias in (line.strip().split(' ') for line in file):
                self.__actual[alias] = imageFinder.find(imagePath)[0]
                count += 1
                time.sleep(0.0001)
                if count % 100 == 0:
                    print(f'Processed up to {count} images', end='\r')
            print(f'Processed {count} images in total')

    def __loadExpected(self):
        with open(self.__config['groundTruth'], 'r') as file:
            self.__expected = dict(line.strip().split(' ') for line in file)

    def __compare(self):
        print('Computing accuracy...')
        if set(self.__actual.keys()) != set(self.__expected.keys()):
            return False, 'key sets differ!'
        accuracy = len(set(self.__actual.items()) & set(self.__expected.items())) / len(self.__expected)
        print('Accuracy:', accuracy)
        return True, 'OK'


if __name__ == '__main__':
    config = {
        # 15 images
        # 'imageRoot': r'D:\dev\unicorn\test\data\__NImage\image',
        # 10k images
        # 'imageRoot': r'D:\1000000\image\images0\images\0',
        # 100k images
        'imageRoot': r'D:\1000000\image\images0\images',

        'targets': [(r'D:\dev\unicorn\test\data\__NImage\feature\imagePathLabels.txt', ['path', 'label']),
                    (r'D:\dev\unicorn\test\data\__NImage\feature\imagePathAlias.txt', ['path', 'alias']),
                    (r'D:\dev\unicorn\test\data\__NImage\feature\aliasLabel.txt', ['alias', 'label'])],
        'db': {
            'hashFeatureFilename': r'D:\dev\unicorn\test\data\__NImage\feature\1.bin',
            'imageFeatureFilename': r'D:\dev\unicorn\test\data\__NImage\feature\2.bin',
            'labelsFilename': r'D:\dev\unicorn\test\data\__NImage\feature\labels.txt',
        },
        'imagePathLabelsFilename': r'D:\dev\unicorn\test\data\__NImage\feature\imagePathLabels.txt',
        'groundTruth': r'D:\dev\unicorn\test\data\__NImage\feature\aliasLabel.txt',
        'imagePathAlias': r'D:\dev\unicorn\test\data\__NImage\feature\imagePathAlias.txt',
        # 'sample': [0, 10, 20]
        'extracter': {
            'num_classes': 11,
            'checkpoint_path': 'D:/dev/unicorn/model/model.ckpt-42100'
        }
    }

    start = perf_counter()
    # step 1
    def _resolve2(path):
        _path = PurePath(path)
        label = '_'.join([_path.parts[-2], _path.stem])
        imgAlias = '_'.join([label, _path.stem])
        return FilesMaker.ResolveResult(label, path, imgAlias)
    FilesMaker.makeConfigFile(config, resolver=_resolve2, openFileAfterWrite=True)
    finish = perf_counter()
    print(finish - start)

    # WARNING: the ImageFinder object gets constructed repeatedly.
    # step 2
    builder = DbBuild.DbBuilder(config)
    builder.build()
    finish = perf_counter()
    print(finish - start)

    # step 3
    # PE = PerformanceEvaluater(config)
    # PE.evaluate()
    # finish = perf_counter()
    # print(f'Time consumed: {finish-start} s')
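How __compare scores accuracy: it counts the exact (alias, label) pairs shared by the actual and expected dicts, divided by the number of expected entries. A tiny standalone illustration:

actual   = {'a_1': '0', 'a_2': '1', 'a_3': '2'}
expected = {'a_1': '0', 'a_2': '9', 'a_3': '2'}
accuracy = len(set(actual.items()) & set(expected.items())) / len(expected)
print(accuracy)  # 2/3, i.e. one retrieval out of three was wrong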
-------------------------------------------------------------------------------- /src/Util/File.py: --------------------------------------------------------------------------------

# -- coding:utf-8 --

def writeLines(filename: str, content):
    with open(filename, 'w') as file:
        file.writelines((str(line) + '\n') for line in content)

if __name__ == '__main__':
    pass

-------------------------------------------------------------------------------- /src/Util/Path.py: --------------------------------------------------------------------------------

# -- coding:utf-8 --

import os

# pip install pypiwin32
import pythoncom
from win32com.shell import shell

def ilistFiles(dir):
    '''List the files directly under a directory (generator).

    Args:
        dir: directory name
    '''
    _, _, files = next(os.walk(dir))
    return (os.path.join(dir, file) for file in files)

def listFiles(dir):
    '''List the files directly under a directory (list).

    Args:
        dir: directory name
    '''
    _, _, files = next(os.walk(dir))
    # print([os.path.join(dir, file) for file in files])
    return [os.path.join(dir, file) for file in files]

def _getShortcutRealPath(path):
    try:
        pythoncom.CoInitialize()
        shortcut = pythoncom.CoCreateInstance(
            shell.CLSID_ShellLink,
            None,
            pythoncom.CLSCTX_INPROC_SERVER,
            shell.IID_IShellLink)
        shortcut.QueryInterface(pythoncom.IID_IPersistFile).Load(path)
        realPath = shortcut.GetPath(shell.SLGP_SHORTPATH)[0]
        return realPath
    except Exception:
        return path

def ilistFileEx(dir):
    '''Recursively list all files under a directory, following .lnk
    shortcuts (generator).

    Args:
        dir: directory name
    '''
    for root, dirs, files in os.walk(dir):
        for file in files:
            # Not a .lnk file: yield the filename.
            if os.path.splitext(file)[1].lower() != '.lnk':
                yield os.path.normpath(os.path.join(root, file))
                continue

            # A .lnk file that is not actually a shortcut: yield the filename.
            realName = _getShortcutRealPath(os.path.join(root, file))
            if not realName:
                yield os.path.normpath(os.path.join(root, file))
                continue

            # Shortcut pointing at a file: yield the real filename.
            if not os.path.isdir(realName):
                yield os.path.normpath(realName)
                continue

            # Shortcut pointing at a directory: walk that directory too.
            yield from ilistFileEx(realName)

    # Note: the original version also recursed into every entry of `dirs`
    # here, but os.walk() already descends into subdirectories, so that
    # double-counted files (and passed bare directory names, not full paths).


if __name__ == '__main__':
    pass
    # i1 = [i for i in ilistFileEx(r'D:\dev\unicorn\test\data\__NImage\image')]
    # print(i1)
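A portable sanity check for the walkers above (temporary files only; no shortcuts involved): os.walk() already recurses, so a single pass yields every file exactly once.

import os, tempfile

root = tempfile.mkdtemp()
os.makedirs(os.path.join(root, 'sub'))
for name in ('a.txt', os.path.join('sub', 'b.txt')):
    open(os.path.join(root, name), 'w').close()

found = [os.path.join(r, f) for r, _, fs in os.walk(root) for f in fs]
assert len(found) == 2   # one hit per file, no double counting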
-------------------------------------------------------------------------------- /src/config.py: --------------------------------------------------------------------------------

import os
import sys


class Config():
    instance = None

    def __init__(self):
        # self.currenpath = sys.path[0]

        # wu
        # self.currenpath = "/home/imc/caffe/poster/0406/tensorflow"
        self.currenpath = "e:/caffe_poster_0406_tensorflow"

        # self.library_path = "/home/imc/caffe/poster/0406/library"
        # wu
        # self.library_path = "/home/imc/models-master/inception/mydata/postertest1"
        self.library_path = "e:/models-master/inception/mydata/postertest1"

        self.hashfeature_path = self.currenpath + "/hashfeature.bin"
        self.feature_path = self.currenpath + "/feature.bin"
        self.groundtruth_path = self.currenpath + "/groundtruth.txt"
        self.FEATURE_LENGTH = 2048
        self.HASH_LENGTH = 128

        self.caffe_root = '/home/imc/caffe/'
        self.net_file = self.caffe_root + 'poster/models/bvlc_alexnet/deploy_hash.prototxt'
        self.caffe_model = self.caffe_root + 'poster/models/bvlc_alexnet/caffe_alexnet_train_iter_826.caffemodel'
        self.mean_file = self.caffe_root + 'poster/data/all/mean.npy'

        self.khash = 5

    def Getlibrary_path(self):
        return self.library_path

    def Getgroundtruth_path(self):
        return self.groundtruth_path

    def Gethashfeature_path(self):
        return self.hashfeature_path

    def Getfeature_path(self):
        return self.feature_path

    def GetFEATURE_LENGTH(self):
        return self.FEATURE_LENGTH

    def GetHASH_LENGTH(self):
        return self.HASH_LENGTH

    def Getcaffe_root(self):
        return self.caffe_root

    def Getnet_file(self):
        return self.net_file

    def Getcaffe_model(self):
        return self.caffe_model

    def Getmean_file(self):
        return self.mean_file

    def Getkhash(self):
        return self.khash


a = Config()

-------------------------------------------------------------------------------- /src/inception/image_processing.py: --------------------------------------------------------------------------------

1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Read and preprocess image data.
16 |
17 | Image processing occurs on a single image at a time. Images are read and
18 | preprocessed in parallel across multiple threads. The resulting images
19 | are concatenated together to form a single batch for training or evaluation.
20 |
21 | -- Provide processed image data for a network:
22 | inputs: Construct batches of evaluation examples of images.
23 | distorted_inputs: Construct batches of training examples of images.
24 | batch_inputs: Construct batches of training or evaluation examples of images.
25 |
26 | -- Data processing:
27 | parse_example_proto: Parses an Example proto containing a training example
28 |   of an image.
29 |
30 | -- Image decoding:
31 | decode_jpeg: Decode a JPEG encoded string into a 3-D float32 Tensor.
32 |
33 | -- Image preprocessing:
34 | image_preprocessing: Decode and preprocess one image for evaluation or training
35 | distort_image: Distort one image for training a network.
36 | eval_image: Prepare one image for evaluation.
37 | distort_color: Distort the color in one image for training.
38 | """
39 | from __future__ import absolute_import
40 | from __future__ import division
41 | from __future__ import print_function
42 |
43 | import tensorflow as tf
44 |
45 | FLAGS = tf.app.flags.FLAGS
46 |
47 | tf.app.flags.DEFINE_integer('batch_size', 32,
48 |                             """Number of images to process in a batch.""")
49 | tf.app.flags.DEFINE_integer('image_size', 299,
50 |                             """Provide square images of this size.""")
51 | tf.app.flags.DEFINE_integer('num_preprocess_threads', 4,
52 |                             """Number of preprocessing threads per tower.
""" 53 | """Please make this a multiple of 4.""") 54 | tf.app.flags.DEFINE_integer('num_readers', 4, 55 | """Number of parallel readers during train.""") 56 | 57 | # Images are preprocessed asynchronously using multiple threads specifed by 58 | # --num_preprocss_threads and the resulting processed images are stored in a 59 | # random shuffling queue. The shuffling queue dequeues --batch_size images 60 | # for processing on a given Inception tower. A larger shuffling queue guarantees 61 | # better mixing across examples within a batch and results in slightly higher 62 | # predictive performance in a trained model. Empirically, 63 | # --input_queue_memory_factor=16 works well. A value of 16 implies a queue size 64 | # of 1024*16 images. Assuming RGB 299x299 images, this implies a queue size of 65 | # 16GB. If the machine is memory limited, then decrease this factor to 66 | # decrease the CPU memory footprint, accordingly. 67 | tf.app.flags.DEFINE_integer('input_queue_memory_factor', 16, 68 | """Size of the queue of preprocessed images. """ 69 | """Default is ideal but try smaller values, e.g. """ 70 | """4, 2 or 1, if host memory is constrained. See """ 71 | """comments in code for more details.""") 72 | 73 | 74 | def inputs(dataset, batch_size=None, num_preprocess_threads=None): 75 | """Generate batches of ImageNet images for evaluation. 76 | 77 | Use this function as the inputs for evaluating a network. 78 | 79 | Note that some (minimal) image preprocessing occurs during evaluation 80 | including central cropping and resizing of the image to fit the network. 81 | 82 | Args: 83 | dataset: instance of Dataset class specifying the dataset. 84 | batch_size: integer, number of examples in batch 85 | num_preprocess_threads: integer, total number of preprocessing threads but 86 | None defaults to FLAGS.num_preprocess_threads. 87 | 88 | Returns: 89 | images: Images. 4D tensor of size [batch_size, FLAGS.image_size, 90 | image_size, 3]. 91 | labels: 1-D integer Tensor of [FLAGS.batch_size]. 92 | """ 93 | if not batch_size: 94 | batch_size = FLAGS.batch_size 95 | 96 | # Force all input processing onto CPU in order to reserve the GPU for 97 | # the forward inference and back-propagation. 98 | with tf.device('/cpu:0'): 99 | images, labels = batch_inputs( 100 | dataset, batch_size, train=False, 101 | num_preprocess_threads=num_preprocess_threads, 102 | num_readers=1) 103 | 104 | return images, labels 105 | 106 | 107 | def distorted_inputs(dataset, batch_size=None, num_preprocess_threads=None): 108 | """Generate batches of distorted versions of ImageNet images. 109 | 110 | Use this function as the inputs for training a network. 111 | 112 | Distorting images provides a useful technique for augmenting the data 113 | set during training in order to make the network invariant to aspects 114 | of the image that do not effect the label. 115 | 116 | Args: 117 | dataset: instance of Dataset class specifying the dataset. 118 | batch_size: integer, number of examples in batch 119 | num_preprocess_threads: integer, total number of preprocessing threads but 120 | None defaults to FLAGS.num_preprocess_threads. 121 | 122 | Returns: 123 | images: Images. 4D tensor of size [batch_size, FLAGS.image_size, 124 | FLAGS.image_size, 3]. 125 | labels: 1-D integer Tensor of [batch_size]. 126 | """ 127 | if not batch_size: 128 | batch_size = FLAGS.batch_size 129 | 130 | # Force all input processing onto CPU in order to reserve the GPU for 131 | # the forward inference and back-propagation. 
132 | with tf.device('/cpu:0'): 133 | images, labels = batch_inputs( 134 | dataset, batch_size, train=True, 135 | num_preprocess_threads=num_preprocess_threads, 136 | num_readers=FLAGS.num_readers) 137 | return images, labels 138 | 139 | 140 | def decode_jpeg(image_buffer, scope=None): 141 | """Decode a JPEG string into one 3-D float image Tensor. 142 | 143 | Args: 144 | image_buffer: scalar string Tensor. 145 | scope: Optional scope for op_scope. 146 | Returns: 147 | 3-D float Tensor with values ranging from [0, 1). 148 | """ 149 | with tf.op_scope([image_buffer], scope, 'decode_jpeg'): 150 | # Decode the string as an RGB JPEG. 151 | # Note that the resulting image contains an unknown height and width 152 | # that is set dynamically by decode_jpeg. In other words, the height 153 | # and width of image is unknown at compile-time. 154 | image = tf.image.decode_jpeg(image_buffer, channels=3) 155 | 156 | # After this point, all image pixels reside in [0,1) 157 | # until the very end, when they're rescaled to (-1, 1). The various 158 | # adjust_* ops all require this range for dtype float. 159 | image = tf.image.convert_image_dtype(image, dtype=tf.float32) 160 | return image 161 | 162 | 163 | def distort_color(image, thread_id=0, scope=None): 164 | """Distort the color of the image. 165 | 166 | Each color distortion is non-commutative and thus ordering of the color ops 167 | matters. Ideally we would randomly permute the ordering of the color ops. 168 | Rather then adding that level of complication, we select a distinct ordering 169 | of color ops for each preprocessing thread. 170 | 171 | Args: 172 | image: Tensor containing single image. 173 | thread_id: preprocessing thread ID. 174 | scope: Optional scope for op_scope. 175 | Returns: 176 | color-distorted image 177 | """ 178 | with tf.op_scope([image], scope, 'distort_color'): 179 | color_ordering = thread_id % 2 180 | 181 | if color_ordering == 0: 182 | image = tf.image.random_brightness(image, max_delta=32. / 255.) 183 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 184 | image = tf.image.random_hue(image, max_delta=0.2) 185 | image = tf.image.random_contrast(image, lower=0.5, upper=1.5) 186 | elif color_ordering == 1: 187 | image = tf.image.random_brightness(image, max_delta=32. / 255.) 188 | image = tf.image.random_contrast(image, lower=0.5, upper=1.5) 189 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 190 | image = tf.image.random_hue(image, max_delta=0.2) 191 | 192 | # The random_* ops do not necessarily clamp. 193 | image = tf.clip_by_value(image, 0.0, 1.0) 194 | return image 195 | 196 | 197 | def distort_image(image, height, width, bbox, thread_id=0, scope=None): 198 | """Distort one image for training a network. 199 | 200 | Distorting images provides a useful technique for augmenting the data 201 | set during training in order to make the network invariant to aspects 202 | of the image that do not effect the label. 203 | 204 | Args: 205 | image: 3-D float Tensor of image 206 | height: integer 207 | width: integer 208 | bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] 209 | where each coordinate is [0, 1) and the coordinates are arranged 210 | as [ymin, xmin, ymax, xmax]. 211 | thread_id: integer indicating the preprocessing thread. 212 | scope: Optional scope for op_scope. 213 | Returns: 214 | 3-D float Tensor of distorted image used for training. 
215 | """ 216 | with tf.op_scope([image, height, width, bbox], scope, 'distort_image'): 217 | # Each bounding box has shape [1, num_boxes, box coords] and 218 | # the coordinates are ordered [ymin, xmin, ymax, xmax]. 219 | 220 | # Display the bounding box in the first thread only. 221 | if not thread_id: 222 | image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), 223 | bbox) 224 | tf.image_summary('image_with_bounding_boxes', image_with_box) 225 | 226 | # A large fraction of image datasets contain a human-annotated bounding 227 | # box delineating the region of the image containing the object of interest. 228 | # We choose to create a new bounding box for the object which is a randomly 229 | # distorted version of the human-annotated bounding box that obeys an allowed 230 | # range of aspect ratios, sizes and overlap with the human-annotated 231 | # bounding box. If no box is supplied, then we assume the bounding box is 232 | # the entire image. 233 | sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( 234 | tf.shape(image), 235 | bounding_boxes=bbox, 236 | min_object_covered=0.1, 237 | aspect_ratio_range=[0.75, 1.33], 238 | area_range=[0.05, 1.0], 239 | max_attempts=100, 240 | use_image_if_no_bounding_boxes=True) 241 | bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box 242 | if not thread_id: 243 | image_with_distorted_box = tf.image.draw_bounding_boxes( 244 | tf.expand_dims(image, 0), distort_bbox) 245 | tf.image_summary('images_with_distorted_bounding_box', 246 | image_with_distorted_box) 247 | 248 | # Crop the image to the specified bounding box. 249 | distorted_image = tf.slice(image, bbox_begin, bbox_size) 250 | 251 | # This resizing operation may distort the images because the aspect 252 | # ratio is not respected. We select a resize method in a round robin 253 | # fashion based on the thread number. 254 | # Note that ResizeMethod contains 4 enumerated resizing methods. 255 | resize_method = thread_id % 4 256 | distorted_image = tf.image.resize_images(distorted_image, height, width, 257 | resize_method) 258 | # Restore the shape since the dynamic slice based upon the bbox_size loses 259 | # the third dimension. 260 | distorted_image.set_shape([height, width, 3]) 261 | if not thread_id: 262 | tf.image_summary('cropped_resized_image', 263 | tf.expand_dims(distorted_image, 0)) 264 | 265 | # Randomly flip the image horizontally. 266 | distorted_image = tf.image.random_flip_left_right(distorted_image) 267 | 268 | # Randomly distort the colors. 269 | distorted_image = distort_color(distorted_image, thread_id) 270 | 271 | if not thread_id: 272 | tf.image_summary('final_distorted_image', 273 | tf.expand_dims(distorted_image, 0)) 274 | return distorted_image 275 | 276 | 277 | def eval_image(image, height, width, scope=None): 278 | """Prepare one image for evaluation. 279 | 280 | Args: 281 | image: 3-D float Tensor 282 | height: integer 283 | width: integer 284 | scope: Optional scope for op_scope. 285 | Returns: 286 | 3-D float Tensor of prepared image. 287 | """ 288 | with tf.op_scope([image, height, width], scope, 'eval_image'): 289 | # Crop the central region of the image with an area containing 87.5% of 290 | # the original image. 291 | image = tf.image.central_crop(image, central_fraction=0.875) 292 | 293 | # Resize the image to the original height and width. 
294 | image = tf.expand_dims(image, 0) 295 | image = tf.image.resize_bilinear(image, [height, width], 296 | align_corners=False) 297 | image = tf.squeeze(image, [0]) 298 | return image 299 | 300 | 301 | def image_preprocessing(image_buffer, bbox, train, thread_id=0): 302 | """Decode and preprocess one image for evaluation or training. 303 | 304 | Args: 305 | image_buffer: JPEG encoded string Tensor 306 | bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] 307 | where each coordinate is [0, 1) and the coordinates are arranged as 308 | [ymin, xmin, ymax, xmax]. 309 | train: boolean 310 | thread_id: integer indicating preprocessing thread 311 | 312 | Returns: 313 | 3-D float Tensor containing an appropriately scaled image 314 | 315 | Raises: 316 | ValueError: if user does not provide bounding box 317 | """ 318 | if bbox is None: 319 | raise ValueError('Please supply a bounding box.') 320 | 321 | image = decode_jpeg(image_buffer) 322 | height = FLAGS.image_size 323 | width = FLAGS.image_size 324 | 325 | if train: 326 | image = distort_image(image, height, width, bbox, thread_id) 327 | else: 328 | image = eval_image(image, height, width) 329 | 330 | # Finally, rescale to [-1,1] instead of [0, 1) 331 | image = tf.sub(image, 0.5) 332 | image = tf.mul(image, 2.0) 333 | return image 334 | 335 | 336 | def parse_example_proto(example_serialized): 337 | """Parses an Example proto containing a training example of an image. 338 | 339 | The output of the build_image_data.py image preprocessing script is a dataset 340 | containing serialized Example protocol buffers. Each Example proto contains 341 | the following fields: 342 | 343 | image/height: 462 344 | image/width: 581 345 | image/colorspace: 'RGB' 346 | image/channels: 3 347 | image/class/label: 615 348 | image/class/synset: 'n03623198' 349 | image/class/text: 'knee pad' 350 | image/object/bbox/xmin: 0.1 351 | image/object/bbox/xmax: 0.9 352 | image/object/bbox/ymin: 0.2 353 | image/object/bbox/ymax: 0.6 354 | image/object/bbox/label: 615 355 | image/format: 'JPEG' 356 | image/filename: 'ILSVRC2012_val_00041207.JPEG' 357 | image/encoded: 358 | 359 | Args: 360 | example_serialized: scalar Tensor tf.string containing a serialized 361 | Example protocol buffer. 362 | 363 | Returns: 364 | image_buffer: Tensor tf.string containing the contents of a JPEG file. 365 | label: Tensor tf.int32 containing the label. 366 | bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] 367 | where each coordinate is [0, 1) and the coordinates are arranged as 368 | [ymin, xmin, ymax, xmax]. 369 | text: Tensor tf.string containing the human-readable label. 370 | """ 371 | # Dense features in Example proto. 372 | feature_map = { 373 | 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, 374 | default_value=''), 375 | 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, 376 | default_value=-1), 377 | 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, 378 | default_value=''), 379 | } 380 | sparse_float32 = tf.VarLenFeature(dtype=tf.float32) 381 | # Sparse features in Example proto. 
382 | feature_map.update( 383 | {k: sparse_float32 for k in ['image/object/bbox/xmin', 384 | 'image/object/bbox/ymin', 385 | 'image/object/bbox/xmax', 386 | 'image/object/bbox/ymax']}) 387 | 388 | features = tf.parse_single_example(example_serialized, feature_map) 389 | label = tf.cast(features['image/class/label'], dtype=tf.int32) 390 | 391 | xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) 392 | ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) 393 | xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) 394 | ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) 395 | 396 | # Note that we impose an ordering of (y, x) just to make life difficult. 397 | bbox = tf.concat(0, [ymin, xmin, ymax, xmax]) 398 | 399 | # Force the variable number of bounding boxes into the shape 400 | # [1, num_boxes, coords]. 401 | bbox = tf.expand_dims(bbox, 0) 402 | bbox = tf.transpose(bbox, [0, 2, 1]) 403 | 404 | return features['image/encoded'], label, bbox, features['image/class/text'] 405 | 406 | 407 | def batch_inputs(dataset, batch_size, train, num_preprocess_threads=None, 408 | num_readers=1): 409 | """Contruct batches of training or evaluation examples from the image dataset. 410 | 411 | Args: 412 | dataset: instance of Dataset class specifying the dataset. 413 | See dataset.py for details. 414 | batch_size: integer 415 | train: boolean 416 | num_preprocess_threads: integer, total number of preprocessing threads 417 | num_readers: integer, number of parallel readers 418 | 419 | Returns: 420 | images: 4-D float Tensor of a batch of images 421 | labels: 1-D integer Tensor of [batch_size]. 422 | 423 | Raises: 424 | ValueError: if data is not found 425 | """ 426 | with tf.name_scope('batch_processing'): 427 | data_files = dataset.data_files() 428 | if data_files is None: 429 | raise ValueError('No data files found for this dataset') 430 | print(type(data_files)) 431 | # Create filename_queue 432 | if train: 433 | filename_queue = tf.train.string_input_producer(data_files, 434 | shuffle=True, 435 | capacity=16) 436 | else: 437 | filename_queue = tf.train.string_input_producer(data_files, 438 | shuffle=False, 439 | capacity=1) 440 | if num_preprocess_threads is None: 441 | num_preprocess_threads = FLAGS.num_preprocess_threads 442 | 443 | if num_preprocess_threads % 4: 444 | raise ValueError('Please make num_preprocess_threads a multiple ' 445 | 'of 4 (%d % 4 != 0).', num_preprocess_threads) 446 | 447 | if num_readers is None: 448 | num_readers = FLAGS.num_readers 449 | 450 | if num_readers < 1: 451 | raise ValueError('Please make num_readers at least 1') 452 | 453 | # Approximate number of examples per shard. 454 | examples_per_shard = 1024 455 | # Size the random shuffle queue to balance between good global 456 | # mixing (more examples) and memory use (fewer examples). 457 | # 1 image uses 299*299*3*4 bytes = 1MB 458 | # The default input_queue_memory_factor is 16 implying a shuffling queue 459 | # size: examples_per_shard * 16 * 1MB = 17.6GB 460 | min_queue_examples = examples_per_shard * FLAGS.input_queue_memory_factor 461 | if train: 462 | examples_queue = tf.RandomShuffleQueue( 463 | capacity=min_queue_examples + 3 * batch_size, 464 | min_after_dequeue=min_queue_examples, 465 | dtypes=[tf.string]) 466 | else: 467 | examples_queue = tf.FIFOQueue( 468 | capacity=examples_per_shard + 3 * batch_size, 469 | dtypes=[tf.string]) 470 | 471 | # Create multiple readers to populate the queue of examples. 
472 | if num_readers > 1: 473 | enqueue_ops = [] 474 | for _ in range(num_readers): 475 | reader = dataset.reader() 476 | _, value = reader.read(filename_queue) 477 | enqueue_ops.append(examples_queue.enqueue([value])) 478 | 479 | tf.train.queue_runner.add_queue_runner( 480 | tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops)) 481 | example_serialized = examples_queue.dequeue() 482 | else: 483 | reader = dataset.reader() 484 | _, example_serialized = reader.read(filename_queue) 485 | 486 | images_and_labels = [] 487 | for thread_id in range(num_preprocess_threads): 488 | # Parse a serialized Example proto to extract the image and metadata. 489 | image_buffer, label_index, bbox, _ = parse_example_proto( 490 | example_serialized) 491 | image = image_preprocessing(image_buffer, bbox, train, thread_id) 492 | images_and_labels.append([image, label_index]) 493 | 494 | images, label_index_batch = tf.train.batch_join( 495 | images_and_labels, 496 | batch_size=batch_size, 497 | capacity=2 * num_preprocess_threads * batch_size) 498 | 499 | # Reshape images into these desired dimensions. 500 | height = FLAGS.image_size 501 | width = FLAGS.image_size 502 | depth = 3 503 | 504 | images = tf.cast(images, tf.float32) 505 | images = tf.reshape(images, shape=[batch_size, height, width, depth]) 506 | 507 | # Display the training images in the visualizer. 508 | tf.image_summary('images', images) 509 | 510 | return images, tf.reshape(label_index_batch, [batch_size]) 511 | 512 | 513 | 514 | -------------------------------------------------------------------------------- /src/inception/inception_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Inception-v3 expressed in TensorFlow-Slim. 16 | 17 | Usage: 18 | 19 | # Parameters for BatchNorm. 20 | batch_norm_params = { 21 | # Decay for the batch_norm moving averages. 22 | 'decay': BATCHNORM_MOVING_AVERAGE_DECAY, 23 | # epsilon to prevent 0s in variance. 24 | 'epsilon': 0.001, 25 | } 26 | # Set weight_decay for weights in Conv and FC layers. 27 | with slim.arg_scope([slim.ops.conv2d, slim.ops.fc], weight_decay=0.00004): 28 | with slim.arg_scope([slim.ops.conv2d], 29 | stddev=0.1, 30 | activation=tf.nn.relu, 31 | batch_norm_params=batch_norm_params): 32 | # Force all Variables to reside on the CPU. 
33 | with slim.arg_scope([slim.variables.variable], device='/cpu:0'): 34 | logits, endpoints = slim.inception.inception_v3( 35 | images, 36 | dropout_keep_prob=0.8, 37 | num_classes=num_classes, 38 | is_training=for_training, 39 | restore_logits=restore_logits, 40 | scope=scope) 41 | """ 42 | from __future__ import absolute_import 43 | from __future__ import division 44 | from __future__ import print_function 45 | 46 | import tensorflow as tf 47 | 48 | from src.inception import ops 49 | from src.inception import scopes 50 | 51 | 52 | def inception_v3(inputs, 53 | dropout_keep_prob=0.8, 54 | num_classes=1000, 55 | is_training=True, 56 | restore_logits=True, 57 | scope=''): 58 | """Latest Inception from http://arxiv.org/abs/1512.00567. 59 | 60 | "Rethinking the Inception Architecture for Computer Vision" 61 | 62 | Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, 63 | Zbigniew Wojna 64 | 65 | Args: 66 | inputs: a tensor of size [batch_size, height, width, channels]. 67 | dropout_keep_prob: dropout keep_prob. 68 | num_classes: number of predicted classes. 69 | is_training: whether is training or not. 70 | restore_logits: whether or not the logits layers should be restored. 71 | Useful for fine-tuning a model with different num_classes. 72 | scope: Optional scope for op_scope. 73 | 74 | Returns: 75 | a list containing 'logits', 'aux_logits' Tensors. 76 | """ 77 | # end_points will collect relevant activations for external use, for example 78 | # summaries or losses. 79 | end_points = {} 80 | with tf.op_scope([inputs], scope, 'inception_v3'): 81 | with scopes.arg_scope([ops.conv2d, ops.fc, ops.batch_norm, ops.dropout], 82 | is_training=is_training): 83 | with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool], 84 | stride=1, padding='VALID'): 85 | # 299 x 299 x 3 86 | end_points['conv0'] = ops.conv2d(inputs, 32, [3, 3], stride=2, 87 | scope='conv0') 88 | # 149 x 149 x 32 89 | end_points['conv1'] = ops.conv2d(end_points['conv0'], 32, [3, 3], 90 | scope='conv1') 91 | # 147 x 147 x 32 92 | end_points['conv2'] = ops.conv2d(end_points['conv1'], 64, [3, 3], 93 | padding='SAME', scope='conv2') 94 | # 147 x 147 x 64 95 | end_points['pool1'] = ops.max_pool(end_points['conv2'], [3, 3], 96 | stride=2, scope='pool1') 97 | # 73 x 73 x 64 98 | end_points['conv3'] = ops.conv2d(end_points['pool1'], 80, [1, 1], 99 | scope='conv3') 100 | # 73 x 73 x 80. 101 | end_points['conv4'] = ops.conv2d(end_points['conv3'], 192, [3, 3], 102 | scope='conv4') 103 | # 71 x 71 x 192. 104 | end_points['pool2'] = ops.max_pool(end_points['conv4'], [3, 3], 105 | stride=2, scope='pool2') 106 | # 35 x 35 x 192. 107 | net = end_points['pool2'] 108 | # Inception blocks 109 | with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool], 110 | stride=1, padding='SAME'): 111 | # mixed: 35 x 35 x 256. 
112 | with tf.variable_scope('mixed_35x35x256a'): 113 | with tf.variable_scope('branch1x1'): 114 | branch1x1 = ops.conv2d(net, 64, [1, 1]) 115 | with tf.variable_scope('branch5x5'): 116 | branch5x5 = ops.conv2d(net, 48, [1, 1]) 117 | branch5x5 = ops.conv2d(branch5x5, 64, [5, 5]) 118 | with tf.variable_scope('branch3x3dbl'): 119 | branch3x3dbl = ops.conv2d(net, 64, [1, 1]) 120 | branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) 121 | branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) 122 | with tf.variable_scope('branch_pool'): 123 | branch_pool = ops.avg_pool(net, [3, 3]) 124 | branch_pool = ops.conv2d(branch_pool, 32, [1, 1]) 125 | net = tf.concat( [branch1x1, branch5x5, branch3x3dbl, branch_pool],3) 126 | end_points['mixed_35x35x256a'] = net 127 | # mixed_1: 35 x 35 x 288. 128 | with tf.variable_scope('mixed_35x35x288a'): 129 | with tf.variable_scope('branch1x1'): 130 | branch1x1 = ops.conv2d(net, 64, [1, 1]) 131 | with tf.variable_scope('branch5x5'): 132 | branch5x5 = ops.conv2d(net, 48, [1, 1]) 133 | branch5x5 = ops.conv2d(branch5x5, 64, [5, 5]) 134 | with tf.variable_scope('branch3x3dbl'): 135 | branch3x3dbl = ops.conv2d(net, 64, [1, 1]) 136 | branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) 137 | branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) 138 | with tf.variable_scope('branch_pool'): 139 | branch_pool = ops.avg_pool(net, [3, 3]) 140 | branch_pool = ops.conv2d(branch_pool, 64, [1, 1]) 141 | net = tf.concat( [branch1x1, branch5x5, branch3x3dbl, branch_pool],3) 142 | end_points['mixed_35x35x288a'] = net 143 | # mixed_2: 35 x 35 x 288. 144 | with tf.variable_scope('mixed_35x35x288b'): 145 | with tf.variable_scope('branch1x1'): 146 | branch1x1 = ops.conv2d(net, 64, [1, 1]) 147 | with tf.variable_scope('branch5x5'): 148 | branch5x5 = ops.conv2d(net, 48, [1, 1]) 149 | branch5x5 = ops.conv2d(branch5x5, 64, [5, 5]) 150 | with tf.variable_scope('branch3x3dbl'): 151 | branch3x3dbl = ops.conv2d(net, 64, [1, 1]) 152 | branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) 153 | branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) 154 | with tf.variable_scope('branch_pool'): 155 | branch_pool = ops.avg_pool(net, [3, 3]) 156 | branch_pool = ops.conv2d(branch_pool, 64, [1, 1]) 157 | net = tf.concat( [branch1x1, branch5x5, branch3x3dbl, branch_pool],3) 158 | end_points['mixed_35x35x288b'] = net 159 | # mixed_3: 17 x 17 x 768. 160 | with tf.variable_scope('mixed_17x17x768a'): 161 | with tf.variable_scope('branch3x3'): 162 | branch3x3 = ops.conv2d(net, 384, [3, 3], stride=2, padding='VALID') 163 | with tf.variable_scope('branch3x3dbl'): 164 | branch3x3dbl = ops.conv2d(net, 64, [1, 1]) 165 | branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) 166 | branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3], 167 | stride=2, padding='VALID') 168 | with tf.variable_scope('branch_pool'): 169 | branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID') 170 | net = tf.concat( [branch3x3, branch3x3dbl, branch_pool],3) 171 | end_points['mixed_17x17x768a'] = net 172 | # mixed4: 17 x 17 x 768. 
173 | with tf.variable_scope('mixed_17x17x768b'): 174 | with tf.variable_scope('branch1x1'): 175 | branch1x1 = ops.conv2d(net, 192, [1, 1]) 176 | with tf.variable_scope('branch7x7'): 177 | branch7x7 = ops.conv2d(net, 128, [1, 1]) 178 | branch7x7 = ops.conv2d(branch7x7, 128, [1, 7]) 179 | branch7x7 = ops.conv2d(branch7x7, 192, [7, 1]) 180 | with tf.variable_scope('branch7x7dbl'): 181 | branch7x7dbl = ops.conv2d(net, 128, [1, 1]) 182 | branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1]) 183 | branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [1, 7]) 184 | branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1]) 185 | branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7]) 186 | with tf.variable_scope('branch_pool'): 187 | branch_pool = ops.avg_pool(net, [3, 3]) 188 | branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) 189 | net = tf.concat( [branch1x1, branch7x7, branch7x7dbl, branch_pool],3) 190 | end_points['mixed_17x17x768b'] = net 191 | # mixed_5: 17 x 17 x 768. 192 | with tf.variable_scope('mixed_17x17x768c'): 193 | with tf.variable_scope('branch1x1'): 194 | branch1x1 = ops.conv2d(net, 192, [1, 1]) 195 | with tf.variable_scope('branch7x7'): 196 | branch7x7 = ops.conv2d(net, 160, [1, 1]) 197 | branch7x7 = ops.conv2d(branch7x7, 160, [1, 7]) 198 | branch7x7 = ops.conv2d(branch7x7, 192, [7, 1]) 199 | with tf.variable_scope('branch7x7dbl'): 200 | branch7x7dbl = ops.conv2d(net, 160, [1, 1]) 201 | branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1]) 202 | branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7]) 203 | branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1]) 204 | branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7]) 205 | with tf.variable_scope('branch_pool'): 206 | branch_pool = ops.avg_pool(net, [3, 3]) 207 | branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) 208 | net = tf.concat( [branch1x1, branch7x7, branch7x7dbl, branch_pool],3) 209 | end_points['mixed_17x17x768c'] = net 210 | # mixed_6: 17 x 17 x 768. 211 | with tf.variable_scope('mixed_17x17x768d'): 212 | with tf.variable_scope('branch1x1'): 213 | branch1x1 = ops.conv2d(net, 192, [1, 1]) 214 | with tf.variable_scope('branch7x7'): 215 | branch7x7 = ops.conv2d(net, 160, [1, 1]) 216 | branch7x7 = ops.conv2d(branch7x7, 160, [1, 7]) 217 | branch7x7 = ops.conv2d(branch7x7, 192, [7, 1]) 218 | with tf.variable_scope('branch7x7dbl'): 219 | branch7x7dbl = ops.conv2d(net, 160, [1, 1]) 220 | branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1]) 221 | branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7]) 222 | branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1]) 223 | branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7]) 224 | with tf.variable_scope('branch_pool'): 225 | branch_pool = ops.avg_pool(net, [3, 3]) 226 | branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) 227 | net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool],3 ) 228 | end_points['mixed_17x17x768d'] = net 229 | # mixed_7: 17 x 17 x 768. 
230 | with tf.variable_scope('mixed_17x17x768e'): 231 | with tf.variable_scope('branch1x1'): 232 | branch1x1 = ops.conv2d(net, 192, [1, 1]) 233 | with tf.variable_scope('branch7x7'): 234 | branch7x7 = ops.conv2d(net, 192, [1, 1]) 235 | branch7x7 = ops.conv2d(branch7x7, 192, [1, 7]) 236 | branch7x7 = ops.conv2d(branch7x7, 192, [7, 1]) 237 | with tf.variable_scope('branch7x7dbl'): 238 | branch7x7dbl = ops.conv2d(net, 192, [1, 1]) 239 | branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1]) 240 | branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7]) 241 | branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1]) 242 | branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7]) 243 | with tf.variable_scope('branch_pool'): 244 | branch_pool = ops.avg_pool(net, [3, 3]) 245 | branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) 246 | net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], 3) 247 | end_points['mixed_17x17x768e'] = net 248 | # Auxiliary Head logits 249 | aux_logits = tf.identity(end_points['mixed_17x17x768e']) 250 | with tf.variable_scope('aux_logits'): 251 | aux_logits = ops.avg_pool(aux_logits, [5, 5], stride=3, 252 | padding='VALID') 253 | aux_logits = ops.conv2d(aux_logits, 128, [1, 1], scope='proj') 254 | # Shape of feature map before the final layer. 255 | shape = aux_logits.get_shape() 256 | aux_logits = ops.conv2d(aux_logits, 768, shape[1:3], stddev=0.01, 257 | padding='VALID') 258 | aux_logits = ops.flatten(aux_logits) 259 | aux_logits = ops.fc(aux_logits, num_classes, activation=None, 260 | stddev=0.001, restore=restore_logits) 261 | end_points['aux_logits'] = aux_logits 262 | # mixed_8: 8 x 8 x 1280. 263 | # Note that the scope below is not changed to not void previous 264 | # checkpoints. 265 | # (TODO) Fix the scope when appropriate. 266 | with tf.variable_scope('mixed_17x17x1280a'): 267 | with tf.variable_scope('branch3x3'): 268 | branch3x3 = ops.conv2d(net, 192, [1, 1]) 269 | branch3x3 = ops.conv2d(branch3x3, 320, [3, 3], stride=2, 270 | padding='VALID') 271 | with tf.variable_scope('branch7x7x3'): 272 | branch7x7x3 = ops.conv2d(net, 192, [1, 1]) 273 | branch7x7x3 = ops.conv2d(branch7x7x3, 192, [1, 7]) 274 | branch7x7x3 = ops.conv2d(branch7x7x3, 192, [7, 1]) 275 | branch7x7x3 = ops.conv2d(branch7x7x3, 192, [3, 3], 276 | stride=2, padding='VALID') 277 | with tf.variable_scope('branch_pool'): 278 | branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID') 279 | net = tf.concat( [branch3x3, branch7x7x3, branch_pool], 3) 280 | end_points['mixed_17x17x1280a'] = net 281 | # mixed_9: 8 x 8 x 2048. 282 | with tf.variable_scope('mixed_8x8x2048a'): 283 | with tf.variable_scope('branch1x1'): 284 | branch1x1 = ops.conv2d(net, 320, [1, 1]) 285 | with tf.variable_scope('branch3x3'): 286 | branch3x3 = ops.conv2d(net, 384, [1, 1]) 287 | branch3x3 = tf.concat( [ops.conv2d(branch3x3, 384, [1, 3]), 288 | ops.conv2d(branch3x3, 384, [3, 1])], 3) 289 | with tf.variable_scope('branch3x3dbl'): 290 | branch3x3dbl = ops.conv2d(net, 448, [1, 1]) 291 | branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3]) 292 | branch3x3dbl = tf.concat([ops.conv2d(branch3x3dbl, 384, [1, 3]), 293 | ops.conv2d(branch3x3dbl, 384, [3, 1])], 3) 294 | with tf.variable_scope('branch_pool'): 295 | branch_pool = ops.avg_pool(net, [3, 3]) 296 | branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) 297 | net = tf.concat( [branch1x1, branch3x3, branch3x3dbl, branch_pool],3) 298 | end_points['mixed_8x8x2048a'] = net 299 | # mixed_10: 8 x 8 x 2048. 
300 | with tf.variable_scope('mixed_8x8x2048b'): 301 | with tf.variable_scope('branch1x1'): 302 | branch1x1 = ops.conv2d(net, 320, [1, 1]) 303 | with tf.variable_scope('branch3x3'): 304 | branch3x3 = ops.conv2d(net, 384, [1, 1]) 305 | branch3x3 = tf.concat( [ops.conv2d(branch3x3, 384, [1, 3]), 306 | ops.conv2d(branch3x3, 384, [3, 1])],3) 307 | with tf.variable_scope('branch3x3dbl'): 308 | branch3x3dbl = ops.conv2d(net, 448, [1, 1]) 309 | branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3]) 310 | branch3x3dbl = tf.concat( [ops.conv2d(branch3x3dbl, 384, [1, 3]), 311 | ops.conv2d(branch3x3dbl, 384, [3, 1])],3) 312 | with tf.variable_scope('branch_pool'): 313 | branch_pool = ops.avg_pool(net, [3, 3]) 314 | branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) 315 | net = tf.concat( [branch1x1, branch3x3, branch3x3dbl, branch_pool],3) 316 | end_points['mixed_8x8x2048b'] = net 317 | # Final pooling and prediction 318 | with tf.variable_scope('logits'): 319 | shape = net.get_shape() 320 | net = ops.avg_pool(net, shape[1:3], padding='VALID', scope='pool') 321 | # 1 x 1 x 2048 322 | net = ops.dropout(net, dropout_keep_prob, scope='dropout') 323 | net = ops.flatten(net, scope='flatten') 324 | highfeatures = net 325 | # 1 x 1 x 128 326 | net = ops.fc(net, 128, activation=tf.nn.sigmoid, restore=restore_logits) 327 | # 2048 328 | logits = ops.fc(net, num_classes, activation=None, scope='logits', 329 | restore=restore_logits) 330 | # 1000 331 | end_points['logits'] = logits 332 | end_points['predictions'] = tf.nn.softmax(logits, name='predictions') 333 | return net , highfeatures ,end_points 334 | 335 | def inception_v3_parameters(weight_decay=0.00004, stddev=0.1, 336 | batch_norm_decay=0.9997, batch_norm_epsilon=0.001): 337 | """Yields the scope with the default parameters for inception_v3. 338 | 339 | Args: 340 | weight_decay: the weight decay for weights variables. 341 | stddev: standard deviation of the truncated guassian weight distribution. 342 | batch_norm_decay: decay for the moving average of batch_norm momentums. 343 | batch_norm_epsilon: small float added to variance to avoid dividing by zero. 344 | 345 | Yields: 346 | a arg_scope with the parameters needed for inception_v3. 347 | """ 348 | # Set weight_decay for weights in Conv and FC layers. 349 | with scopes.arg_scope([ops.conv2d, ops.fc], 350 | weight_decay=weight_decay): 351 | # Set stddev, activation and parameters for batch_norm. 352 | with scopes.arg_scope([ops.conv2d], 353 | stddev=stddev, 354 | activation=tf.nn.relu, 355 | batch_norm_params={ 356 | 'decay': batch_norm_decay, 357 | 'epsilon': batch_norm_epsilon}) as arg_scope: 358 | yield arg_scope 359 | -------------------------------------------------------------------------------- /src/inception/inception_module.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Build the Inception v3 network on ImageNet data set. 16 | 17 | The Inception v3 architecture is described in http://arxiv.org/abs/1512.00567 18 | 19 | Summary of available functions: 20 | inference: Compute inference on the model inputs to make a prediction 21 | loss: Compute the loss of the prediction with respect to the labels 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import re 28 | 29 | import tensorflow as tf 30 | 31 | from src.inception import slim 32 | 33 | FLAGS = tf.app.flags.FLAGS 34 | 35 | # If a model is trained using multiple GPUs, prefix all Op names with tower_name 36 | # to differentiate the operations. Note that this prefix is removed from the 37 | # names of the summaries when visualizing a model. 38 | TOWER_NAME = 'tower' 39 | 40 | # Batch normalization. Constant governing the exponential moving average of 41 | # the 'global' mean and variance for all activations. 42 | BATCHNORM_MOVING_AVERAGE_DECAY = 0.9997 43 | 44 | # The decay to use for the moving average. 45 | MOVING_AVERAGE_DECAY = 0.9999 46 | 47 | 48 | def inference(images, num_classes, for_training=False, restore_logits=True, 49 | scope=None): 50 | """Build Inception v3 model architecture. 51 | 52 | See here for reference: http://arxiv.org/abs/1512.00567 53 | 54 | Args: 55 | images: Images returned from inputs() or distorted_inputs(). 56 | num_classes: number of classes 57 | for_training: If set to `True`, build the inference model for training. 58 | Kernels that operate differently for inference during training 59 | e.g. dropout, are appropriately configured. 60 | restore_logits: whether or not the logits layers should be restored. 61 | Useful for fine-tuning a model with different num_classes. 62 | scope: optional prefix string identifying the ImageNet tower. 63 | 64 | Returns: 65 | Logits. 2-D float Tensor. 66 | Auxiliary Logits. 2-D float Tensor of side-head. Used for training only. 67 | """ 68 | # Parameters for BatchNorm. 69 | batch_norm_params = { 70 | # Decay for the moving averages. 71 | 'decay': BATCHNORM_MOVING_AVERAGE_DECAY, 72 | # epsilon to prevent 0s in variance. 73 | 'epsilon': 0.001, 74 | } 75 | # Set weight_decay for weights in Conv and FC layers. 76 | with slim.arg_scope([slim.ops.conv2d, slim.ops.fc], weight_decay=0.00004): 77 | with slim.arg_scope([slim.ops.conv2d], 78 | stddev=0.1, 79 | activation=tf.nn.relu, 80 | batch_norm_params=batch_norm_params): 81 | logits,highfeatures , endpoints = slim.inception.inception_v3( 82 | images, 83 | dropout_keep_prob=0.8, 84 | num_classes=num_classes, 85 | is_training=for_training, 86 | restore_logits=restore_logits, 87 | scope=scope) 88 | 89 | # Add summaries for viewing model statistics on TensorBoard. 90 | _activation_summaries(endpoints) 91 | 92 | # Grab the logits associated with the side head. Employed during training. 93 | auxiliary_logits = endpoints['aux_logits'] 94 | 95 | return logits, highfeatures ,auxiliary_logits 96 | 97 | 98 | def loss(logits, labels, batch_size=None): 99 | """Adds all losses for the model. 100 | 101 | Note the final loss is not returned. Instead, the list of losses are collected 102 | by slim.losses. The losses are accumulated in tower_loss() and summed to 103 | calculate the total loss. 104 | 105 | Args: 106 | logits: List of logits from inference(). Each entry is a 2-D float Tensor. 
107 |     labels: Labels from distorted_inputs or inputs(). 1-D tensor
108 |             of shape [batch_size]
109 |     batch_size: integer
110 |   """
111 |   if not batch_size:
112 |     batch_size = FLAGS.batch_size
113 | 
114 |   # Reshape the labels into a dense Tensor of
115 |   # shape [FLAGS.batch_size, num_classes].
116 |   sparse_labels = tf.reshape(labels, [batch_size, 1])
117 |   indices = tf.reshape(tf.range(batch_size), [batch_size, 1])
118 |   concated = tf.concat([indices, sparse_labels], 1)
119 |   num_classes = logits[0].get_shape()[-1].value
120 |   dense_labels = tf.sparse_to_dense(concated,
121 |                                     [batch_size, num_classes],
122 |                                     1.0, 0.0)
123 | 
124 |   # Cross entropy loss for the main softmax prediction.
125 |   slim.losses.cross_entropy_loss(logits[0],
126 |                                  dense_labels,
127 |                                  label_smoothing=0.1,
128 |                                  weight=1.0)
129 | 
130 |   # Cross entropy loss for the auxiliary softmax head.
131 |   slim.losses.cross_entropy_loss(logits[1],
132 |                                  dense_labels,
133 |                                  label_smoothing=0.1,
134 |                                  weight=0.4,
135 |                                  scope='aux_loss')
136 | 
137 | 
138 | def _activation_summary(x):
139 |   """Helper to create summaries for activations.
140 | 
141 |   Creates a summary that provides a histogram of activations.
142 |   Creates a summary that measures the sparsity of activations.
143 | 
144 |   Args:
145 |     x: Tensor
146 |   """
147 |   # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
148 |   # session. This helps the clarity of presentation on tensorboard.
149 |   tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
150 |   tf.summary.histogram(tensor_name + '/activations', x)
151 |   tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
152 | 
153 | 
154 | def _activation_summaries(endpoints):
155 |   with tf.name_scope('summaries'):
156 |     for act in endpoints.values():
157 |       _activation_summary(act)
158 | 
--------------------------------------------------------------------------------
/src/inception/losses.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains convenience wrappers for various Neural Network TensorFlow losses.
16 | 
17 |   All the losses defined here add themselves to the LOSSES_COLLECTION
18 |   collection.
19 | 
20 |   l1_loss: Define an L1 loss, useful for regularization, e.g. lasso.
21 |   l2_loss: Define an L2 loss, useful for regularization, e.g. weight decay.
22 |   cross_entropy_loss: Define a cross entropy loss using
23 |     softmax_cross_entropy_with_logits. Useful for classification.
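
  Example of gathering the collected losses (a minimal sketch; `logits`,
  `one_hot_labels` and `weights` stand for tensors produced elsewhere):

    loss = cross_entropy_loss(logits, one_hot_labels, label_smoothing=0.1)
    reg = l2_loss(weights, weight=0.00004)
    total_loss = tf.add_n(tf.get_collection(LOSSES_COLLECTION))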
24 | """ 25 | from __future__ import absolute_import 26 | from __future__ import division 27 | from __future__ import print_function 28 | 29 | import tensorflow as tf 30 | 31 | # In order to gather all losses in a network, the user should use this 32 | # key for get_collection, i.e: 33 | # losses = tf.get_collection(slim.losses.LOSSES_COLLECTION) 34 | LOSSES_COLLECTION = '_losses' 35 | 36 | 37 | def l1_regularizer(weight=1.0, scope=None): 38 | """Define a L1 regularizer. 39 | 40 | Args: 41 | weight: scale the loss by this factor. 42 | scope: Optional scope for op_scope. 43 | 44 | Returns: 45 | a regularizer function. 46 | """ 47 | def regularizer(tensor): 48 | with tf.op_scope([tensor], scope, 'L1Regularizer'): 49 | l1_weight = tf.convert_to_tensor(weight, 50 | dtype=tensor.dtype.base_dtype, 51 | name='weight') 52 | return tf.multiply(l1_weight, tf.reduce_sum(tf.abs(tensor)), name='value') 53 | return regularizer 54 | 55 | 56 | def l2_regularizer(weight=1.0, scope=None): 57 | """Define a L2 regularizer. 58 | 59 | Args: 60 | weight: scale the loss by this factor. 61 | scope: Optional scope for op_scope. 62 | 63 | Returns: 64 | a regularizer function. 65 | """ 66 | def regularizer(tensor): 67 | with tf.op_scope([tensor], scope, 'L2Regularizer'): 68 | l2_weight = tf.convert_to_tensor(weight, 69 | dtype=tensor.dtype.base_dtype, 70 | name='weight') 71 | return tf.multiply(l2_weight, tf.nn.l2_loss(tensor), name='value') 72 | return regularizer 73 | 74 | 75 | def l1_l2_regularizer(weight_l1=1.0, weight_l2=1.0, scope=None): 76 | """Define a L1L2 regularizer. 77 | 78 | Args: 79 | weight_l1: scale the L1 loss by this factor. 80 | weight_l2: scale the L2 loss by this factor. 81 | scope: Optional scope for op_scope. 82 | 83 | Returns: 84 | a regularizer function. 85 | """ 86 | def regularizer(tensor): 87 | with tf.op_scope([tensor], scope, 'L1L2Regularizer'): 88 | weight_l1_t = tf.convert_to_tensor(weight_l1, 89 | dtype=tensor.dtype.base_dtype, 90 | name='weight_l1') 91 | weight_l2_t = tf.convert_to_tensor(weight_l2, 92 | dtype=tensor.dtype.base_dtype, 93 | name='weight_l2') 94 | reg_l1 =tf.multiply(weight_l1_t, tf.reduce_sum(tf.abs(tensor)), 95 | name='value_l1') 96 | reg_l2 = tf.multiply(weight_l2_t, tf.nn.l2_loss(tensor), 97 | name='value_l2') 98 | return tf.add(reg_l1, reg_l2, name='value') 99 | return regularizer 100 | 101 | 102 | def l1_loss(tensor, weight=1.0, scope=None): 103 | """Define a L1Loss, useful for regularize, i.e. lasso. 104 | 105 | Args: 106 | tensor: tensor to regularize. 107 | weight: scale the loss by this factor. 108 | scope: Optional scope for op_scope. 109 | 110 | Returns: 111 | the L1 loss op. 112 | """ 113 | with tf.op_scope([tensor], scope, 'L1Loss'): 114 | weight = tf.convert_to_tensor(weight, 115 | dtype=tensor.dtype.base_dtype, 116 | name='loss_weight') 117 | loss = tf.multiply(weight, tf.reduce_sum(tf.abs(tensor)), name='value') 118 | tf.add_to_collection(LOSSES_COLLECTION, loss) 119 | return loss 120 | 121 | 122 | def l2_loss(tensor, weight=1.0, scope=None): 123 | """Define a L2Loss, useful for regularize, i.e. weight decay. 124 | 125 | Args: 126 | tensor: tensor to regularize. 127 | weight: an optional weight to modulate the loss. 128 | scope: Optional scope for op_scope. 129 | 130 | Returns: 131 | the L2 loss op. 
132 | """ 133 | with tf.op_scope([tensor], scope, 'L2Loss'): 134 | weight = tf.convert_to_tensor(weight, 135 | dtype=tensor.dtype.base_dtype, 136 | name='loss_weight') 137 | loss = tf.multiply(weight, tf.nn.l2_loss(tensor), name='value') 138 | tf.add_to_collection(LOSSES_COLLECTION, loss) 139 | return loss 140 | 141 | 142 | def cross_entropy_loss(logits, one_hot_labels, label_smoothing=0, 143 | weight=1.0, scope=None): 144 | """Define a Cross Entropy loss using softmax_cross_entropy_with_logits. 145 | 146 | It can scale the loss by weight factor, and smooth the labels. 147 | 148 | Args: 149 | logits: [batch_size, num_classes] logits outputs of the network . 150 | one_hot_labels: [batch_size, num_classes] target one_hot_encoded labels. 151 | label_smoothing: if greater than 0 then smooth the labels. 152 | weight: scale the loss by this factor. 153 | scope: Optional scope for op_scope. 154 | 155 | Returns: 156 | A tensor with the softmax_cross_entropy loss. 157 | """ 158 | logits.get_shape().assert_is_compatible_with(one_hot_labels.get_shape()) 159 | with tf.op_scope([logits, one_hot_labels], scope, 'CrossEntropyLoss'): 160 | num_classes = one_hot_labels.get_shape()[-1].value 161 | one_hot_labels = tf.cast(one_hot_labels, logits.dtype) 162 | if label_smoothing > 0: 163 | smooth_positives = 1.0 - label_smoothing 164 | smooth_negatives = label_smoothing / num_classes 165 | one_hot_labels = one_hot_labels * smooth_positives + smooth_negatives 166 | cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, 167 | one_hot_labels, 168 | name='xentropy') 169 | weight = tf.convert_to_tensor(weight, 170 | dtype=logits.dtype.base_dtype, 171 | name='loss_weight') 172 | loss = tf.multiply(weight, tf.reduce_mean(cross_entropy), name='value') 173 | tf.add_to_collection(LOSSES_COLLECTION, loss) 174 | return loss 175 | -------------------------------------------------------------------------------- /src/inception/ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains convenience wrappers for typical Neural Network TensorFlow layers. 16 | 17 | Additionally it maintains a collection with update_ops that need to be 18 | updated after the ops have been computed, for exmaple to update moving means 19 | and moving variances of batch_norm. 20 | 21 | Ops that have different behavior during training or eval have an is_training 22 | parameter. Additionally Ops that contain variables.variable have a trainable 23 | parameter, which control if the ops variables are trainable or not. 
24 | """ 25 | from __future__ import absolute_import 26 | from __future__ import division 27 | from __future__ import print_function 28 | 29 | 30 | import tensorflow as tf 31 | 32 | from tensorflow.python.training import moving_averages 33 | 34 | from src.inception import losses 35 | from src.inception import scopes 36 | from src.inception import variables 37 | 38 | # Used to keep the update ops done by batch_norm. 39 | UPDATE_OPS_COLLECTION = '_update_ops_' 40 | 41 | 42 | @scopes.add_arg_scope 43 | def batch_norm(inputs, 44 | decay=0.999, 45 | center=True, 46 | scale=False, 47 | epsilon=0.001, 48 | moving_vars='moving_vars', 49 | activation=None, 50 | is_training=True, 51 | trainable=True, 52 | restore=True, 53 | scope=None, 54 | reuse=None): 55 | """Adds a Batch Normalization layer. 56 | 57 | Args: 58 | inputs: a tensor of size [batch_size, height, width, channels] 59 | or [batch_size, channels]. 60 | decay: decay for the moving average. 61 | center: If True, subtract beta. If False, beta is not created and ignored. 62 | scale: If True, multiply by gamma. If False, gamma is 63 | not used. When the next layer is linear (also e.g. ReLU), this can be 64 | disabled since the scaling can be done by the next layer. 65 | epsilon: small float added to variance to avoid dividing by zero. 66 | moving_vars: collection to store the moving_mean and moving_variance. 67 | activation: activation function. 68 | is_training: whether or not the model is in training mode. 69 | trainable: whether or not the variables should be trainable or not. 70 | restore: whether or not the variables should be marked for restore. 71 | scope: Optional scope for variable_op_scope. 72 | reuse: whether or not the layer and its variables should be reused. To be 73 | able to reuse the layer scope must be given. 74 | 75 | Returns: 76 | a tensor representing the output of the operation. 77 | 78 | """ 79 | inputs_shape = inputs.get_shape() 80 | with tf.variable_op_scope([inputs], scope, 'BatchNorm', reuse=reuse): 81 | axis = list(range(len(inputs_shape) - 1)) 82 | params_shape = inputs_shape[-1:] 83 | # Allocate parameters for the beta and gamma of the normalization. 84 | beta, gamma = None, None 85 | if center: 86 | beta = variables.variable('beta', 87 | params_shape, 88 | initializer=tf.zeros_initializer, 89 | trainable=trainable, 90 | restore=restore) 91 | if scale: 92 | gamma = variables.variable('gamma', 93 | params_shape, 94 | initializer=tf.ones_initializer, 95 | trainable=trainable, 96 | restore=restore) 97 | # Create moving_mean and moving_variance add them to 98 | # GraphKeys.MOVING_AVERAGE_VARIABLES collections. 99 | moving_collections = [moving_vars, tf.GraphKeys.MOVING_AVERAGE_VARIABLES] 100 | moving_mean = variables.variable('moving_mean', 101 | params_shape, 102 | initializer=tf.zeros_initializer, 103 | trainable=False, 104 | restore=restore, 105 | collections=moving_collections) 106 | moving_variance = variables.variable('moving_variance', 107 | params_shape, 108 | initializer=tf.ones_initializer, 109 | trainable=False, 110 | restore=restore, 111 | collections=moving_collections) 112 | if is_training: 113 | # Calculate the moments based on the individual batch. 
114 |       mean, variance = tf.nn.moments(inputs, axis)
115 | 
116 |       update_moving_mean = moving_averages.assign_moving_average(
117 |           moving_mean, mean, decay)
118 |       tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
119 |       update_moving_variance = moving_averages.assign_moving_average(
120 |           moving_variance, variance, decay)
121 |       tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)
122 |     else:
123 |       # Just use the moving_mean and moving_variance.
124 |       mean = moving_mean
125 |       variance = moving_variance
126 |     # Normalize the activations.
127 |     outputs = tf.nn.batch_normalization(
128 |         inputs, mean, variance, beta, gamma, epsilon)
129 |     outputs.set_shape(inputs.get_shape())
130 |     if activation:
131 |       outputs = activation(outputs)
132 |     return outputs
133 | 
134 | 
135 | def _two_element_tuple(int_or_tuple):
136 |   """Converts `int_or_tuple` to height, width.
137 | 
138 |   Several of the functions that follow accept arguments as either
139 |   a tuple of 2 integers or a single integer. A single integer
140 |   indicates that the 2 values of the tuple are the same.
141 | 
142 |   This function normalizes the input value by always returning a tuple.
143 | 
144 |   Args:
145 |     int_or_tuple: A list of 2 ints, a single int or a tf.TensorShape.
146 | 
147 |   Returns:
148 |     A tuple with 2 values.
149 | 
150 |   Raises:
151 |     ValueError: If `int_or_tuple` is not well formed.
152 |   """
153 |   if isinstance(int_or_tuple, (list, tuple)):
154 |     if len(int_or_tuple) != 2:
155 |       raise ValueError('Must be a list with 2 elements: %s' % int_or_tuple)
156 |     return int(int_or_tuple[0]), int(int_or_tuple[1])
157 |   if isinstance(int_or_tuple, int):
158 |     return int(int_or_tuple), int(int_or_tuple)
159 |   if isinstance(int_or_tuple, tf.TensorShape):
160 |     if len(int_or_tuple) == 2:
161 |       return int_or_tuple[0], int_or_tuple[1]
162 |   raise ValueError('Must be an int, a list with 2 elements or a TensorShape of '
163 |                    'length 2')
164 | 
165 | 
166 | @scopes.add_arg_scope
167 | def conv2d(inputs,
168 |            num_filters_out,
169 |            kernel_size,
170 |            stride=1,
171 |            padding='SAME',
172 |            activation=tf.nn.relu,
173 |            stddev=0.01,
174 |            bias=0.0,
175 |            weight_decay=0,
176 |            batch_norm_params=None,
177 |            is_training=True,
178 |            trainable=True,
179 |            restore=True,
180 |            scope=None,
181 |            reuse=None):
182 |   """Adds a 2D convolution followed by an optional batch_norm layer.
183 | 
184 |   conv2d creates a variable called 'weights', representing the convolutional
185 |   kernel, that is convolved with the input. If `batch_norm_params` is None, a
186 |   second variable called 'biases' is added to the result of the convolution
187 |   operation.
188 | 
189 |   Args:
190 |     inputs: a tensor of size [batch_size, height, width, channels].
191 |     num_filters_out: the number of output filters.
192 |     kernel_size: a list of length 2: [kernel_height, kernel_width] of
193 |       the filters. Can be an int if both values are the same.
194 |     stride: a list of length 2: [stride_height, stride_width].
195 |       Can be an int if both strides are the same. Note that presently
196 |       both strides must have the same value.
197 |     padding: one of 'VALID' or 'SAME'.
198 |     activation: activation function.
199 |     stddev: standard deviation of the truncated gaussian weight distribution.
200 |     bias: the initial value of the biases.
201 |     weight_decay: the weight decay.
202 |     batch_norm_params: parameters for the batch_norm. If is None don't use it.
203 |     is_training: whether or not the model is in training mode.
204 | trainable: whether or not the variables should be trainable or not. 205 | restore: whether or not the variables should be marked for restore. 206 | scope: Optional scope for variable_op_scope. 207 | reuse: whether or not the layer and its variables should be reused. To be 208 | able to reuse the layer scope must be given. 209 | Returns: 210 | a tensor representing the output of the operation. 211 | 212 | """ 213 | with tf.variable_op_scope([inputs], scope, 'Conv', reuse=reuse): 214 | kernel_h, kernel_w = _two_element_tuple(kernel_size) 215 | stride_h, stride_w = _two_element_tuple(stride) 216 | num_filters_in = inputs.get_shape()[-1] 217 | weights_shape = [kernel_h, kernel_w, 218 | num_filters_in, num_filters_out] 219 | weights_initializer = tf.truncated_normal_initializer(stddev=stddev) 220 | l2_regularizer = None 221 | if weight_decay and weight_decay > 0: 222 | l2_regularizer = losses.l2_regularizer(weight_decay) 223 | weights = variables.variable('weights', 224 | shape=weights_shape, 225 | initializer=weights_initializer, 226 | regularizer=l2_regularizer, 227 | trainable=trainable, 228 | restore=restore) 229 | conv = tf.nn.conv2d(inputs, weights, [1, stride_h, stride_w, 1], 230 | padding=padding) 231 | if batch_norm_params is not None: 232 | with scopes.arg_scope([batch_norm], is_training=is_training, 233 | trainable=trainable, restore=restore): 234 | outputs = batch_norm(conv, **batch_norm_params) 235 | else: 236 | bias_shape = [num_filters_out,] 237 | bias_initializer = tf.constant_initializer(bias) 238 | biases = variables.variable('biases', 239 | shape=bias_shape, 240 | initializer=bias_initializer, 241 | trainable=trainable, 242 | restore=restore) 243 | outputs = tf.nn.bias_add(conv, biases) 244 | if activation: 245 | outputs = activation(outputs) 246 | return outputs 247 | 248 | 249 | @scopes.add_arg_scope 250 | def fc(inputs, 251 | num_units_out, 252 | activation=tf.nn.relu, 253 | stddev=0.01, 254 | bias=0.0, 255 | weight_decay=0, 256 | batch_norm_params=None, 257 | is_training=True, 258 | trainable=True, 259 | restore=True, 260 | scope=None, 261 | reuse=None): 262 | """Adds a fully connected layer followed by an optional batch_norm layer. 263 | 264 | FC creates a variable called 'weights', representing the fully connected 265 | weight matrix, that is multiplied by the input. If `batch_norm` is None, a 266 | second variable called 'biases' is added to the result of the initial 267 | vector-matrix multiplication. 268 | 269 | Args: 270 | inputs: a [B x N] tensor where B is the batch size and N is the number of 271 | input units in the layer. 272 | num_units_out: the number of output units in the layer. 273 | activation: activation function. 274 | stddev: the standard deviation for the weights. 275 | bias: the initial value of the biases. 276 | weight_decay: the weight decay. 277 | batch_norm_params: parameters for the batch_norm. If is None don't use it. 278 | is_training: whether or not the model is in training mode. 279 | trainable: whether or not the variables should be trainable or not. 280 | restore: whether or not the variables should be marked for restore. 281 | scope: Optional scope for variable_op_scope. 282 | reuse: whether or not the layer and its variables should be reused. To be 283 | able to reuse the layer scope must be given. 284 | 285 | Returns: 286 | the tensor variable representing the result of the series of operations. 
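
  Example (a sketch; mirrors the 128-unit sigmoid hash layer built in
  inception_model.py):

    net = fc(net, 128, activation=tf.nn.sigmoid)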
287 | """ 288 | with tf.variable_op_scope([inputs], scope, 'FC', reuse=reuse): 289 | num_units_in = inputs.get_shape()[1] 290 | weights_shape = [num_units_in, num_units_out] 291 | weights_initializer = tf.truncated_normal_initializer(stddev=stddev) 292 | l2_regularizer = None 293 | if weight_decay and weight_decay > 0: 294 | l2_regularizer = losses.l2_regularizer(weight_decay) 295 | weights = variables.variable('weights', 296 | shape=weights_shape, 297 | initializer=weights_initializer, 298 | regularizer=l2_regularizer, 299 | trainable=trainable, 300 | restore=restore) 301 | if batch_norm_params is not None: 302 | outputs = tf.matmul(inputs, weights) 303 | with scopes.arg_scope([batch_norm], is_training=is_training, 304 | trainable=trainable, restore=restore): 305 | outputs = batch_norm(outputs, **batch_norm_params) 306 | else: 307 | bias_shape = [num_units_out,] 308 | bias_initializer = tf.constant_initializer(bias) 309 | biases = variables.variable('biases', 310 | shape=bias_shape, 311 | initializer=bias_initializer, 312 | trainable=trainable, 313 | restore=restore) 314 | outputs = tf.nn.xw_plus_b(inputs, weights, biases) 315 | if activation: 316 | outputs = activation(outputs) 317 | return outputs 318 | 319 | 320 | def one_hot_encoding(labels, num_classes, scope=None): 321 | """Transform numeric labels into onehot_labels. 322 | 323 | Args: 324 | labels: [batch_size] target labels. 325 | num_classes: total number of classes. 326 | scope: Optional scope for op_scope. 327 | Returns: 328 | one hot encoding of the labels. 329 | """ 330 | with tf.op_scope([labels], scope, 'OneHotEncoding'): 331 | batch_size = labels.get_shape()[0] 332 | indices = tf.expand_dims(tf.range(0, batch_size), 1) 333 | labels = tf.cast(tf.expand_dims(labels, 1), indices.dtype) 334 | concated = tf.concat(1, [indices, labels]) 335 | onehot_labels = tf.sparse_to_dense( 336 | concated, tf.pack([batch_size, num_classes]), 1.0, 0.0) 337 | onehot_labels.set_shape([batch_size, num_classes]) 338 | return onehot_labels 339 | 340 | 341 | @scopes.add_arg_scope 342 | def max_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None): 343 | """Adds a Max Pooling layer. 344 | 345 | It is assumed by the wrapper that the pooling is only done per image and not 346 | in depth or batch. 347 | 348 | Args: 349 | inputs: a tensor of size [batch_size, height, width, depth]. 350 | kernel_size: a list of length 2: [kernel_height, kernel_width] of the 351 | pooling kernel over which the op is computed. Can be an int if both 352 | values are the same. 353 | stride: a list of length 2: [stride_height, stride_width]. 354 | Can be an int if both strides are the same. Note that presently 355 | both strides must have the same value. 356 | padding: the padding method, either 'VALID' or 'SAME'. 357 | scope: Optional scope for op_scope. 358 | 359 | Returns: 360 | a tensor representing the results of the pooling operation. 361 | Raises: 362 | ValueError: if 'kernel_size' is not a 2-D list 363 | """ 364 | with tf.op_scope([inputs], scope, 'MaxPool'): 365 | kernel_h, kernel_w = _two_element_tuple(kernel_size) 366 | stride_h, stride_w = _two_element_tuple(stride) 367 | return tf.nn.max_pool(inputs, 368 | ksize=[1, kernel_h, kernel_w, 1], 369 | strides=[1, stride_h, stride_w, 1], 370 | padding=padding) 371 | 372 | 373 | @scopes.add_arg_scope 374 | def avg_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None): 375 | """Adds a Avg Pooling layer. 
373 | @scopes.add_arg_scope
374 | def avg_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None):
375 |   """Adds an Avg Pooling layer.
376 | 
377 |   It is assumed by the wrapper that the pooling is only done per image and not
378 |   in depth or batch.
379 | 
380 |   Args:
381 |     inputs: a tensor of size [batch_size, height, width, depth].
382 |     kernel_size: a list of length 2: [kernel_height, kernel_width] of the
383 |       pooling kernel over which the op is computed. Can be an int if both
384 |       values are the same.
385 |     stride: a list of length 2: [stride_height, stride_width].
386 |       Can be an int if both strides are the same. Note that presently
387 |       both strides must have the same value.
388 |     padding: the padding method, either 'VALID' or 'SAME'.
389 |     scope: Optional scope for op_scope.
390 | 
391 |   Returns:
392 |     a tensor representing the results of the pooling operation.
393 |   """
394 |   with tf.op_scope([inputs], scope, 'AvgPool'):
395 |     kernel_h, kernel_w = _two_element_tuple(kernel_size)
396 |     stride_h, stride_w = _two_element_tuple(stride)
397 |     return tf.nn.avg_pool(inputs,
398 |                           ksize=[1, kernel_h, kernel_w, 1],
399 |                           strides=[1, stride_h, stride_w, 1],
400 |                           padding=padding)
401 | 
402 | 
403 | @scopes.add_arg_scope
404 | def dropout(inputs, keep_prob=0.5, is_training=True, scope=None):
405 |   """Returns a dropout layer applied to the input.
406 | 
407 |   Args:
408 |     inputs: the tensor to pass to the Dropout layer.
409 |     keep_prob: the probability of keeping each input unit.
410 |     is_training: whether or not the model is in training mode. If so, dropout is
411 |       applied and values scaled. Otherwise, the inputs are returned unchanged.
412 |     scope: Optional scope for op_scope.
413 | 
414 |   Returns:
415 |     a tensor representing the output of the operation.
416 |   """
417 |   if is_training and keep_prob > 0:
418 |     with tf.op_scope([inputs], scope, 'Dropout'):
419 |       return tf.nn.dropout(inputs, keep_prob)
420 |   else:
421 |     return inputs
422 | 
423 | 
424 | def flatten(inputs, scope=None):
425 |   """Flattens the input while maintaining the batch_size.
426 | 
427 |   Assumes that the first dimension represents the batch.
428 | 
429 |   Args:
430 |     inputs: a tensor of size [batch_size, ...].
431 |     scope: Optional scope for op_scope.
432 | 
433 |   Returns:
434 |     a flattened tensor with shape [batch_size, k].
435 |   Raises:
436 |     ValueError: if inputs.shape is wrong.
437 |   """
438 |   if len(inputs.get_shape()) < 2:
439 |     raise ValueError('Inputs must have at least 2 dimensions')
440 |   dims = inputs.get_shape()[1:]
441 |   k = dims.num_elements()
442 |   with tf.op_scope([inputs], scope, 'Flatten'):
443 |     return tf.reshape(inputs, [-1, k])
444 | 
445 | 
446 | def repeat_op(repetitions, inputs, op, *args, **kwargs):
447 |   """Build a sequential Tower starting from inputs by using an op repeatedly.
448 | 
449 |   It creates new scopes for each operation by increasing the counter.
450 |   Example: given repeat_op(3, _, ops.conv2d, 64, [3, 3], scope='conv1')
451 |   it will repeat the given op under the following variable_scopes:
452 |     conv1/Conv
453 |     conv1/Conv_1
454 |     conv1/Conv_2
455 | 
456 |   Args:
457 |     repetitions: number of repetitions.
458 |     inputs: a tensor of size [batch_size, height, width, channels].
459 |     op: an operation.
460 |     *args: args for the op.
461 |     **kwargs: kwargs for the op.
462 | 
463 |   Returns:
464 |     a tensor result of applying the operation op, repetitions times.
465 |   Raises:
466 |     ValueError: if the op is unknown or wrong.
467 | """ 468 | scope = kwargs.pop('scope', None) 469 | with tf.variable_op_scope([inputs], scope, 'RepeatOp'): 470 | tower = inputs 471 | for _ in range(repetitions): 472 | tower = op(tower, *args, **kwargs) 473 | return tower 474 | -------------------------------------------------------------------------------- /src/inception/scopes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains the new arg_scope used for TF-Slim ops. 16 | 17 | Allows one to define models much more compactly by eliminating boilerplate 18 | code. This is accomplished through the use of argument scoping (arg_scope). 19 | 20 | Example of how to use scopes.arg_scope: 21 | 22 | with scopes.arg_scope(ops.conv2d, padding='SAME', 23 | stddev=0.01, weight_decay=0.0005): 24 | net = ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1') 25 | net = ops.conv2d(net, 256, [5, 5], scope='conv2') 26 | 27 | The first call to conv2d will use predefined args: 28 | ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 29 | stddev=0.01, weight_decay=0.0005, scope='conv1') 30 | 31 | The second call to Conv will overwrite padding: 32 | ops.conv2d(inputs, 256, [5, 5], padding='SAME', 33 | stddev=0.01, weight_decay=0.0005, scope='conv2') 34 | 35 | Example of how to reuse an arg_scope: 36 | with scopes.arg_scope(ops.conv2d, padding='SAME', 37 | stddev=0.01, weight_decay=0.0005) as conv2d_arg_scope: 38 | net = ops.conv2d(net, 256, [5, 5], scope='conv1') 39 | .... 40 | 41 | with scopes.arg_scope(conv2d_arg_scope): 42 | net = ops.conv2d(net, 256, [5, 5], scope='conv2') 43 | 44 | Example of how to use scopes.add_arg_scope: 45 | 46 | @scopes.add_arg_scope 47 | def conv2d(*args, **kwargs) 48 | """ 49 | from __future__ import absolute_import 50 | from __future__ import division 51 | from __future__ import print_function 52 | 53 | import contextlib 54 | import functools 55 | 56 | from tensorflow.python.framework import ops 57 | 58 | _ARGSTACK_KEY = ("__arg_stack",) 59 | 60 | _DECORATED_OPS = set() 61 | 62 | 63 | def _get_arg_stack(): 64 | stack = ops.get_collection(_ARGSTACK_KEY) 65 | if stack: 66 | return stack[0] 67 | else: 68 | stack = [{}] 69 | ops.add_to_collection(_ARGSTACK_KEY, stack) 70 | return stack 71 | 72 | 73 | def _current_arg_scope(): 74 | stack = _get_arg_stack() 75 | return stack[-1] 76 | 77 | 78 | def _add_op(op): 79 | key_op = (op.__module__, op.__name__) 80 | if key_op not in _DECORATED_OPS: 81 | _DECORATED_OPS.add(key_op) 82 | 83 | 84 | @contextlib.contextmanager 85 | def arg_scope(list_ops_or_scope, **kwargs): 86 | """Stores the default arguments for the given set of list_ops. 87 | 88 | For usage, please see examples at top of the file. 
89 | 
90 |   Args:
91 |     list_ops_or_scope: List or tuple of operations to set argument scope for or
92 |       a dictionary containing the current scope. When list_ops_or_scope is a
93 |       dict, kwargs must be empty. When list_ops_or_scope is a list or tuple,
94 |       then every op in it needs to be decorated with @add_arg_scope to work.
95 |     **kwargs: keyword=value that will define the defaults for each op in
96 |       list_ops. All the ops need to accept the given set of arguments.
97 | 
98 |   Yields:
99 |     the current_scope, which is a dictionary of {op: {arg: value}}
100 |   Raises:
101 |     TypeError: if list_ops is not a list or a tuple.
102 |     ValueError: if any op in list_ops has not been decorated with @add_arg_scope.
103 |   """
104 |   if isinstance(list_ops_or_scope, dict):
105 |     # Assumes that list_ops_or_scope is a scope that is being reused.
106 |     if kwargs:
107 |       raise ValueError("When attempting to reuse a scope by supplying a "
108 |                        "dictionary, kwargs must be empty.")
109 |     current_scope = list_ops_or_scope.copy()
110 |     try:
111 |       _get_arg_stack().append(current_scope)
112 |       yield current_scope
113 |     finally:
114 |       _get_arg_stack().pop()
115 |   else:
116 |     # Assumes that list_ops_or_scope is a list/tuple of ops with kwargs.
117 |     if not isinstance(list_ops_or_scope, (list, tuple)):
118 |       raise TypeError("list_ops_or_scope must either be a list/tuple or a "
119 |                       "reused scope (i.e. dict)")
120 |     try:
121 |       current_scope = _current_arg_scope().copy()
122 |       for op in list_ops_or_scope:
123 |         key_op = (op.__module__, op.__name__)
124 |         if not has_arg_scope(op):
125 |           raise ValueError("%s is not decorated with @add_arg_scope" % (key_op,))
126 |         if key_op in current_scope:
127 |           current_kwargs = current_scope[key_op].copy()
128 |           current_kwargs.update(kwargs)
129 |           current_scope[key_op] = current_kwargs
130 |         else:
131 |           current_scope[key_op] = kwargs.copy()
132 |       _get_arg_stack().append(current_scope)
133 |       yield current_scope
134 |     finally:
135 |       _get_arg_stack().pop()
136 | 
137 | 
138 | def add_arg_scope(func):
139 |   """Decorates a function with args so it can be used within an arg_scope.
140 | 
141 |   Args:
142 |     func: function to decorate.
143 | 
144 |   Returns:
145 |     The decorated function, func_with_args().
146 |   """
147 |   @functools.wraps(func)
148 |   def func_with_args(*args, **kwargs):
149 |     current_scope = _current_arg_scope()
150 |     current_args = kwargs
151 |     key_func = (func.__module__, func.__name__)
152 |     if key_func in current_scope:
153 |       current_args = current_scope[key_func].copy()
154 |       current_args.update(kwargs)
155 |     return func(*args, **current_args)
156 |   _add_op(func)
157 |   return func_with_args
158 | 
159 | 
160 | def has_arg_scope(func):
161 |   """Checks whether a func has been decorated with @add_arg_scope or not.
162 | 
163 |   Args:
164 |     func: function to check.
165 | 
166 |   Returns:
167 |     a boolean.
168 |   """
169 |   key_op = (func.__module__, func.__name__)
170 |   return key_op in _DECORATED_OPS
171 | 
--------------------------------------------------------------------------------
/src/inception/slim.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """TF-Slim grouped API. Please see README.md for details and usage.""" 16 | # pylint: disable=unused-import 17 | 18 | # Collapse tf-slim into a single namespace. 19 | from src.inception import inception_model as inception 20 | from src.inception import losses 21 | from src.inception import ops 22 | from src.inception import scopes 23 | from src.inception import variables 24 | from src.inception.scopes import arg_scope 25 | -------------------------------------------------------------------------------- /src/inception/variables.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains convenience wrappers for creating variables in TF-Slim. 16 | 17 | The variables module is typically used for defining model variables from the 18 | ops routines (see slim.ops). Such variables are used for training, evaluation 19 | and inference of models. 20 | 21 | All the variables created through this module would be added to the 22 | MODEL_VARIABLES collection, if you create a model variable outside slim, it can 23 | be added with slim.variables.add_variable(external_variable, reuse). 24 | 25 | Usage: 26 | weights_initializer = tf.truncated_normal_initializer(stddev=0.01) 27 | l2_regularizer = lambda t: losses.l2_loss(t, weight=0.0005) 28 | weights = variables.variable('weights', 29 | shape=[100, 100], 30 | initializer=weights_initializer, 31 | regularizer=l2_regularizer, 32 | device='/cpu:0') 33 | 34 | biases = variables.variable('biases', 35 | shape=[100], 36 | initializer=tf.zeros_initializer, 37 | device='/cpu:0') 38 | 39 | # More complex example. 40 | 41 | net = slim.ops.conv2d(input, 32, [3, 3], scope='conv1') 42 | net = slim.ops.conv2d(net, 64, [3, 3], scope='conv2') 43 | with slim.arg_scope([variables.variable], restore=False): 44 | net = slim.ops.conv2d(net, 64, [3, 3], scope='conv3') 45 | 46 | # Get all model variables from all the layers. 47 | model_variables = slim.variables.get_variables() 48 | 49 | # Get all model variables from a specific the layer, i.e 'conv1'. 50 | conv1_variables = slim.variables.get_variables('conv1') 51 | 52 | # Get all weights from all the layers. 53 | weights = slim.variables.get_variables_by_name('weights') 54 | 55 | # Get all bias from all the layers. 
56 | biases = slim.variables.get_variables_by_name('biases') 57 | 58 | # Get all variables to restore. 59 | # (i.e. only those created by 'conv1' and 'conv2') 60 | variables_to_restore = slim.variables.get_variables_to_restore() 61 | 62 | ************************************************ 63 | * Initializing model variables from a checkpoint 64 | ************************************************ 65 | 66 | # Create some variables. 67 | v1 = slim.variables.variable(name="v1", ..., restore=False) 68 | v2 = slim.variables.variable(name="v2", ...) # By default restore=True 69 | ... 70 | # The list of variables to restore should only contain 'v2'. 71 | variables_to_restore = slim.variables.get_variables_to_restore() 72 | restorer = tf.train.Saver(variables_to_restore) 73 | with tf.Session() as sess: 74 | # Restore variables from disk. 75 | restorer.restore(sess, "/tmp/model.ckpt") 76 | print("Model restored.") 77 | # Do some work with the model 78 | ... 79 | 80 | """ 81 | from __future__ import absolute_import 82 | from __future__ import division 83 | from __future__ import print_function 84 | 85 | import tensorflow as tf 86 | 87 | from tensorflow.core.framework import graph_pb2 88 | from src.inception import scopes 89 | 90 | # Collection containing all the variables created using slim.variables 91 | MODEL_VARIABLES = '_model_variables_' 92 | 93 | # Collection containing the slim.variables that are created with restore=True. 94 | VARIABLES_TO_RESTORE = '_variables_to_restore_' 95 | 96 | 97 | def add_variable(var, restore=True): 98 | """Adds a variable to the MODEL_VARIABLES collection. 99 | 100 | Optionally it will add the variable to the VARIABLES_TO_RESTORE collection. 101 | Args: 102 | var: a variable. 103 | restore: whether the variable should be added to the 104 | VARIABLES_TO_RESTORE collection. 105 | 106 | """ 107 | collections = [MODEL_VARIABLES] 108 | if restore: 109 | collections.append(VARIABLES_TO_RESTORE) 110 | for collection in collections: 111 | if var not in tf.get_collection(collection): 112 | tf.add_to_collection(collection, var) 113 | 114 | 115 | def get_variables(scope=None, suffix=None): 116 | """Gets the list of variables, filtered by scope and/or suffix. 117 | 118 | Args: 119 | scope: an optional scope for filtering the variables to return. 120 | suffix: an optional suffix for filtering the variables to return. 121 | 122 | Returns: 123 | a copied list of variables with scope and suffix. 124 | """ 125 | candidates = tf.get_collection(MODEL_VARIABLES, scope)[:] 126 | if suffix is not None: 127 | candidates = [var for var in candidates if var.op.name.endswith(suffix)] 128 | return candidates 129 | 130 | 131 | def get_variables_to_restore(): 132 | """Gets the list of variables to restore. 133 | 134 | Returns: 135 | a copied list of variables. 136 | """ 137 | return tf.get_collection(VARIABLES_TO_RESTORE)[:] 138 | 139 | 140 | def get_variables_by_name(given_name, scope=None): 141 | """Gets the list of variables that were given that name. 142 | 143 | Args: 144 | given_name: name given to the variable without scope. 145 | scope: an optional scope for filtering the variables to return. 146 | 147 | Returns: 148 | a copied list of variables with the given name and prefix. 149 | """ 150 | return get_variables(scope=scope, suffix=given_name) 151 | 152 | 153 | def get_unique_variable(name): 154 | """Gets the variable uniquely identified by that name. 155 | 156 | Args: 157 | name: a name that uniquely identifies the variable. 158 | 159 | Returns: 160 | a tensorflow variable. 
161 | 162 | Raises: 163 | ValueError: if no variable uniquely identified by the name exists. 164 | """ 165 | candidates = tf.get_collection(tf.GraphKeys.VARIABLES, name) 166 | if not candidates: 167 | raise ValueError('Couldnt find variable %s' % name) 168 | 169 | for candidate in candidates: 170 | if candidate.op.name == name: 171 | return candidate 172 | raise ValueError('Variable %s does not uniquely identify a variable', name) 173 | 174 | 175 | class VariableDeviceChooser(object): 176 | """Slim device chooser for variables. 177 | 178 | When using a parameter server it will assign them in a round-robin fashion. 179 | When not using a parameter server it allows GPU:0 placement otherwise CPU:0. 180 | """ 181 | 182 | def __init__(self, 183 | num_parameter_servers=0, 184 | ps_device='/job:ps', 185 | placement='CPU:0'): 186 | """Initialize VariableDeviceChooser. 187 | 188 | Args: 189 | num_parameter_servers: number of parameter servers. 190 | ps_device: string representing the parameter server device. 191 | placement: string representing the placement of the variable either CPU:0 192 | or GPU:0. When using parameter servers forced to CPU:0. 193 | """ 194 | self._num_ps = num_parameter_servers 195 | self._ps_device = ps_device 196 | self._placement = placement if num_parameter_servers == 0 else 'CPU:0' 197 | self._next_task_id = 0 198 | 199 | def __call__(self, op): 200 | device_string = '' 201 | if self._num_ps > 0: 202 | task_id = self._next_task_id 203 | self._next_task_id = (self._next_task_id + 1) % self._num_ps 204 | device_string = '%s/task:%d' % (self._ps_device, task_id) 205 | device_string += '/%s' % self._placement 206 | return device_string 207 | 208 | 209 | # TODO(sguada) Remove once get_variable is able to colocate op.devices. 210 | def variable_device(device, name): 211 | """Fix the variable device to colocate its ops.""" 212 | if callable(device): 213 | var_name = tf.get_variable_scope().name + '/' + name 214 | var_def = graph_pb2.NodeDef(name=var_name, op='Variable') 215 | device = device(var_def) 216 | if device is None: 217 | device = '' 218 | return device 219 | 220 | 221 | @scopes.add_arg_scope 222 | def global_step(device=''): 223 | """Returns the global step variable. 224 | 225 | Args: 226 | device: Optional device to place the variable. It can be an string or a 227 | function that is called to get the device for the variable. 228 | 229 | Returns: 230 | the tensor representing the global step variable. 231 | """ 232 | global_step_ref = tf.get_collection(tf.GraphKeys.GLOBAL_STEP) 233 | if global_step_ref: 234 | return global_step_ref[0] 235 | else: 236 | collections = [ 237 | VARIABLES_TO_RESTORE, 238 | tf.GraphKeys.VARIABLES, 239 | tf.GraphKeys.GLOBAL_STEP, 240 | ] 241 | # Get the device for the variable. 242 | with tf.device(variable_device(device, 'global_step')): 243 | return tf.get_variable('global_step', shape=[], dtype=tf.int64, 244 | initializer=tf.zeros_initializer, 245 | trainable=False, collections=collections) 246 | 247 | 248 | @scopes.add_arg_scope 249 | def variable(name, shape=None, dtype=tf.float32, initializer=None, 250 | regularizer=None, trainable=True, collections=None, device='', 251 | restore=True): 252 | """Gets an existing variable with these parameters or creates a new one. 253 | 254 | It also add itself to a group with its name. 255 | 256 | Args: 257 | name: the name of the new or existing variable. 258 | shape: shape of the new or existing variable. 259 | dtype: type of the new or existing variable (defaults to `DT_FLOAT`). 
260 |     initializer: initializer for the variable if one is created.
261 |     regularizer: a (Tensor -> Tensor or None) function; the result of
262 |       applying it on a newly created variable will be added to the collection
263 |       GraphKeys.REGULARIZATION_LOSSES and can be used for regularization.
264 |     trainable: If `True` also add the variable to the graph collection
265 |       `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
266 |     collections: A list of collection names to which the Variable will be added.
267 |       Note that the variable is always also added to the tf.GraphKeys.VARIABLES
268 |       and MODEL_VARIABLES collections.
269 |     device: Optional device to place the variable. It can be a string or a
270 |       function that is called to get the device for the variable.
271 |     restore: whether the variable should be added to the
272 |       VARIABLES_TO_RESTORE collection.
273 | 
274 |   Returns:
275 |     The created or existing variable.
276 |   """
277 |   collections = list(collections or [])
278 | 
279 |   # Make sure variables are added to tf.GraphKeys.VARIABLES and MODEL_VARIABLES
280 |   collections += [tf.GraphKeys.VARIABLES, MODEL_VARIABLES]
281 |   # Add to VARIABLES_TO_RESTORE if necessary
282 |   if restore:
283 |     collections.append(VARIABLES_TO_RESTORE)
284 |   # Remove duplicates
285 |   collections = set(collections)
286 |   # Get the device for the variable.
287 |   with tf.device(variable_device(device, name)):
288 |     return tf.get_variable(name, shape=shape, dtype=dtype,
289 |                            initializer=initializer, regularizer=regularizer,
290 |                            trainable=trainable, collections=collections)
291 | 
--------------------------------------------------------------------------------
/src/myComputePrecision.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | import math
6 | import os.path
7 | import time
8 | import sys, os
9 | import numpy as np
10 | import tensorflow as tf
11 | 
12 | from inception import image_processing
13 | from inception import inception_module
14 | FLAGS = tf.app.flags.FLAGS
15 | 
16 | #checkpoint_path=E:\老电脑\实验室\海报检索1\海报检索\inceptionmodel.ckpt-42100
17 | #/home/imc/caffe/poster/mytensor0417/model11/model.ckpt-42100
18 | tf.app.flags.DEFINE_string('checkpoint_path', 'D:/dev/unicorn/model/model.ckpt-42100',
19 |                            """Directory where to read model checkpoints.""")
20 | # num_classes = 11: the number of classes in the training data.
21 | tf.app.flags.DEFINE_integer('num_classes', 11,
22 |                             """Number of classes.""")
23 | 
24 | 
25 | class NetSaver():
26 |     def __init__(self):
27 |         self.loadNet()
28 |         self.loadModel()
29 | 
30 |     def loadNet(self):
31 |         self.__buildNet()
32 | 
33 |     def loadModel(self, model=FLAGS.checkpoint_path):
34 |         self.saver.restore(self.__sess, model)
35 | 
36 |     # @property exposes a private member as a read-only public attribute.
37 |     @property
38 |     def classes(self):
39 |         return self.__classes
40 | 
41 |     @property
42 |     def sess(self):
43 |         return self.__sess
44 | 
45 |     @property
46 |     def output(self):
47 |         return self.__output
48 | 
49 |     @property
50 |     def image_buffer(self):
51 |         return self.__image_buffer
52 | 
53 |     def __load_model(self):
54 |         #self.__sess = tf.Session()
55 |         self.saver.restore(self.__sess, self.model)
56 | 
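    # The placeholder pipeline below mirrors the eval-time preprocessing of
    # the Inception pipeline (an assumption; compare image_processing.py):
    # decode JPEG -> float in [0, 1] -> 87.5% central crop -> bilinear resize
    # to the 299 x 299 input size expected by Inception v3.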
57 |     def __buildInputImagePlaceholder(self):
58 |         self.__image_buffer = tf.placeholder("string")
59 |         image = tf.image.decode_jpeg(self.image_buffer, channels=3)
60 |         image = tf.image.convert_image_dtype(image, dtype=tf.float32)
61 |         image = tf.image.central_crop(image, central_fraction=0.875)
62 |         # Resize the image to the network input size.
63 |         image = tf.expand_dims(image, 0)
64 |         image = tf.image.resize_bilinear(image, [299, 299], align_corners=False)
65 |         image_tensor = tf.squeeze(image, [0])
66 |         self.x_image = tf.reshape(image_tensor, [-1, 299, 299, 3])
67 | 
68 |     def __buildNet(self):
69 |         graph = tf.Graph().as_default()  # note: the returned context manager is never entered
70 |         # Number of classes in the dataset label set plus 1.
71 |         # Label 0 is reserved for an (unused) background class.
72 |         num_classes = FLAGS.num_classes + 1
73 |         # Set up an input image placeholder to feed an image buffer.
74 |         self.__buildInputImagePlaceholder()
75 |         # Build a Graph that computes the logits predictions from the inference model.
76 |         self.logits, self.highfeatures, self.ssss = inception_module.inference(self.x_image, num_classes)
77 |         # result is the output of the softmax unit.
78 |         self.__output = tf.nn.softmax(self.ssss, name="result")
79 |         # Restore the moving average version of the learned variables for eval.
80 |         variable_averages = tf.train.ExponentialMovingAverage(
81 |             inception_module.MOVING_AVERAGE_DECAY)
82 |         variables_to_restore = variable_averages.variables_to_restore()
83 |         self.saver = tf.train.Saver(variables_to_restore)
84 |         self.__sess = tf.Session()
85 | 
86 |     def classify(self, image_path):
87 |         image_data = tf.gfile.FastGFile(image_path, 'rb').read()
88 |         # output: the probability assigned to each class.
89 |         output, predictions, high = self.sess.run([self.output, self.logits, self.highfeatures], feed_dict={self.image_buffer: image_data})
90 |         index = 0
91 |         pro = 0
92 |         for i in range(len(output[0])):
93 |             if output[0][i] > pro:
94 |                 pro = output[0][i]
95 |                 index = i
96 |         return index
97 | 
98 |     # def predict(self):
99 |     #     img1 = "E:/老电脑/实验室/海报检索1/测试视频/10.jpg"
100 |     #     feat1 = self.getOneFeatures(img1)
101 |     #     ss = self.classify(img1)
102 |     #     #print(len(feat1))
103 |     #     img2 = "/home/imc/caffe/data/package/500-999/highlight_resize150/614.2.jpg"
104 |     #     feat2 = self.getOneFeatures(img2)
105 |     #     #print(len(feat2))
106 | 
107 |     # def computeprecision(self):
108 |     #     roots = []
109 |     #     imgsroot0 = 'D:/dev/unicorn/data/testVideo/video0/'
110 |     #     imgsroot1 = 'D:/dev/unicorn/data/testVideo/video1/'
111 |     #     imgsroot2 = 'D:/dev/unicorn/data/testVideo/video2/'
112 |     #     #roots.append(imgsroot0)
113 |     #     roots.append(imgsroot1)
114 |     #     #roots.append(imgsroot2)
115 |     #     count = 0
116 |     #     lab = 1
117 |     #     for imgroot in roots:
118 |     #         for root, dirs, files in os.walk(imgroot):
119 |     #             for i in range(len(files)):
120 |     #                 img = imgroot + files[i]
121 |     #                 index = self.classify(img)
122 |     #                 if index != 2:
123 |     #                     count = count + 1
124 |     #                 #print(index)
125 |     #             lab = lab + 1
126 | 
127 | if __name__ == '__main__':
128 |     a = NetSaver()
129 |     print(a.classify('D:/dev/unicorn/data/testVideo/10.jpg'))
--------------------------------------------------------------------------------
/src/myGetFeatures.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import numpy as np
4 | import cv2
5 | 
6 | from myComputePrecision import NetSaver
7 | from Config import Config
--------------------------------------------------------------------------------
/src/myRetrieval.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import cv2.flann
4 | import heapq
5 | # #import AA
6 | # from AA import AA
7 | # aa = AA.AA()
8 | 
9 | HASH_LENGTH = 1
10 | FEATURE_LENGTH = 3
11 | K = 2
12 | 
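# Two-stage retrieval (a sketch of the intended flow): findByHamming() uses a
# FLANN kd-tree over the short hash features to produce a candidate set, and
# findByE() re-ranks the candidates by (squared) Euclidean distance over the
# longer features; search() chains the two stages.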
13 | class Retrieval():
14 |     # """docstring for Retrieval"""
15 |     def __init__(self):
16 |         self.buildHashFeatureIndex()
17 |         self.buildEFeatureIndex()
18 | 
19 |     def buildHashFeatureIndex(self):
20 |         pass
21 | 
22 |     def buildEFeatureIndex(self):
23 |         pass
24 | 
25 |     def readImg(self, imgPath):
26 |         image = tf.gfile.FastGFile(imgPath, 'rb').read()
27 |         return image
28 | 
29 |     # Extract the features of the query image.
30 |     def getFeatures(self, img):
31 |         #hashfeature,feature,high = self.sess.run([self.output,self.logits,self.highfeatures],feed_dict={self.image_buffer: image_data})
32 |         hashfeature = np.zeros(shape=[1, HASH_LENGTH], dtype='float32')
33 |         feature = np.zeros(shape=[1, FEATURE_LENGTH], dtype='float32')
34 |         #feature = np.squeeze(feature)
35 |         #hashfeature = np.squeeze(high[0])
36 |         # hashfeature = np.array([hashfeature])
37 |         # feature = np.array([feature])
38 |         # for i in range(len(feature[0])):
39 |         #     if feature[0][i] > 0.5:
40 |         #         feature[0][i] = 1
41 |         #     else:
42 |         #         feature[0][i] = 0
43 |         return hashfeature, feature
44 | 
45 |     def search(self, feature1, feature2):
46 |         candidate = self.findByHamming(feature1)
47 |         label = self.findByE(candidate, feature2)
48 |         return label
49 | 
50 |     def findByHamming(self, feature1):
51 |         # Build a kd-tree index.
52 |         FLANN_INDEX_KDTREE = 1
53 |         self.flann_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=4)
54 |         self.hashfeatureTest = np.zeros(shape=[3, 1], dtype='float32')
55 |         self.hashfeatureTest[1][0] = 1
56 |         self.flann = cv2.flann.Index(self.hashfeatureTest, self.flann_params)
57 |         np.save("D://devPy/hashfeature1", self.hashfeatureTest)
58 |         print(np.load("D://devPy/hashfeature1.npy"))
59 | 
60 |         # kNN search for the nearest neighbours; collect the candidate ids.
61 |         idx, _ = self.flann.knnSearch(feature1, K, params={})
62 |         print(idx, _)
63 |         # Flatten idx: take the elements out of the nested brackets as a plain list.
64 |         candidate = idx[0].tolist()
65 |         print(candidate)
66 | 
67 |         for i in range(K):
68 |             # _condition=(feature1 == self.featureTest[candidate[i]])
69 |             # condition = _condition.all(1)
70 |             # res1=np.where(condition)
71 |             # res2=res1[0]
72 |             # print(_condition)
73 |             # print(condition)
74 |             # print(res1)
75 |             # print(res2)
76 |             same = np.where((feature1 == self.hashfeatureTest[candidate[i]]).all(1))[0]
77 |             candidate.extend(same)
78 |         candidate = list(set(candidate))
79 |         print(candidate)
80 |         return candidate
81 | 
82 |     def findByE(self, candidate, feature2):
83 |         self.featureTest = np.zeros(shape=[3, 3], dtype='float32')
84 |         self.featureTest[1][0] = 1
85 |         self.featureTest[2][1] = 1
86 |         #minDist=np.sqrt(np.sum(np.square(self.featureTest[candidate[0]] - feature2[0])))
87 |         # label=candidate[0]
88 |         # tag = candidate[0]
89 |         # for i in range(1,len(candidate)):
90 |         #     # feature2[0] is the feature of the query image.
91 |         #     dist=np.sqrt(np.sum(np.square(self.featureTest[candidate[i]] - feature2[0])))
92 |         #     if minDist>dist:
93 |         #         minDist=dist
94 |         #         label=tag
95 |         #         tag=candidate[i]
96 |         # If the query image is not in the database we cannot report "not found" (the distance itself is not kept).
97 |         dist = lambda x: np.sum(np.square(self.featureTest[x] - feature2[0]))
98 |         #dist = lambda x: np.linalg.norm(self.featureTest[x] - feature2[0])
99 |         label = heapq.nsmallest(1, candidate, key=dist)
100 |         print("label:", label)
101 |         return label
102 | 
103 |     def find(self):
104 |         img = self.readImg("C://Users/Mozhouting/Desktop/1.jpg")
105 |         f1, f2 = self.getFeatures(img)
106 |         label = self.search(f1, f2)
107 |         return
108 | 
109 | aa = Retrieval()
110 | aa.find()
111 | 
112 | # a=[4,3,2,1]
113 | # print(heapq.nsmallest(2,a,key=lambda x:-x))
114 | 
--------------------------------------------------------------------------------
/test/MultiDbBulidTest.py:
--------------------------------------------------------------------------------
1 | '''
2 | '''
3 | # coding = 'utf-8'
4 | import numpy
--------------------------------------------------------------------------------
/test/MultiDbBulidTest.py:
--------------------------------------------------------------------------------
'''
'''
# coding: utf-8
import numpy as np
import os
import random
from src.Util import Path
from src import DbBuild
from src import FeatureExtract

class AbnormalDbBuilder(object):
    '''A deliberately broken builder for negative testing: it always writes
    a single 128-d row, so the tester's database size check should fail
    when this builder is swapped in.'''

    def __init__(self, config, featureExtracter=None):
        pass

    def build(self):
        imageFeatures = np.zeros(shape=[1, 128], dtype='float32')
        imageFeatures.tofile("D:/3.bin")

# Legacy single-image tester, kept commented out for reference:
# class DbBulidTester(object):
#     def __init__(self):
#         self.__config = None
#         self.__hashFeature = None
#         self.__imageFeature = None
#         self.__featureExtracter = None
#
#     def __setup(self):
#         self.__config = {
#             'imageRoot': 'D:/dev/unicorn/test/__1SingleImage/image/',
#             'hashFeatureFilename': r'D:\dev\unicorn\test\__1SingleImage\feature\1.bin',
#             'imageFeatureFilename': r'D:\dev\unicorn\test\__1SingleImage\feature\2.bin',
#             # 'sample': [],
#             'extracter': {
#                 'num_classes': 11,
#                 'checkpoint_path': 'D:/dev/unicorn/model/model.ckpt-42100'
#             }
#         }
#         self.__featureExtracter = FeatureExtract.FeatureExtracter(self.__config['extracter'])
#         self.__hashFeature, self.__imageFeature = self.__featureExtracter.extract(r'D:\dev\unicorn\test\__1SingleImage\image\10.jpg')
#
#     def __run(self):
#         builder = DbBuild.DbBuilder(self.__config, self.__featureExtracter)
#         # builder = AbnormalDbBuilder(self.__config, self.__featureExtracter)
#         builder.build()
#         isOK, reason = self.__compare()
#         print(reason if not isOK else 'OK')
#
#     def __compare(self):
#         isOK, reason = self.__compareFeature(self.__config['imageFeatureFilename'], self.__imageFeature)
#         if not isOK:
#             return False, 'image feature ... , {}'.format(reason)
#
#         isOK, reason = self.__compareFeature(self.__config['hashFeatureFilename'], self.__hashFeature)
#         if not isOK:
#             return False, 'hash feature ... , {}'.format(reason)
#
#         return True, ''
#
#     def __compareFeature(self, filename, expectedFeature):
#         try:
#             actualFeature = np.fromfile(filename, dtype=np.float32)
#         except Exception:
#             return False, "read file error!"
#
#         if expectedFeature.size != actualFeature.size:
#             return False, "size not match!"
#
#         if np.linalg.norm(actualFeature - expectedFeature) >= 1e-6:
#             return False, "feature is wrong!"
#
#         return True, "OK"
#
#     def __cleanUp(self):
#         try:
#             os.remove(self.__config['imageFeatureFilename'])
#             os.remove(self.__config['hashFeatureFilename'])
#         except Exception as err:
#             print(err)
#
#     def test(self):
#         self.__setup()
#         self.__run()
#         self.__cleanUp()
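# --- Illustrative sketch (not part of this repo) ---------------------------
# Both the legacy tester above and MultiDbBuildTester below apply the same
# two checks to a database file. The helper name here is an assumption for
# illustration; it presumes the DbBuild layout of a raw, row-major float32
# dump written with ndarray.tofile.
def verifyDbFile(filename, expectedFeatures, featureDim):
    # Check 1: the file must hold exactly one featureDim-wide float32 row
    # per expected feature.
    actual = np.fromfile(filename, dtype=np.float32)
    if actual.size != expectedFeatures.shape[0] * featureDim:
        return False, 'size not match'
    actual = actual.reshape(-1, featureDim)
    # Check 2: each stored row must match its expected feature to within a
    # small L2 tolerance (float32 round-trips are not always bit-exact).
    for row, expected in zip(actual, expectedFeatures):
        if np.linalg.norm(row - expected) >= 1e-6:
            return False, 'feature is wrong'
    return True, ''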
class MultiDbBuildTester(object):

    _OK = (True, '')
    _ERROR_BAD_FILE = (False, 'read file error')
    _ERROR_INCORRECT_DBSIZE = (False, 'size not match')
    _ERROR_INCORRECT_FEATURE = (False, 'feature is wrong')

    def __init__(self):
        self.__config = None
        self.__featureExtracter = None
        self.__totalImages = -1
        self.__checkCandidates = None
        self.__hashFeatures = None
        self.__imageFeatures = None

    def __setup(self):
        self.__config = {
            'imageRoot': r'D:\dev\unicorn\test\data\__NImage\image',
            'hashFeatureFilename': r'D:\dev\unicorn\test\data\__NImage\feature\1.bin',
            'imageFeatureFilename': r'D:\dev\unicorn\test\data\__NImage\feature\2.bin',
            # 'sample': [0, 10, 20],
            'extracter': {
                'num_classes': 11,
                'checkpoint_path': 'D:/dev/unicorn/model/model.ckpt-42100'
            }
        }
        self.__featureExtracter = FeatureExtract.FeatureExtracter(self.__config['extracter'])
        self.__setupFeatures()

    def __setupFeatures(self):
        # Extract reference features for a subset of the images; the built
        # database is later spot-checked against these.
        imagePaths = Path.listFiles(self.__config['imageRoot'])
        self.__totalImages = len(imagePaths)
        # Use the configured 'sample' indices if given, otherwise draw a
        # random ~10% sample.
        self.__checkCandidates = self.__config.get('sample', MultiDbBuildTester._createCheckCandidates(self.__totalImages))
        testImageNum = len(self.__checkCandidates)
        self.__hashFeatures = np.zeros(shape=[testImageNum, 128], dtype='float32')
        self.__imageFeatures = np.zeros(shape=[testImageNum, 2048], dtype='float32')
        for i, j in enumerate(self.__checkCandidates):
            self.__hashFeatures[i], self.__imageFeatures[i] = self.__featureExtracter.extract(imagePaths[j])

    @staticmethod
    def _createCheckCandidates(totalImages):
        # Sample ceil(totalImages / 10) distinct image indices.
        seq = range(totalImages)
        n = (totalImages + 9) // 10
        return random.sample(seq, n)

    def __run(self):
        builder = DbBuild.DbBuilder(self.__config, self.__featureExtracter)
        # builder = AbnormalDbBuilder(self.__config, self.__featureExtracter)
        builder.build()
        isOK, reason = self.__compare()
        print((reason, self.__checkCandidates) if not isOK else 'OK')

    def __compare(self):
        isOK, reason = self.__compareFeatures(self.__config['imageFeatureFilename'],
                                              self.__imageFeatures,
                                              self.__totalImages,
                                              2048)
        if not isOK:
            return False, 'image feature ... , {}'.format(reason)

        isOK, reason = self.__compareFeatures(self.__config['hashFeatureFilename'],
                                              self.__hashFeatures,
                                              self.__totalImages,
                                              128)
        if not isOK:
            return False, 'hash feature ... , {}'.format(reason)

        return MultiDbBuildTester._OK

    def __compareFeatures(self, filename, expectedFeatures, totalFeatures, featureDim):
        try:
            actualFeatures = np.fromfile(filename, dtype=np.float32)
        except Exception:
            return MultiDbBuildTester._ERROR_BAD_FILE

        if totalFeatures * featureDim != actualFeatures.size:
            return MultiDbBuildTester._ERROR_INCORRECT_DBSIZE

        actualFeatures.shape = totalFeatures, featureDim

        # Only the sampled rows are compared, not the whole database.
        isOK = all([self.__compareFeature(actualFeatures[j], expectedFeatures[i])
                    for i, j in enumerate(self.__checkCandidates)])
        return MultiDbBuildTester._OK if isOK else MultiDbBuildTester._ERROR_INCORRECT_FEATURE

    def __compareFeature(self, feature0, feature1):
        return np.linalg.norm(feature0 - feature1) < 1e-6

    def __cleanUp(self):
        try:
            os.remove(self.__config['imageFeatureFilename'])
            os.remove(self.__config['hashFeatureFilename'])
        except Exception as err:
            print(err)

    def test(self):
        self.__setup()
        self.__run()
        self.__cleanUp()
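# Worked example of the sampling rule in _createCheckCandidates: the
# ceiling division (totalImages + 9) // 10 draws ceil(totalImages / 10)
# distinct indices, so 25 images yield 3 spot-checked rows and even a
# single image yields (1 + 9) // 10 == 1 candidate. For instance:
#
#   MultiDbBuildTester._createCheckCandidates(25)  # e.g. [3, 17, 24]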
if __name__ == '__main__':
    dbTester = MultiDbBuildTester()
    dbTester.test()
--------------------------------------------------------------------------------