├── main.py
├── __init__.py
├── models
│   ├── visnet.py
│   ├── __init__.py
│   ├── loss.py
│   ├── vgg_classify.py
│   ├── dml_model.py
│   ├── hashnet.py
│   ├── sample_dml.py
│   └── mx_margin_model.py
├── checkpoints
│   └── deep_test.txt
├── data
│   ├── classify
│   │   ├── __init__.py
│   │   └── ClassifyData.py
│   ├── hashdata
│   │   ├── __init__.py
│   │   └── hash_tri_files.py
│   ├── n_pair_mc
│   │   ├── __init__.py
│   │   ├── deep_in_fashion.py
│   │   └── npair_dataset.py
│   ├── margin_cub200
│   │   ├── __init__.py
│   │   └── cub200_margin.py
│   ├── mxdata
│   │   ├── basic_module
│   │   │   ├── __init__.py
│   │   │   └── basic_transform.py
│   │   ├── __init__.py
│   │   ├── composedataset.py
│   │   ├── mxcub_simple.py
│   │   ├── mxcub200.py
│   │   ├── online_products.py
│   │   └── deep_fashion.py
│   └── __init__.py
├── utils
│   ├── train_mx_margin.py
│   ├── __init__.py
│   ├── mxnet_server_client.py
│   ├── extract_Ebaytxt_fromDeepFashion.py
│   ├── sku_viewer.py
│   ├── log_config.py
│   ├── parse_deepinshopdata.py
│   ├── visulization.py
│   └── vis_tsne_images.py
├── server
│   ├── __init__.py
│   ├── ab_test.py
│   └── copy_nn.py
├── requiremetns.txt
├── extract_feature.sh
├── train_cub.sh
├── train_fashion_inclass.sh
├── train_In_classEbay.sh
├── train_cross_classEbay.sh
├── train_fashion_inclass2.sh
├── manage_visdom.py
├── .gitignore
├── configs.py
├── docs
│   └── deep-metric-learning.MD
├── train_classify.py
├── train_hash.py
├── train_mc_npair.py
├── train_margin_cub.py
├── train_mx_margin.py
├── README.MD
└── train_mx_ebay_margin.py

/main.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/models/visnet.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/checkpoints/deep_test.txt:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/data/classify/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/data/hashdata/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/data/n_pair_mc/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/utils/train_mx_margin.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/data/margin_cub200/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/data/mxdata/basic_module/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/server/__init__.py:
--------------------------------------------------------------------------------
from models import MarginNet
--------------------------------------------------------------------------------
/requiremetns.txt:
--------------------------------------------------------------------------------
visdom
fire
numpy
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
from .visulization import Visulizer
from .log_config import logger
--------------------------------------------------------------------------------
/data/mxdata/__init__.py:
--------------------------------------------------------------------------------
from data.mxdata.basic_module.basic_transform import default_transform,test_transform
--------------------------------------------------------------------------------
/utils/mxnet_server_client.py:
--------------------------------------------------------------------------------
import requests
import numpy as np

host = 'http://hpc5.yud.io:8080/ping'
ret = requests.post(host)
print(ret)
--------------------------------------------------------------------------------
/extract_feature.sh:
--------------------------------------------------------------------------------
python train_mx_ebay_margin.py --gpus=1 --batch-size=100 --use_viz --name=ExtractDeepFashion --data=DeepFashionInClass --load_model_path=checkpoints/Fashion_In.params
--------------------------------------------------------------------------------
/train_cub.sh:
--------------------------------------------------------------------------------
nohup python train_mx_ebay_margin.py --gpus=0 --batch-k=5 --batch-size=70 --use_pretrained --use_viz --epochs=30 --name=CUB_200_2011 --data=CUB_200_2011 >mytraincub200.log 2>&1 &
--------------------------------------------------------------------------------
/train_fashion_inclass.sh:
--------------------------------------------------------------------------------
nohup python train_mx_ebay_margin.py --gpus=2 --batch-k=4 --batch-size=100 --use_pretrained --use_viz --name=Fashion_In --data=DeepFashionInClass >mytrainDeep_Inclass.log 2>&1 &
--------------------------------------------------------------------------------
/train_In_classEbay.sh:
--------------------------------------------------------------------------------
nohup python train_mx_ebay_margin.py --gpus=3 --batch-k=2 --batch-size=80 --use_pretrained --use_viz --epochs=55 --name=Ebay_Inclass --data=EbayInClass >mytrainEbay_Inclass.log 2>&1 &
--------------------------------------------------------------------------------
/train_cross_classEbay.sh:
--------------------------------------------------------------------------------
nohup python train_mx_ebay_margin.py --gpus=0 --batch-k=2 --batch-size=80 --epochs=55 --use_pretrained --use_viz --name=Ebay_Crossclass --data=EbayCrossClass >mytrainEbay_Crossclass.log 2>&1 &
--------------------------------------------------------------------------------
/train_fashion_inclass2.sh:
--------------------------------------------------------------------------------
nohup python train_mx_ebay_margin.py --gpus=2 --epochs=40 --steps=14,16,20,30 --batch-k=4 --batch-size=100 --use_pretrained --use_viz --name=Fashion_In2 --data=DeepFashionInClass >mytrainDeep_Inclass2.log 2>&1 &
--------------------------------------------------------------------------------
/manage_visdom.py:
--------------------------------------------------------------------------------
from utils import Visulizer
from configs import opt

viz = Visulizer(opt.vis_host, opt.vis_port, env='main')
print(viz)
viz.delete_env('dmldml3')
print("finished")
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
from .vgg_classify import BaseModule
from .vgg_classify import VggClassify
from .dml_model import ModGoogLeNet
from .loss import NpairLoss
from .sample_dml import Margin_Loss,SampleModel
from .mx_margin_model import MarginNet
from .hashnet import HashNetRes50,HashLoss
--------------------------------------------------------------------------------
/utils/extract_Ebaytxt_fromDeepFashion.py:
--------------------------------------------------------------------------------
# Extract an Ebay-style txt index file for the DeepFashion In-shop dataset:
# walk the ImageFolder layout, build a pandas table, and persist it to a flat file.
# A Stanford Online Products style instance dataset can then be built from DeepFashion.

import numpy as np
import pandas as pd
import csv
import os
import shutil
import sys
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.out
*.log
checkpoints/*.pth
checkpoints/*.csv
checkpoints/*00.jpg
data/Stanford_Online_Products
data/DeepInShop
data/cub200_2011
data/CUB_200_2011
data/hashdata/coco
data/hashdata/nus_wide
*.png
*.npy
*.csv
.ipynb_checkpoints/
__pycache__/
*.pyc
*.params
*.jpg
*.jpeg
--------------------------------------------------------------------------------
/server/ab_test.py:
--------------------------------------------------------------------------------
# an A/B test between the image_nn_prod and image_metric_taobao128 indexes

old_index = 'image_nn_prod'
new_index = 'image_metric_taobao128'

from .copy_nn import get_net
from .copy_nn import get_target_colection
from .copy_nn import get_nn_config
from .copy_nn import get_db

if __name__=='__main__':
    dev = get_db()
    host, path = get_nn_config()
    net = get_net(0)
    nn_128 = get_target_colection(dev)
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
import torchvision.transforms as T

from .classify.ClassifyData import my_collate_fn,Street2shop
from .n_pair_mc.npair_dataset import EbayDataset
from .margin_cub200.cub200_margin import CUB200DataSet
from .mxdata.mxcub200 import cub200_iterator

from .mxdata.online_products import getEbayCrossClassData,getEbayInClassData
from .mxdata.mxcub_simple import getCUB200
from .mxdata.deep_fashion import getDeepCrossClassFashion,getDeepInClassFashion
from .hashdata.hash_tri_files import get_hash_dataloader
--------------------------------------------------------------------------------
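data/__init__.py is the single import surface the training entry points and the train_*.sh scripts rely on. A minimal sketch of pulling loaders through it, assuming the datasets have been unpacked under data/ at the paths listed in .gitignore:

    from data import getCUB200, get_hash_dataloader

    # each factory hides the dataset-specific sampling; see mxcub_simple.py
    # and hash_tri_files.py below for the concrete signatures
    train_loader, test_loader = getCUB200('data/CUB_200_2011', batch_k=5, batch_size=70)
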
/utils/sku_viewer.py:
--------------------------------------------------------------------------------
from pymongo import MongoClient
from pprint import pprint

mongdb = {}
mongdb['host'] = ''
mongdb['port'] = 3717
client = MongoClient(host=mongdb['host'], port=mongdb['port'])
dev = client.get_database('dev')
dev.authenticate(name='', password='')
print(dev.collection_names())

tao_bao_collection = dev.get_collection('image_faiss_dual_taobao')
print(tao_bao_collection.count())

item = tao_bao_collection.find_one()
pprint(item['_source'])
--------------------------------------------------------------------------------
/utils/log_config.py:
--------------------------------------------------------------------------------
import logging
import os.path
import time

logger = logging.getLogger()
logger.setLevel(logging.INFO)
rq = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
log_path = os.path.join(os.getcwd(), 'Logs')
os.makedirs(log_path, exist_ok=True)  # the FileHandler below fails if Logs/ is missing
log_name = os.path.join(log_path, rq + '.log')
logfile = log_name
file_handler = logging.FileHandler(logfile, 'w')
file_handler.setLevel(logging.DEBUG)

formatter = logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s")
file_handler.setFormatter(formatter)

# attach the handler to the logger
logger.addHandler(file_handler)
--------------------------------------------------------------------------------
/data/mxdata/basic_module/basic_transform.py:
--------------------------------------------------------------------------------
import mxnet as mx
from mxnet.gluon import nn
import mxnet.gluon.data.vision.transforms as T


class RandomCrop(nn.Block):
    def __init__(self, size):
        super(RandomCrop, self).__init__()
        self.size = size

    def forward(self, x):
        # mx.image.random_crop returns (cropped_image, (x, y, w, h)); keep only the image
        return mx.image.random_crop(x, (self.size, self.size))[0]

normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
default_transform = T.Compose([
    T.Resize(256),
    RandomCrop(224),
    T.RandomFlipLeftRight(),
    T.ToTensor(),  # last, to swap the channel order to c,h,w
    normalize
])

test_transform = T.Compose([
    T.Resize(256),
    T.CenterCrop(224),
    T.ToTensor(),
    normalize
])
--------------------------------------------------------------------------------
/data/n_pair_mc/deep_in_fashion.py:
--------------------------------------------------------------------------------
# a pytorch dataset for the DeepFashion In-shop data,
# used to train a multi-class n-pair model as a baseline

import torchvision.transforms as T
#from configs import opt
from torch.utils.data import Dataset
import os
import csv
import fnmatch
from PIL import Image
import numpy as np
import pandas as pd
import torch

#normalize = T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
normalize = T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
default_transform = T.Compose([
    T.Resize(256),
    T.RandomCrop(227),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    normalize,
])
--------------------------------------------------------------------------------
/models/loss.py:
--------------------------------------------------------------------------------
import torch.nn as nn
from torch.nn.functional import cross_entropy
import torch

class NpairLoss(nn.Module):
    """the multi-class n-pair loss"""
    def __init__(self, l2_reg=3e-3):
        super(NpairLoss, self).__init__()
        self.l2_reg = l2_reg

    def forward(self, feature, target):
        """
        compute the n-pair loss; the first half of the batch holds the anchor
        embeddings, the last half the paired (positive) embeddings
        :param feature: (2N, d) embeddings, anchors then positives
        :param target: (N,) index of the matching positive for each anchor
        :return: scalar loss
        """
        batch_size = feature.size(0)
        fa = feature[:int(batch_size/2)]
        fp = feature[int(batch_size/2):]
        logit = torch.matmul(fa, torch.transpose(fp, 0, 1))
        loss_sce = cross_entropy(logit, target)
        l2_loss = sum(torch.norm(feature, p=2, dim=1)) / batch_size
        loss = loss_sce + self.l2_reg * l2_loss
        return loss
--------------------------------------------------------------------------------
/utils/parse_deepinshopdata.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import numpy as np
from mxnet.image import imread
import os

bboxfile = r'../Logs/Anno/list_bbox_inshop.txt'
line = None
skip = 40006
with open(bboxfile, 'r') as f_box:
    f_box.readline()  # image count line
    f_box.readline()  # column header line
    for i in range(skip):
        f_box.readline()
    line = f_box.readline()

img_dir = r'C:\download\In-shop-clothes'
line_list = line.strip().split(' ')
path, bbox = line_list[0], line_list[-4:]
print('path:', path, "bbox", bbox)
fig = plt.figure()
plt.subplot(2, 1, 1)
image = imread(os.path.join(img_dir, path))
bbox = [int(x) for x in bbox]
plt.imshow(image.asnumpy())
plt.subplot(2, 1, 2)
plt.imshow(image[bbox[1]:bbox[3], bbox[0]:bbox[2]].asnumpy())
plt.show()

img_path = r'C:\Users\Dengjun\Pictures\a.jpg'
img = imread(img_path)
print(img.shape)
--------------------------------------------------------------------------------
/configs.py:
--------------------------------------------------------------------------------
import warnings

class DefaultConfig(object):
    num_classes = 13  # street2shop classes
    train_classify_dir = "/data/jh/notebooks/hudengjun/VisNet/classify"
    valid_classify_dir = "/data/jh/notebooks/hudengjun/VisNet/"
    persist = 'data/persist.csv'
    lr = 0.001
    lr_step = 50
    use_gpu = True
    gpu_id = 2
    load_model_path = None
    num_workers = 4
    momentum = 0.89
    max_epoch = 800
    print_freq = 40
    batch_size = 32
    vis_host = "http://hpc3.yud.io"
    vis_port = 8088
    vis_env = 'Street2shop'
    debug = False


    # triplet dataset config
    ebay_dir = '/data/jh/notebooks/hudengjun/DML/deep_metric_learning/lib/online_products/Stanford_Online_Products/'
    n_pair_train = 'Ebay_train.txt'
    n_pair_test = 'Ebay_test.txt'
    embeding_size = 512
    dml_model_path = None
    l2_reg = 3e-3
    use_viz = True
    freeze_level = 0


def parse(self, kwargs):
    """update the config attributes with a kwargs dict"""
    for k, v in kwargs.items():
        if not hasattr(self, k):
            warnings.warn("DefaultConfig has no attribute: {0}".format(k))
        setattr(self, k, v)
    print("use config:")
    for k, v in self.__class__.__dict__.items():
        if not k.startswith('__'):
            print(k, ':', getattr(self, k))

DefaultConfig.parse = parse
opt = DefaultConfig()
--------------------------------------------------------------------------------
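Since every train_* entry point funnels its CLI flags through opt.parse, here is a quick sketch of the round trip (the values are illustrative):

    from configs import opt

    # fire turns `python train_classify.py train --lr=0.01 --gpu_id=0` into
    # train(lr=0.01, gpu_id=0), and train() forwards the kwargs here:
    opt.parse({'lr': 0.01, 'gpu_id': 0})
    print(opt.lr)  # 0.01; unknown keys only raise a warning, then are set anyway
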
/docs/deep-metric-learning.MD:
--------------------------------------------------------------------------------
- Since the advent of metric learning, supervised and unsupervised dimensionality reduction, and manifold learning theory, many algorithms have sought to learn a mapping from the original data space into a low-dimensional embedding space that preserves the similarity relations of the original data; learning such a space-transforming function can be called metric learning. Compared with traditional classification, where the set of classes is fairly fixed (ImageNet, for example, is divided into 1000 classes following WordNet), many real-world problems are not **closed-set** problems but open-set ones: at test time, many samples and classes have never been seen before. Their typical traits are a huge number of classes with few samples per class; the most typical case is face recognition (classes on the order of hundreds of millions of identities, with at most a dozen or so snapshots per person).
- Contrasting classification with metric learning: classification learns a concept, a coarse overview, somewhat like judging things in isolation; metric learning learns a fine-grained scale that emphasizes the overall layout of latent attributes. Put differently, classification encodes things in a **one-hot** format, while a metric encodes them as a **denseVector**. A concept cannot describe an unseen class, whereas a scale can still measure an unseen class by the same measurement rules. (For example, the former learns each person's name, the latter learns how to measure people: when describing a stranger to someone, describing attributes makes it much easier for them to form a picture.)
- In the real world, we often do not need to name everything we have seen; we do not care who someone is, only how far they are from everything else. This is why face recognition improved when moving from DeepID2's classification loss to FaceNet's triplet loss, and it is also why face recognition, person re-identification, vehicle retrieval (cloned-plate analysis), and product retrieval have found practical deployment.
- Through metric learning, a real-world search problem (visual search, VisualSearch or Visual Recommendation) reduces to two simple steps: map the raw data into the embedding space with the learned mapping function, find the nearest points there with approximate nearest-neighbor search, and look up the original data by the ids of those points. The core technologies are a __relation-preserving space mapping function__ and a __fast, high-capacity vector search engine__. The former guarantees that similarity relations survive the mapping precisely, so that distances in the embedding space correlate positively with the observed attributes of the raw data. The latter guarantees efficient lookup of nearby points over a very large search space (approximate nearest neighbors among hundreds of millions of vectors in milliseconds).

- Traditional methods such as LDA, MDS, and IsoMap are shallow mapping functions or relation-preserving data transforms; MDS in particular cannot embed unseen data points at all and must fall back on a fitted function. SNE-style algorithms, which embed high-dimensional points by matching distance distributions given the pairwise distances, are not suitable for visual images either. Deep convolutional networks, extracting features layer by layer, are capable of deep semantic feature extraction; with suitable data loading and gradient guidance from the objective function, such a network can learn a deep feature extractor. This approach is called Deep Metric Learning.
  Take telling dogs from cats as an example. An infant cannot learn the task at all; it takes a child whose brain has developed. If a child only ever sees white cats and black dogs, it will come to believe that white means cat and black means dog, and will call a black cat a dog. Some regimes present one pair of animals at a time, others a whole batch. And if the child is only ever asked to tell apart easy dogs and cats, never going through hard discrimination training, it will be at a loss on the hard samples it meets later.
  The example shows the prerequisites for training a good deep metric learning function: the model structure, the data sampling scheme, the objective function, and hard samples. Most improvements in deep metric learning start from these basic elements.
--------------------------------------------------------------------------------
/models/vgg_classify.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import time
from torchvision.models import vgg16_bn

class BaseModule(nn.Module):
    """base class adding model save and load helpers"""
    def __init__(self):
        super(BaseModule, self).__init__()
        self.model_name = str(type(self))
        self.model_name = 'basemodel'

    def load(self, path):
        """
        load model weights
        :param path: path of the checkpoint to reload
        :return: None
        """
        self.load_state_dict(torch.load(path))

    def save(self, name=None):
        """default to model name plus timestamp"""
        if name is None:
            prefix = 'checkpoints/' + self.model_name + '_'
            name = time.strftime(prefix + '%m%d_%H:%M:%S.pth')
        torch.save(self.state_dict(), name)
        return name

class VggClassify(BaseModule):
    """a classifier derived from vgg16_bn"""
    def __init__(self, num_classes):
        super(VggClassify, self).__init__()
        vgg16_model = vgg16_bn(pretrained=False)
        features, classifier = vgg16_model.features, vgg16_model.classifier
        classifier = list(classifier)
        del classifier[-1]
        classifier.append(nn.Linear(4096, num_classes))
        self.features = features
        self.classifier = nn.Sequential(*classifier)
        self.model_name = 'vgg_bn'

    def forward(self, x):
        features = self.features(x)
        features = features.view(features.size(0), -1)
        output = self.classifier(features)
        return output

if __name__=='__main__':
    model = BaseModule()
    model.save()
--------------------------------------------------------------------------------
/utils/visulization.py:
--------------------------------------------------------------------------------
from configs import opt
import visdom
import torch as t
import time
import numpy as np

class Visulizer(object):
    """an object interface that pushes training traces to a visdom server"""
    def __init__(self, host=opt.vis_host, port=opt.vis_port, env=opt.vis_env):
        self.vis = visdom.Visdom(server=host, port=port, env=env)

        self.index = {}
        self.log_text = ""

    def reinit(self, env='default'):
        self.vis = visdom.Visdom(server=opt.vis_host, port=opt.vis_port, env=env)
        return self

    def plot(self, name, y):
        """plot('loss', 1.0)"""
        x = self.index.get(name, 0)
        self.vis.line(Y=np.array([y]), X=np.array([x]),
                      win=name,
                      opts=dict(title=name),
                      update=None if x == 0 else 'append')
        self.index[name] = x + 1

    def img(self, name, img_, **kwargs):
        """
        :param name: the window name
        :param img_: image shape and dtype: t.Tensor(64,64), Tensor(3,64,64), Tensor(100,1,64,64)
        :param kwargs:
        :return:
        """
        self.vis.images(t.Tensor(img_).cpu().numpy(),
                        win=name,
                        opts=dict(title=name),
                        **kwargs)

    def log(self, info, win='log_text'):
        """self.log({'loss': 1, 'lr': 0.0001})"""
        self.log_text += ('[{time}] {info}'.format(
            time=time.strftime('%m%d_%H%M%S'),
            info=info))
        self.vis.text(self.log_text, win)

    def delete_env(self, env):
        self.vis.delete_env(env)

if __name__=='__main__':
    """start the visdom server first:
    nohup python -m visdom.server --port=8088 &"""
    viz = Visulizer(host='http://192.168.3.13', port=8088, env='street')
    viz.log("this is a start")
    viz.plot('loss', 2.3)
    viz.plot('loss', 2.2)
    viz.plot('loss', 2.1)

    viz.img('origin', np.random.random((10, 3, 224, 224)))
--------------------------------------------------------------------------------
/data/mxdata/composedataset.py:
--------------------------------------------------------------------------------
import mxnet.gluon.data.vision.transforms as T
from mxnet.gluon.data import DataLoader,Dataset
from mxnet import nd
import numpy as np
import os
from mxnet.image import imread
import pandas as pd


normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
default_transform = T.Compose([
    T.Resize(256),
    T.RandomResizedCrop(224),
    T.RandomFlipLeftRight(),
    T.ToTensor(),
    normalize
])

class ComposeDataSet(Dataset):
    """an Ebay plus DeepFashion composite dataset for metric learning"""
    def __init__(self, ebay_dir, fashion_dir, batch_k, batch_size, is_train, transform=default_transform):
        self.ebay_dir = ebay_dir
        self.fashion_dir = fashion_dir
        self.batch_k = batch_k
        self.batch_size = batch_size
        self.is_train = is_train
        self._transform = transform

        # begin to resolve the ebay data
        if self.is_train:
            # start with the ebay data
            table_name = os.path.join(self.ebay_dir, 'Ebay_train.txt')
            table_data = pd.read_table(table_name, header=0, delim_whitespace=True)
            min_super_id, max_super_id = min(table_data.super_class_id), max(table_data.super_class_id)

            # the super ids for the ebay data
            self.super_ids = np.arange(min_super_id, max_super_id + 1)
            self.super2class = {}  # a dict of {super_id: [class_id1, class_id2, ...]}
            for super_id in self.super_ids:
                self.super2class[super_id] = table_data[table_data.super_class_id == super_id].class_id.tolist()

            min_class_id, max_class_id = min(table_data.class_id), max(table_data.class_id)
            self.class_ids = list(np.arange(min_class_id, max_class_id + 1))
            self.train_length = max_class_id + 1 - min_class_id
            self.super_id_dist = [len(v) for k, v in self.super2class.items()]
            self.class2imagefiles = []  # per class, the list of image paths
            for class_id in self.class_ids:
                one_class_paths = table_data[table_data.class_id == class_id].path.tolist()  # type list
                self.class2imagefiles.append(one_class_paths)

            # TODO: process the DeepFashion data; extract its super_ids to class_ids
            # the same way as above (left unfinished in the repository)
        else:
            # TODO: the test split is not implemented yet
            raise NotImplementedError("ComposeDataSet currently only builds the training split")
--------------------------------------------------------------------------------
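The file above stops before any sampling logic; a plausible completion, mirroring the sample_train_batch pattern used by CUB200Data further down in this repo (this helper and its class2imagefiles indexing are hypothetical, not part of the original file):

    import numpy as np

    def sample_train_batch(self):
        # pick batch_size // batch_k ebay classes, then batch_k images per class
        batch, labels = [], []
        num_groups = self.batch_size // self.batch_k
        sampled = np.random.choice(self.class_ids, num_groups, replace=False)
        for class_id in sampled:
            files = self.class2imagefiles[class_id - self.class_ids[0]]
            picks = np.random.choice(files, self.batch_k, replace=False)
            batch += picks.tolist()
            labels += [class_id] * self.batch_k
        return batch, labels
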
/models/dml_model.py:
--------------------------------------------------------------------------------
import torch
import torchvision
import torch.nn as nn

import torch.nn.functional as F
from torchvision.models.inception import inception_v3
from .vgg_classify import BaseModule
from collections import OrderedDict

class ModGoogLeNet(BaseModule):
    """inception_v3 with the classification head replaced by an embedding layer"""
    def __init__(self, embeding_size=512, with_drop=False):
        super(ModGoogLeNet, self).__init__()
        basic_model = inception_v3(pretrained=True, transform_input=False)
        basic_model.aux_logits = False
        feature = list(basic_model.named_children())

        def aux(name_module):
            return 'AuxLogits' not in name_module[0]

        del feature[-1]                 # drop the final fc layer
        feature = filter(aux, feature)  # generator; drop the auxiliary head
        feature = [m for m in feature]
        self.level1_2 = nn.Sequential(OrderedDict(feature[0:3]))
        self.level_3_4 = nn.Sequential(OrderedDict(feature[3:5]))
        self.level_5_6 = nn.Sequential(OrderedDict(feature[5:13]))
        self.level_7 = nn.Sequential(OrderedDict(feature[13:16]))
        self.fc = nn.Linear(in_features=2048, out_features=embeding_size)
        self.model_name = 'DMLGoogle'
        self.with_drop = with_drop

    def freeze_model(self, level=5):
        """
        freeze the backbone up to the given inception level; the layers split into (
        Conv2d_1a_3x3
        Conv2d_2a_3x3,Conv2d_2b_3x3,
        Conv2d_3b_1x1,Conv2d_4a_3x3,
        Mixed_5b,Mixed_5c,Mixed_5d,
        Mixed_6a,Mixed_6b,Mixed_6c,Mixed_6d,Mixed_6e,AuxLogits
        Mixed_7a,Mixed_7b,Mixed_7c)

        :return:
        """
        for seq in (self.level1_2, self.level_3_4, self.level_5_6, self.level_7):
            for name, module in seq.named_children():
                # layer names look like Conv2d_1a_3x3 or Mixed_6e: the digit after
                # the first underscore is the inception level
                if int(name.split('_')[1][0]) <= level:
                    for param in module.parameters():
                        param.requires_grad = False

    def forward(self, x, normalize=False):
        """
        forward a batch of shape (N, 3, 227, 227)
        :param x: torch.tensor
        :return: feature embedding
        """
        x = self.level1_2(x)
        x = F.max_pool2d(x, kernel_size=3, stride=2)
        x = self.level_3_4(x)
        x = F.max_pool2d(x, kernel_size=3, stride=2)
        x = self.level_5_6(x)
        x = self.level_7(x)

        x = F.avg_pool2d(x, kernel_size=x.size(-1))  # default 8x8, sometimes 5x5
        # 1 x 1 x 2048
        if self.with_drop:
            x = F.dropout(x, training=self.training)
        # 1 x 1 x 2048
        x = x.view(x.size(0), -1)
        embeding = self.fc(x)
        return embeding
--------------------------------------------------------------------------------
/train_classify.py:
--------------------------------------------------------------------------------
import torch
import torch.optim as optim
from data import Street2shop
import os
from configs import opt
from models import VggClassify
from torch.utils.data import DataLoader
from tqdm import tqdm
from utils import Visulizer

def val(model, dataloader):
    """run the model over the validation data"""
    model.eval()
    num_total = 0
    num_correct = 0
    for i, (data, label) in tqdm(enumerate(dataloader)):
        if opt.use_gpu:
            data = data.cuda()
            label = label.cuda()
        score = model(data)
        num_total += data.size(0)
        pred = torch.argmax(score, dim=1)
        acc = torch.eq(pred, label)
        num_correct += acc.sum().item()
        if opt.debug:
            break
    print("valid, correct rate", 1.0 * num_correct / num_total)
    model.train()

def train(**kwargs):
    opt.parse(kwargs)
    os.environ['CUDA_VISIBLE_DEVICES'] = str(opt.gpu_id)
    # data
    train_data = Street2shop(opt.train_classify_dir, train=True, persist=opt.persist)
    val_data = Street2shop(opt.train_classify_dir, train=False, persist=opt.persist)

    # model
    model = VggClassify(num_classes=opt.num_classes)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model = model.cuda()

    # data loaders
    train_dataloader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers)

    # visualizer
    viz = Visulizer(host=opt.vis_host, port=opt.vis_port, env=opt.vis_env)
    viz.log("start to train")
    # loss function
    criterion = torch.nn.CrossEntropyLoss()
    lr = opt.lr
    m = opt.momentum
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=m)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 20, 0.1)
    for epoch in range(opt.max_epoch):
        lr_scheduler.step()
        train_loss = 0
        for i, (data, label) in tqdm(enumerate(train_dataloader)):
            if opt.use_gpu:
                data = data.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            score = model(data)
            loss = criterion(score, label)
            loss.backward()
            optimizer.step()
            train_loss = train_loss + loss.item()
            if i % opt.print_freq == opt.print_freq - 1:
                average_loss = train_loss / opt.print_freq  # mean loss per iteration over the window
                viz.plot('loss', average_loss)
                train_loss = 0
            if opt.debug:
                break
        print("epoch :{0} finished,begin to valid test".format(epoch))
        model.save()
        val(model, val_dataloader)
        if opt.debug:
            print("finished one iter")
            break

def help():
    """print usage information"""
    print("""
    usage: python {0} --args=value
    functions := train help
    example:
            python {0} train
            python {0} help""".format('train_classify.py'))

if __name__=='__main__':
    import fire
    fire.Fire()
--------------------------------------------------------------------------------
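Because train_classify.py exposes its functions through fire, the flags map one-to-one onto DefaultConfig attributes; an illustrative invocation (the checkpoint name follows BaseModule.save's timestamp pattern and is made up here):

    # train on GPU 0 with a smaller learning rate and visdom plotting
    python train_classify.py train --gpu_id=0 --lr=0.005 --batch_size=64

    # resume from a saved checkpoint
    python train_classify.py train --load_model_path=checkpoints/vgg_bn_0101_12:00:00.pth
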
/data/classify/ClassifyData.py:
--------------------------------------------------------------------------------
import torch
import torchvision.transforms as T
from configs import opt
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset
import os
import csv
import fnmatch
from PIL import Image
import numpy as np
from torch.utils.data.dataloader import default_collate

normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
default_transform = T.Compose([
    T.RandomResizedCrop(224),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    normalize,
])


#origin_dataset = ImageFolder(opt.train_classify_dir,target_transform=transform)
def my_collate_fn(batch):
    '''
    each element of the batch looks like (data, label)
    '''
    # filter out samples whose image failed to load
    batch = list(filter(lambda x: hasattr(x[0], 'size'), batch))
    if len(batch) == 0:
        return torch.Tensor()
    return default_collate(batch)  # collate the filtered batch the default way

class Street2shop(Dataset):
    """dataset split into train and test;
    root is an `ln -s` style link like this:
    --root
      --bags
      --tops
      --skirts
      --hats
      --;;
    13 consumer categories in total"""
    def __init__(self, root, train=True, persist=opt.persist, trans=default_transform):
        self.train = train
        self.root = root

        self.names_idx = {}
        self.transform = trans

        if persist is None:
            persist = 'data/persist.csv'
        if not os.path.exists(persist):
            folders = os.listdir(root)
            folders.sort()  # sort from a to z
            self.names_idx = {fold: i for i, fold in enumerate(folders)}
            with open(persist, 'w') as f:
                writer = csv.writer(f)
                for fold in folders:
                    index = self.names_idx[fold]
                    imgs = os.listdir(os.path.join(self.root, fold))
                    for img in imgs:
                        writer.writerow([fold + '/{0}'.format(img), index])

        # start to read the data
        with open(persist, 'r') as f:
            reader = csv.reader(f)
            self.imgs = [row for row in reader]
        print("dataset size", len(self.imgs))

        np.random.shuffle(self.imgs)
        if self.train:
            self.imgs = self.imgs[:int(0.7 * len(self.imgs))]
        else:
            self.imgs = self.imgs[int(0.7 * len(self.imgs)):]

    def __getitem__(self, index):
        """load one sample and apply the transform"""
        img_path, label = self.imgs[index]
        img_path = os.path.join(self.root, img_path)
        try:
            data = Image.open(img_path)
            if not hasattr(data, 'size'):
                raise Exception("no size or data channel problem")
            if self.transform:
                data = self.transform(data)
            if data.size(0) != 3:
                print("channel not 3,img_path is :{0}".format(img_path))
                raise Exception("channel not 3")
            return data, int(label)
        except Exception as e:
            # fall back to the first sample when an image is broken
            print(e, img_path)
            img_path, label = self.imgs[0]
            data = Image.open(os.path.join(self.root, img_path))
            label = int(label)
            if self.transform:
                data = self.transform(data)
            return data, label

    def __len__(self):
        return len(self.imgs)
--------------------------------------------------------------------------------
/data/hashdata/hash_tri_files.py:
--------------------------------------------------------------------------------
# hash data read from three list files; covers coco, nus_wide and imagenet

from torch.utils.data import Dataset,DataLoader
import torchvision.transforms as T
from PIL import Image
import os
import numpy as np


root_path = '/data/jh/notebooks/hudengjun/DeepEmbeding/data/hashdata'

def image_train(resize_size=256, crop_size=224):
    normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                            std=[0.229, 0.224, 0.225])
    return T.Compose([
        T.Resize(resize_size),
        T.RandomResizedCrop(crop_size),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        normalize])

def image_test(resize_size=256, crop_size=224):
    normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                            std=[0.229, 0.224, 0.225])
    #start_first = 0
    start_center = (resize_size - crop_size - 1) / 2
    #start_last = resize_size - crop_size - 1

    return T.Compose([
        T.Resize(resize_size),
        PlaceCrop(crop_size, start_center, start_center),
        T.ToTensor(),
        normalize])

class PlaceCrop(object):
    """Crops the given PIL.Image at the particular index.
    Args:
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (w, h), a square crop (size, size) is
            made.
    """

    def __init__(self, size, start_x, start_y):
        if isinstance(size, int):
            self.size = (int(size), int(size))
        else:
            self.size = size
        self.start_x = start_x
        self.start_y = start_y

    def __call__(self, img):
        """
        Args:
            img (PIL.Image): Image to be cropped.
        Returns:
            PIL.Image: Cropped image.
        """
        th, tw = self.size
        return img.crop((self.start_x, self.start_y, self.start_x + tw, self.start_y + th))


class ImageList(Dataset):
    def __init__(self, file, transform=None):
        if transform is None:
            self._transform = image_train(256, 224) if 'train.txt' in file else image_test(256, 224)
        else:
            self._transform = transform
        if not os.path.exists(file):
            raise Exception("file not exist")
        self.file = file

        self.images = []
        with open(self.file, 'r') as f:
            for line in f.readlines():
                items = line.strip().split(' ')
                self.images.append((items[0], np.array([int(la) for la in items[1:]], dtype=np.float32)))

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        path, target = self.images[index]
        img = Image.open(path).convert('RGB')
        if self._transform:
            img = self._transform(img)
        return img, target

def get_hash_dataloader(dataset_name, train_batch, test_batch, database_batch):
    """
    build the loaders for one hashing dataset: two independently shuffled copies
    of the train split (so each batch can be paired with a differently shuffled
    one), plus the query (test) and database splits
    :param dataset_name: one of coco, nus_wide, imagenet
    :return: train1, train2, test, database
    """
    file_names = ['train.txt', 'test.txt', 'database.txt']
    files = [os.path.join(root_path, dataset_name, file_name) for file_name in file_names]
    datasets = [ImageList(file) for file in files]
    train1 = DataLoader(datasets[0], batch_size=train_batch, shuffle=True, num_workers=6)
    train2 = DataLoader(datasets[0], batch_size=train_batch, shuffle=True, num_workers=6)
    test = DataLoader(datasets[1], batch_size=test_batch, shuffle=False, num_workers=4)
    database = DataLoader(datasets[2], batch_size=database_batch, shuffle=False, num_workers=4)
    return train1, train2, test, database


if __name__== '__main__':
    coco_train = ImageList('/data/jh/notebooks/hudengjun/DeepEmbeding/data/hashdata/coco/train.txt')
    print("size of cocotrain", len(coco_train))
    print("start to get data", coco_train[0][0].shape, coco_train[0][1].shape)
--------------------------------------------------------------------------------
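The list files ImageList parses hold one image path followed by a multi-hot label vector per line; a minimal sketch of wiring the loaders up (paths assume the layout hard-coded in root_path above, and the sample line is illustrative):

    # each line of train.txt: <image path> <0/1 flag per class>, e.g.
    # /path/to/COCO_train2014_000000000009.jpg 0 0 1 0 0 ... 0

    train1, train2, test, database = get_hash_dataloader(
        'coco', train_batch=32, test_batch=16, database_batch=16)
    # zip differently shuffled train1/train2 batches to form the similarity pairs
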
/data/margin_cub200/cub200_margin.py:
--------------------------------------------------------------------------------
import torch
from torch.utils.data import Dataset
from PIL import Image

import torchvision.transforms as T
import numpy as np
import os

normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
default_transform = T.Compose([
    T.Resize(256),
    T.RandomCrop(224),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    normalize,
])

class CUB200DataSet(Dataset):
    """
    the CUB-200 bird dataset: 200 categories of birds, 100 for train and
    100 for test, each category with about 60 images
    """
    def __init__(self, data_path, batch_k=5, batch_size=70, is_train=True, transform=default_transform):
        self.is_train = is_train
        self.batch_k = batch_k        # samples drawn per class, e.g. 5
        self.batch_size = batch_size  # samples per batch, e.g. 70, so 14 classes get sampled
        self.train_image_files = [[] for _ in range(100)]
        self.test_image_files = []
        self.test_labels = []
        self.boxes = {}
        self.transform = transform

        with open(os.path.join(data_path,'images.txt'),'r') as f_img,\
            open(os.path.join(data_path,'image_class_labels.txt'),'r') as f_label,\
            open(os.path.join(data_path,'bounding_boxes.txt'),'r') as f_box:
            for line_img, line_label, line_box in zip(f_img, f_label, f_box):
                fname = os.path.join(data_path, 'images', line_img.strip().split()[-1])
                label = int(line_label.strip().split()[-1]) - 1
                box = [int(float(v)) for v in line_box.split()[-4:]]
                self.boxes[fname] = box

                if label < 100:
                    self.train_image_files[label].append(fname)
                else:
                    self.test_image_files.append(fname)
                    self.test_labels.append(label)

        self.n_test = len(self.test_image_files)

    def __getitem__(self, index):
        """
        return a whole sampled batch in training, a single sample in test
        :param index: index of the training batch or of the test sample
        :return: in training, sample batch_size/batch_k classes and choose
            batch_k images per class, then stack them into one batch
        """
        if self.is_train:
            # build one training batch
            images = []
            labels = []
            num_groups = self.batch_size // self.batch_k
            sampled_classes = np.random.choice(100, num_groups, replace=False)
            for class_id in sampled_classes:
                img_fnames = np.random.choice(self.train_image_files[class_id], self.batch_k, replace=False)
                for file_path in img_fnames:
                    x, y, w, h = self.boxes[file_path]
                    img = Image.open(file_path).convert('RGB').crop((x, y, x+w, y+h))
                    try:
                        img_tensor = self.transform(img)
                        images.append(img_tensor)
                        labels.append(class_id)
                    except Exception as e:
                        print(file_path)
                        break

            batch_data = torch.stack(images, dim=0)  # list of tensors -> batch tensor
            label_data = torch.tensor(np.array(labels, dtype=np.int32))  # list -> tensor
            return batch_data, label_data
        else:
            # fetch one test sample
            image = Image.open(self.test_image_files[index]).convert('RGB')
            label = self.test_labels[index]
            if self.transform:
                image = self.transform(image)
            return image, label

    def __len__(self):
        if self.is_train:
            return 200  # 200 sampled batches per "epoch"
        else:
            return self.n_test  # iterate over all test_image_files



if __name__=='__main__':
    import ipdb
    ipdb.set_trace()
    dataset = CUB200DataSet(data_path='data/cub200_2011/CUB_200_2011/')
    data = dataset[1]
    print(type(data))
    print(data[1])
--------------------------------------------------------------------------------
/models/hashnet.py:
--------------------------------------------------------------------------------
import math
import torch
import torch.nn as nn
from torchvision.models.resnet import resnet50

class HashNetRes50(nn.Module):
    """
    a hashing network based on resnet50
    """
    def __init__(self, n_bit):
        super(HashNetRes50, self).__init__()
        model_resnet = resnet50(pretrained=True)
        self.conv1 = model_resnet.conv1
        self.bn1 = model_resnet.bn1
        self.relu = model_resnet.relu
        self.maxpool = model_resnet.maxpool
        self.layer1 = model_resnet.layer1
        self.layer2 = model_resnet.layer2
        self.layer3 = model_resnet.layer3
        self.layer4 = model_resnet.layer4
        self.avgpool = model_resnet.avgpool
        self.feature_layers = nn.Sequential(self.conv1,
                                            self.bn1,
                                            self.relu,
                                            self.maxpool,
                                            self.layer1,
                                            self.layer2,
                                            self.layer3,
                                            self.layer4,
                                            self.avgpool)
        self.hash_layer = nn.Linear(model_resnet.fc.in_features, n_bit)
        self.hash_layer.weight.data.normal_(0, 0.01)
        self.hash_layer.bias.data.fill_(0.0)
        self.activation = torch.nn.Tanh()

        self.iter_num = 0
        self.gamma = 0.005
        self.step_size = 200
        self.power = 0.5
        self.init_scale = 1.0
        self.scale = self.init_scale
        self.__in_features = n_bit

    def forward(self, x):
        """the batch x contains both x and x' used to build the similarity pairs"""
        if self.training:
            self.iter_num += 1
        x = self.feature_layers(x)
        x = x.view(x.size(0), -1)
        y = self.hash_layer(x)  # just a linear transform
        if self.iter_num % self.step_size == 0:
            self.scale = self.init_scale * math.pow((1 + self.gamma * self.iter_num), self.power)
        y = self.activation(self.scale * y)
        return y

    def ouput_num(self):
        return self.__in_features

class HashLoss(nn.Module):
    def __init__(self, hash_bit):
        super(HashLoss, self).__init__()
        self.hash_bit = hash_bit

    def forward(self, x, y, sigmoid_param=1.0, l_threshold=15.0, class_num=1.0):
        """
        :param x: hash codes for the concatenated pair batch
        :param y: multi-hot labels for the concatenated pair batch
        :param sigmoid_param: scale applied to the code dot products
        :param l_threshold: above this, the dot product uses the linear approximation
        :param class_num: weight compensating the imbalanced pair distribution
        :return: pairwise similarity loss
        """
        total_size = x.shape[0]
        x1 = x.narrow(0, 0, total_size//2)
        x2 = x.narrow(0, total_size//2, total_size//2)  # narrow: dimension, start, length
        y1 = y.narrow(0, 0, total_size//2)
        y2 = y.narrow(0, total_size//2, total_size//2)

        similarity = torch.mm(y1, y2.t())
        dot_product = sigmoid_param * torch.mm(x1, x2.t())
        exp_product = torch.exp(dot_product)

        mask_dot = dot_product.data > l_threshold
        mask_exp = dot_product.data <= l_threshold  # when dot_product is small, use log(1+exp(x)) - s_ij*x

        mask_positive = similarity.data > 0
        mask_negative = similarity.data <= 0

        mask_dp = mask_dot & mask_positive
        mask_dn = mask_dot & mask_negative
        mask_ep = mask_exp & mask_positive
        mask_en = mask_exp & mask_negative

        # dot_loss approximates exp_loss when dot_product is large: log(1+exp(x)) ~ x there,
        # which lets the positive-pair loss go to 0 and keeps everything numerically stable
        dot_loss = dot_product * (1 - similarity)
        exp_loss = torch.log(1 + exp_product) - similarity * dot_product

        loss = (torch.sum(torch.masked_select(exp_loss, mask_ep)) +
                torch.sum(torch.masked_select(dot_loss, mask_dp))) * class_num \
               + torch.sum(torch.masked_select(exp_loss, mask_en)) \
               + torch.sum(torch.masked_select(dot_loss, mask_dn))

        loss = loss / (torch.sum(mask_positive.float()) * class_num + torch.sum(mask_negative.float()))
        return loss


if __name__=='__main__':
    base_resnet = HashNetRes50(n_bit=48)
    x = torch.rand((10, 3, 224, 224))
    x = base_resnet(x)
    print(x.shape)

    #base_resnet.zero_grad()
    torch.save(nn.Sequential(base_resnet), "hashnet.pth.tar")
    print("finished")
    model = torch.load("hashnet.pth.tar")
    print(model)
--------------------------------------------------------------------------------
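HashNetRes50 sharpens its tanh toward a true sign function as training proceeds; a few values of the scale schedule defined above (pure arithmetic on the constants in the file, no new behavior):

    import math

    gamma, power = 0.005, 0.5
    for it in (200, 2000, 10000):
        print(it, math.pow(1 + gamma * it, power))
    # 200 -> 1.41, 2000 -> 3.32, 10000 -> 7.14: tanh(scale*y) approaches sign(y)
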
/utils/vis_tsne_images.py:
--------------------------------------------------------------------------------
import pandas as pd
import numpy as np
from PIL import Image
from lapjv import lapjv
from sklearn.manifold import TSNE
from scipy.spatial.distance import cdist
import matplotlib as mlp
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score

def load_img(file_list, in_dir):
    #pred_img = [f for f in os.listdir(in_dir) if os.path.isfile(os.path.join(in_dir, f))]
    pred_img = file_list
    img_collection = []
    for idx, img in enumerate(pred_img):
        img = os.path.join(in_dir, img)
        img_collection.append(Image.open(img))
    return img_collection

def save_tsne_grid(img_list, x2d, out_res, crop_size, in_dir):
    """
    paste every image onto one big canvas at its 2-d t-SNE position
    :param img_list: the image file names
    :param x2d: the 2-d points, normalized to [0,1]
    :param out_res: the output picture resolution
    :return:
    """
    out_img = np.ones((out_res+crop_size, out_res+crop_size, 3), dtype='uint8')
    out_img = out_img * 255

    i = 0
    for img_path, point in tqdm(zip(img_list, x2d)):
        i += 1
        point = point * out_res
        px = int(point[0])
        py = int(point[1])
        img = Image.open(os.path.join(in_dir, img_path))
        img.thumbnail((crop_size, crop_size))

        a = np.array(img)

        try:
            h, w = a.shape[:2]
            if len(a.shape) == 3:
                out_img[py:py + h, px:px + w] = a
        except Exception as e:
            print(e)
            print(a.shape)
            print(img_path)
        # if i%5000==4999:
        #     tm = out_img.astype('uint8')
        #     tm_pl_img = Image.fromarray(tm)
        #     tm_pl_img.save('checkpoints/tsne_product_{0}.jpg'.format(i+1))

    out_img = out_img.astype('uint8')
    pl_img = Image.fromarray(out_img)
    pl_img.save('checkpoints/tsne_product.jpg')


def generate_tsne(activations):
    perplexity = 30
    tsne = TSNE(perplexity=perplexity, n_components=2, init='random')
    X_2d = tsne.fit_transform(activations)  # activations dtype is numpy.ndarray
    X_2d -= X_2d.min(axis=0)
    X_2d /= X_2d.max(axis=0)
    return X_2d

def visualize(im_files_list, features, data_dir, tsne_size=20000, crop_size=100):
    """
    visualize the embedding with t-SNE
    :param im_files_list: image file list
    :param features: image features, numpy.ndarray of shape (n, 512)
    :return:
    """
    print("dimension reduction of the features ...")
    feature_2d = generate_tsne(features)
    np.save('fashion.npy', feature_2d)
    #feature_2d = np.load('x2d.npy')
    print("build t-sne image ......")
    save_tsne_grid(im_files_list, feature_2d, tsne_size, crop_size, data_dir)


def nmi(gt_class, features):
    """
    normalized mutual information between ground-truth classes and k-means clusters
    :param gt_class: np.ndarray, shape [n,1], dtype=np.int32
    :param features: image features to cluster, numpy.ndarray [n,512]
    :return:
    """
    gt_class = gt_class - min(gt_class)
    n_cluster = len(set(gt_class))  # gt_class runs from 0 to n_cluster
    # remap the class ids onto a dense 0..n_cluster-1 range
    st_class = set(gt_class)
    kv = {}
    for k in st_class:
        kv[k] = len(kv)
    gt_class = np.array([kv[k] for k in gt_class])

    model = KMeans(n_clusters=n_cluster)
    Y = model.fit(features)  # this can take ~40 minutes
    cl_class = Y.labels_
    score = normalized_mutual_info_score(gt_class, cl_class)
    print("the normalized_mutual_info_score", score)


def vis_ebay_n_pair():
    """
    read the computed features, draw the t-SNE picture, then compute the NMI index
    """
    features_file = 'checkpoints/online_product_compute.csv'
    test_info_file = 'data/Stanford_Online_Products/Ebay_test.txt'

    vectors = None
    features = pd.read_csv(features_file, header=None)
    id_class = features.iloc[:, 0:2]
    id_class = np.array(id_class)
    vectors = np.array(features.iloc[:, 2:])

    image_id_path = pd.read_table(test_info_file, header=0, delim_whitespace=True)
    file_list = np.array(image_id_path.path)

    visualize(file_list, vectors, 'data/Stanford_Online_Products')
    file_class = np.array(image_id_path.class_id)
    file_class = file_class.astype(np.int32)
    nmi(file_class, vectors)


def vis_deep_fashon_margin():
    feature_file = 'checkpoints/deepfashion.csv'
    test_info_file = 'checkpoints/fashion_test.txt'
    features = pd.read_csv(feature_file, header=None)
    vectors = np.array(features.iloc[:, 2:])
    gt_class = np.array(features.iloc[:, 1], dtype=np.int32)
    image_path_id = pd.read_table(test_info_file, header=None, sep=',')
    file_list = np.array(image_path_id.iloc[:, 0])
    #visualize(file_list,vectors,'data/DeepInShop')
    nmi(gt_class, vectors)


if __name__=='__main__':
    vis_deep_fashon_margin()
--------------------------------------------------------------------------------
/data/mxdata/mxcub_simple.py:
--------------------------------------------------------------------------------
# an mxnet implementation of the CUB-200 vision dataset, mirroring the pytorch one

from mxnet.gluon.data import DataLoader,Dataset
from mxnet import nd
from mxnet.image import imread

import os
import numpy as np
import mxnet as mx
from mxnet.gluon import nn
import mxnet.gluon.data.vision.transforms as T


class RandomCrop(nn.Block):
    def __init__(self, size):
        super(RandomCrop, self).__init__()
        self.size = size

    def forward(self, x):
        # mx.image.random_crop returns (cropped_image, (x, y, w, h)); keep only the image
        return mx.image.random_crop(x, (self.size, self.size))[0]

normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
default_transform = T.Compose([
    T.Resize(256),
    T.RandomResizedCrop(size=224, scale=(1.0, 1.0), ratio=(1.0, 1.0)),  # just crop, no rescaling
    T.RandomFlipLeftRight(),
    T.ToTensor(),  # last, to swap the channel order to c,h,w
    normalize
])

test_transform = T.Compose([
    T.Resize(256),
    T.CenterCrop(224),
    T.ToTensor(),
    normalize
])

class CUB200Data(Dataset):
    def __init__(self, dir_path, batch_k, batch_size, is_train, transform=default_transform):
        self.dir_path = dir_path
        self.batch_k = batch_k
        self.batch_size = batch_size
        self._transform = transform
        self.is_train = is_train
        self.train_image_files = [[] for _ in range(100)]
        self.test_images_files = []  # test image files
        self.test_labels = []        # test image labels
        self.boxes = {}              # per-image bounding box

        with open(os.path.join(dir_path,'images.txt'),'r') as f_img,\
            open(os.path.join(dir_path,'image_class_labels.txt'),'r') as f_label,\
            open(os.path.join(dir_path,'bounding_boxes.txt'),'r') as f_box:
            for line_img, line_label, line_box in zip(f_img, f_label, f_box):
                fname = os.path.join(self.dir_path, 'images', line_img.strip().split()[-1])
                label = int(line_label.strip().split()[-1]) - 1
                box = [int(float(v)) for v in line_box.split()[-4:]]
                self.boxes[fname] = box

                if label < 100:
                    self.train_image_files[label].append(fname)
                else:
                    self.test_images_files.append(fname)
                    self.test_labels.append(label)
        self.n_test = len(self.test_images_files)
        self.train_class_ids = list(np.arange(0, 100))  # all train class ids

    def __len__(self):
        if self.is_train:
            return 200
        else:
            return self.n_test

    def __getitem__(self, index):
        """
        return a whole sampled batch in training, a single image in test
        """
        if self.is_train:
            image_names, labels = self.sample_train_batch()
            # sampled image file names with their corresponding labels
            image_list, label_list = [], []
            for img, label in zip(image_names, labels):
                image = imread(img, flag=1, to_rgb=True)
                x, y, w, h = self.boxes[img]
                image = image[y:min(y+h, image.shape[0]), x:min(x+w, image.shape[1])]
                if image.shape[2] == 1:
                    print("has gray file", img)
                    image = nd.tile(image, (1, 1, 3))  # replicate the gray channel to rgb
                image = self._transform(image)
                image_list.append(image)
                label_list.append(label)
            batch_data = nd.stack(*image_list, axis=0)
            batch_label = nd.array(label_list)
            return batch_data, batch_label
        else:
            img = self.test_images_files[index]  # full path of the file
            image = imread(img, flag=1, to_rgb=1)
            x, y, w, h = self.boxes[img]
            image = image[y:min(y+h, image.shape[0]), x:min(x+w, image.shape[1])]
            image = self._transform(image)

            return image, self.test_labels[index]

    def sample_train_batch(self):
        """sample batch_size//batch_k classes, then batch_k images within each class"""
        batch = []
        labels = []
        num_groups = self.batch_size // self.batch_k
        sampled_classes = np.random.choice(self.train_class_ids, num_groups, replace=False)
        for class_id in sampled_classes:
            img_fname = np.random.choice(self.train_image_files[class_id], self.batch_k, replace=False)
            batch += img_fname.tolist()
            labels += [class_id] * self.batch_k
        return batch, labels


def getCUB200(data_path, batch_k, batch_size):
    train_dataset = CUB200Data(data_path, batch_k=batch_k, batch_size=batch_size, is_train=True, transform=default_transform)
    test_dataset = CUB200Data(data_path, batch_k=batch_k, batch_size=batch_size, is_train=False, transform=test_transform)
    train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False, num_workers=6)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=6)
    return train_loader, test_loader


if __name__=='__main__':
    import ipdb
    #ipdb.set_trace()
    train_loader, test_loader = getCUB200('data/CUB_200_2011', batch_k=5, batch_size=10)
    # for train_batch,test_batch in zip(train_loader,test_loader):
    #     print("begin to resolve data from train_loader and test_loader")
    #     ipdb.set_trace()
    #     print("data",train_batch[0][0].shape,train_batch[1][0].shape)
    #     print("test_data",test_batch[0].shape,test_batch[1].shape)
    #     break
    train_dataset = CUB200Data('data/CUB_200_2011', batch_k=5, batch_size=10, is_train=True)
    ipdb.set_trace()
    data = train_dataset[0]
    print(data)
    test_dataset = CUB200Data('data/CUB_200_2011', batch_k=5, batch_size=10, is_train=False)
    data = test_dataset[0]
    print(data)
    # for test_batch in test_loader:
    #     ipdb.set_trace()
    #     print(test_batch[0].shape,test_batch[1].shape)
    #     break
--------------------------------------------------------------------------------
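Because CUB200Data emits whole batches, the train DataLoader above wraps them with an extra leading axis; a sketch of a consuming loop peeling that off (the training step itself lives in train_mx_ebay_margin.py, which is not shown here):

    train_loader, test_loader = getCUB200('data/CUB_200_2011', batch_k=5, batch_size=70)
    for data, label in train_loader:
        data, label = data[0], label[0]  # (1,70,3,224,224) -> (70,3,224,224)
        # forward through the embedding net and the margin loss, as driven by
        # the train_*.sh scripts above
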
code_predict(net,loader): 27 | code = [] 28 | label = [] 29 | for data in loader: 30 | x,y = data 31 | if torch.cuda.is_available(): 32 | x = x.cuda() 33 | x = model(x) 34 | code.append(x.cpu()) 35 | label.append(y) 36 | code = torch.cat(code,dim=0) 37 | code = torch.sign(code) # the quantization sign function 38 | label = torch.cat(label,dim=0) 39 | return code.numpy(),label.numpy() 40 | test_code,test_label = code_predict(model,test_loader) 41 | database_code,database_label = code_predict(model,database_loader) 42 | 43 | #compute the mean average precision--namely map 44 | query_num = test_code.shape[0] 45 | sim = np.dot(database_code, test_code.T) 46 | ids = np.argsort(-sim, axis=0) 47 | APx = [] 48 | 49 | for i in range(query_num): 50 | label = test_label[i, :] 51 | label[label == 0] = -1 52 | idx = ids[:, i] 53 | imatch = np.sum(database_label[idx[0:R], :] == label, axis=1) > 0 54 | relevant_num = np.sum(imatch) 55 | Lx = np.cumsum(imatch) 56 | Px = Lx.astype(float) / np.arange(1, R + 1, 1) 57 | if relevant_num != 0: 58 | APx.append(np.sum(Px * imatch) / relevant_num) 59 | mAP = np.mean(np.array(APx)) 60 | viz.plot("mAP",str(mAP)) 61 | 62 | 63 | 64 | 65 | 66 | 67 | if __name__=='__main__': 68 | config={} 69 | ags = args.parse_args() 70 | config['gpus']=int(ags.gpus) 71 | os.environ['CUDA_VISIBLE_DEVICES']=ags.gpus 72 | config['dataset']=ags.dataset 73 | config['hash_bit'] = ags.hash_bit 74 | config['iter_nums']= ags.iter_nums 75 | config['train_batch'] = ags.train_batch 76 | config['lr']=ags.lr 77 | config['log_interval'] = ags.log_interval 78 | config['snapshot_interval'] = ags.snapshot_interval 79 | config['test_interval'] = ags.test_interval 80 | config['viz_env'] = ags.viz_env 81 | 82 | 83 | 84 | #program setting 85 | config['weight_decay']=0.0005 86 | 87 | config["optimiz_params"] = {"lr": config['lr'], "momentum": 0.9, "weight_decay": 0.0005, "nesterov": True} 88 | config['lr_scheduler']={"gamma":0.5, "step":2000} 89 | config["loss"] = {"l_weight": 1.0, "q_weight": 0, 90 | "l_threshold": 15.0, "sigmoid_param": 10. 
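                      # NOTE (editor): scaling the sigmoid steepness inversely with the
                      # code length (10 / hash_bit below) keeps inner products of longer
                      # codes inside the sigmoid's sensitive range, following the HashNet
                      # adaptive-sigmoid configuration.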
                                                            / config["hash_bit"],
                      "class_num": ags.class_num}

    pprint(config)  # show the resolved config
    # prepare the model and datasets
    model = HashNetRes50(n_bit=config['hash_bit'])
    criteria = HashLoss(hash_bit=config['hash_bit'])

    train1, train2, test_loader, database_loader = get_hash_dataloader(config['dataset'], config['train_batch'],
                                                                       config['train_batch'] // 2, config['train_batch'] // 2)
    if torch.cuda.is_available():
        model = model.cuda()
    # per-group learning rates: the hash layer trains 10x faster than the backbone
    # (the original used absolute lrs of 1 and 10, which would silently override config['lr'])
    params_list = [{"params": model.feature_layers.parameters(), 'lr': config['lr']},
                   {"params": model.hash_layer.parameters(), 'lr': 10 * config['lr']}]
    optimizer = optim.SGD(params_list, lr=config['lr'], momentum=0.9, weight_decay=config['weight_decay'], nesterov=True)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2000,
                                             gamma=0.5, last_epoch=-1)

    viz = Visulizer(host='http://hpc3.yud.io', port=8088, env=config['viz_env'])
    viz.log("start the hash learning")
    viz.log(config)
    len_train = len(train1)
    train_loss = 0
    for it in range(config['iter_nums']):

        lr_scheduler.step()
        if it % len_train == 0:
            iter1 = iter(train1)
            iter2 = iter(train2)
        train_part1 = next(iter1)  # Python 3: next(it), not it.next()
        train_part2 = next(iter2)  # the same train data under two different shuffles

        x1, y1 = train_part1
        x2, y2 = train_part2
        if torch.cuda.is_available():
            x1 = x1.cuda()
            x2 = x2.cuda()
            y1 = y1.cuda()
            y2 = y2.cuda()
        inputs = torch.cat((x1, x2), dim=0)
        labels = torch.cat((y1, y2), dim=0)
        optimizer.zero_grad()  # was missing: without it, gradients accumulate across iterations
        outputs = model(inputs)
        loss = criteria(outputs, labels, sigmoid_param=config["loss"]["sigmoid_param"],
                        l_threshold=config["loss"]["l_threshold"],
                        class_num=config["loss"]["class_num"])
        loss.backward()
        train_loss += loss.item()
        if (it + 1) % config['log_interval'] == 0:
            print("Iter: {:05d}, loss: {:.3f}".format(it, train_loss / config['log_interval']))
            train_loss = 0
        optimizer.step()

        if it % config['snapshot_interval'] == 0:
            torch.save(nn.Sequential(model),
                       './checkpoints/resnet_{0}_{1}_{2}.pth.tar'.format(config['dataset'], config['hash_bit'], it))
        if it % config['test_interval'] == 0:
            test_model(model, test_loader, database_loader, viz)  # viz was missing from this call
    viz.log("finish train model")
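# NOTE (editor): a self-contained toy version of the mAP@R computation in test_model
# above, with purely illustrative data; uncomment to sanity-check the metric.
#
#     import numpy as np
#     R = 5
#     database_code = np.sign(np.random.randn(20, 8))            # 20 items, 8-bit codes
#     test_code = np.sign(np.random.randn(3, 8))                 # 3 queries
#     database_label = np.eye(4)[np.random.randint(0, 4, 20)]    # one-hot labels
#     test_label = np.eye(4)[np.random.randint(0, 4, 3)]
#     sim = np.dot(database_code, test_code.T)                   # code similarity
#     ids = np.argsort(-sim, axis=0)                             # rank database per query
#     APx = []
#     for i in range(test_code.shape[0]):
#         label = test_label[i, :]
#         label[label == 0] = -1                                 # so 0-entries never match
#         imatch = np.sum(database_label[ids[:R, i], :] == label, axis=1) > 0
#         if imatch.sum() != 0:
#             Px = np.cumsum(imatch).astype(float) / np.arange(1, R + 1)
#             APx.append(np.sum(Px * imatch) / imatch.sum())
#     print("toy mAP@%d = %.3f" % (R, np.mean(APx)))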
--------------------------------------------------------------------------------
/models/sample_dml.py:
--------------------------------------------------------------------------------
"""
This module implements the "Sampling Matters in Deep Embedding Learning" paper:
ResNet-50 is used as the base model to extract 128-dimensional discriminative
features, combined with distance-weighted sampling and a margin-based loss.
"""

import torch
from torchvision.models import resnet50
import torch.nn as nn
import torch.nn.functional as F
from .vgg_classify import BaseModule
from collections import OrderedDict
import numpy as np

class SampleModel(BaseModule):
    """
    The ResNet-50 based embedding model.
    """
    def __init__(self, embeding_dim=128):
        super(SampleModel, self).__init__()
        basic_model = resnet50(pretrained=True)

        feature = list(basic_model.named_children())[:-1]
        self.base_model = nn.Sequential(OrderedDict(feature))
        self.dense = nn.Linear(in_features=2048, out_features=embeding_dim)  # was hard-coded to 128, ignoring embeding_dim

    def forward(self, x):
        """
        Extract the embedding; every consecutive batch_k images belong to the same class.
        :param x: batch of images, a torch Tensor of shape (N, C, H, W), e.g. (70, 3, 224, 224)
        :return: L2-normalized image features, a torch Tensor of shape (N, D), e.g. (70, 128)
        """
        res_feature = self.base_model(x)
        res_feature = res_feature.view(res_feature.size(0), -1)
        embeding = self.dense(res_feature)
        embeding = F.normalize(embeding, p=2, dim=1)
        return embeding


class Margin_Loss(nn.Module):
    """The margin loss combines distance-weighted sampling with the margin-based loss,
    both computed as in the paper 'Sampling Matters in Deep Embedding Learning'."""
    def __init__(self, batch_k=5, margin=0.2, nu=0.0, cutoff=0.5, nonzero_loss_cutoff=1.4):
        """
        This loss receives a batch of image features and computes the
        distance-weighted-sampling margin loss.
        :param batch_k: number of images per class
        :param margin: the alpha margin from the paper
        :param nu: regularization strength for beta
        """
        super(Margin_Loss, self).__init__()
        self.margin = torch.tensor(margin, dtype=torch.float32)
        self.nu = torch.tensor(nu, dtype=torch.float32)
        self.batch_k = batch_k
        self.cutoff = cutoff  # lower bound on distances used in the sampling probability
        self.nonzero_loss_cutoff = nonzero_loss_cutoff  # upper bound on sampled negative distances
        self.relu1 = torch.nn.ReLU()
        self.relu2 = torch.nn.ReLU()

    def convert_param(self, to_cuda=True):
        """
        Move the margin and nu coefficients to the GPU or back to the CPU.
        :param to_cuda: move to GPU when True, otherwise to CPU
        """
        if to_cuda:
            self.margin = self.margin.cuda()
            self.nu = self.nu.cuda()
        else:
            self.margin = self.margin.cpu()
            self.nu = self.nu.cpu()
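    # NOTE (editor): for reference, forward() below implements the margin-based loss
    #   L = relu(d_ap - beta + margin) + relu(beta - d_an + margin),
    # summed and divided by the number of pairs with non-zero loss, plus nu * sum(beta)
    # as the beta regularizer. Quick numeric check with beta=1.2, margin=0.2:
    # d_ap=1.1 gives pos_loss = relu(1.1 - 1.2 + 0.2) = 0.1, and
    # d_an=1.3 gives neg_loss = relu(1.2 - 1.3 + 0.2) = 0.1.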
    def forward(self, x, y, beta_in):
        """
        :param x: features extracted from resnet, a torch.Tensor of shape (n, d), typically (70, 128)
        :param y: the class label of each sample, in the same order as beta_in
        :param beta_in: a torch tensor with requires_grad=True holding the per-class beta
        :return: the sum of the beta regularization loss and the margin loss
        """
        a_index, p_index, n_index = self.sampling(x)  # anchors, positives and negatives drawn from the distance-weighted distribution
        beta_work = beta_in[a_index]  # per-anchor beta coefficients (indexing keeps the graph so beta receives gradients)
        beta_reg_loss = torch.sum(beta_work) * self.nu

        # compute the margin loss from the features
        anchors = x[a_index]
        positives = x[p_index]
        negatives = x[n_index]
        d_ap = torch.sqrt(torch.sum((anchors - positives) * (anchors - positives), 1) + 1e-8)
        d_an = torch.sqrt(torch.sum((anchors - negatives) * (anchors - negatives), 1) + 1e-8)

        pos_loss = self.relu1(d_ap - beta_work + self.margin)
        neg_loss = self.relu2(beta_work - d_an + self.margin)
        pair_cnt = torch.sum((pos_loss > 0.0) + (neg_loss > 0.0))

        # normalize by the number of pairs with non-zero loss
        loss = (torch.sum(pos_loss + neg_loss) + beta_reg_loss) / pair_cnt.float()
        return loss

    def sampling(self, x):
        """
        Sample (anchor, positive, negative) triplets weighted by pairwise distance.
        :param x: the (N, 128) tensor of extracted features
        :return: anchor, positive and negative index lists
        """
        np_feature = x.cpu().detach().numpy()
        k = self.batch_k
        n, d = np_feature.shape

        # compute pairwise distances
        dis_matrix = self.get_distance(np_feature)

        # clamp from below to avoid high variance of the weights
        dis_matrix = np.maximum(dis_matrix, self.cutoff)

        log_weights = ((2.0 - float(d)) * np.log(dis_matrix)
                       - (float(d - 3) / 2) * np.log(1.0 - 0.25 * (dis_matrix ** 2)))
        # subtract the global max before exponentiating, for numerical stability
        weights = np.exp(log_weights - log_weights.max())
        mask = np.ones(weights.shape)
        for i in range(0, n, k):
            mask[i:i + k, i:i + k] = 0  # zero out the same-class blocks along the diagonal
        weights = weights * mask * (dis_matrix < self.nonzero_loss_cutoff)
        # NOTE (editor): the remainder of this file was lost in extraction (an unescaped
        # "<" swallowed everything up to the next ">"). The tail below is a reconstruction
        # based on the MXNet twin, DistanceWeightedSampling in models/mx_margin_model.py.
        weights_sum = weights.sum(axis=1)
        uniform_probs = mask * (1.0 / (n - k))  # fallback when every candidate is past the cutoff
        a_indices, p_indices, n_indices = [], [], []
        for i in range(n):
            block_idx = i // k
            probs = weights[i] / weights_sum[i] if weights_sum[i] != 0 else uniform_probs[i]
            n_indices += np.random.choice(n, k - 1, p=probs).tolist()
            for j in range(block_idx * k, (block_idx + 1) * k):
                if j != i:
                    a_indices.append(i)
                    p_indices.append(j)
        return a_indices, p_indices, n_indices

    def get_distance(self, x):
        """Pairwise Euclidean distance matrix (also reconstructed, see the note above)."""
        square = np.sum(x * x, axis=1, keepdims=True)
        distance_square = square + square.T - 2.0 * np.dot(x, x.T)
        return np.sqrt(np.maximum(distance_square, 0) + 1e-8)
--------------------------------------------------------------------------------
/data/n_pair_mc/npair_dataset.py:
--------------------------------------------------------------------------------
# NOTE (editor): the top of this file (imports, the EbayDataset class definition and
# most of its __init__) was lost in the same extraction accident; the surviving text
# resumes inside __init__, where image groups are indexed per class id.
                if len(group_image_id) >= 2:
                    self.classid2imageid[class_id] = group_image_id  # a usable group needs at least 2 images
        self.image_nums = self.data.image_id.count()

    def __len__(self):
        """Dataset length, i.e. the DataLoader cycle size."""
        if self.train:
            return len(self.all_class)  # 11318
        else:
            return self.image_nums

    def __getitem__(self, index):
        """Get one n-pair batch (train) or one image (test); with a DataLoader the batch size is always 1.
        Train mode: index is a class_id, and a batch of distinct classes is selected to build an n-pair.
        Test mode: index is an image_id; returns one image with its image_id and class_id, whose
        extracted feature is later sent to clustering.
        """
        if self.train:
            class_id = self.all_class[index]
            super_id = self.data[self.data.class_id == class_id].super_class_id.iloc[0]  # was class_id==1, which always picked class 1's super class
            anchor_class = []
            anchor_class.append(class_id)
            inner_count = int(0.9 * self.batch_size // 2)  # pairs drawn from other classes within the same super class

            inner_class = np.random.choice(self.super2class[super_id], inner_count, False)
            anchor_class.extend(inner_class)
            anchor_class = list(set(anchor_class))  # drop duplicates

            outer_count = self.batch_size // 2 - len(anchor_class)
            outer_class = np.random.choice(self.super_ids, outer_count, True)
            for outer_id in outer_class:
                anchor_class.extend(np.random.choice(self.super2class[outer_id], 1))

            # from each anchor class, select the anchor image and the positive image
            image_id = []
            for anchor_id in anchor_class:
                select = np.random.choice(self.classid2imageid[anchor_id], 2, False)
                image_id.extend(select)

            anchor_path = self.data[self.data.image_id.isin(image_id)][['image_id', 'path']]
            anchor_path = anchor_path.sort_index(axis=0)  # sort_index returns a copy; the original discarded it
            # stack the images into one batch tensor: 
first construct 32 image to a numpy ndarray, 101 | tensor_list=[] 102 | tensor_p=[] 103 | jump = False 104 | for i,image_path in enumerate(anchor_path.path): 105 | image = Image.open(os.path.join(self.root,image_path)).convert('RGB') 106 | if self.transform: 107 | data = self.transform(image) 108 | if i%2==0: 109 | if data.size(0)<3: # the anchor image channel not 3 110 | jump = True # jump the next image 111 | continue 112 | jump = False 113 | tensor_list.append(data) 114 | else: 115 | if jump: 116 | continue 117 | if data.size(0)<3: # the pair iamge channel not 3 118 | tensor_list.pop(-1) # delete the last one in tensor_list 119 | continue 120 | tensor_p.append(data) 121 | 122 | 123 | tensor_list.extend(tensor_p) 124 | #print("tensor dataset",len(tensor_list)) 125 | batch_tensor = torch.stack(tensor_list,dim=0) 126 | return batch_tensor 127 | else: 128 | item = self.data.loc[index] 129 | image_path = item['path'] 130 | image_id = int(item['image_id']) 131 | image_class= int(item['class_id']) 132 | default_path = self.data.loc[0]['path'] 133 | image = Image.open(os.path.join(self.root, image_path)).convert('RGB') 134 | if self.transform: 135 | data = self.transform(image) 136 | if data.size(0)<3: 137 | image = Image.open(os.path.join(self.root, default_path)) 138 | data = self.transform(image) 139 | image_id =0 140 | image_class =0 141 | return data,image_id,image_class 142 | 143 | 144 | 145 | if __name__=='__main__': 146 | """ to test the dataset""" 147 | import ipdb 148 | ipdb.set_trace() 149 | root = '/data/jh/notebooks/hudengjun/DML/deep_metric_learning/lib/online_products/Stanford_Online_Products/' 150 | dataset = EbayDataset(dir_root=root) 151 | data = dataset[0] 152 | print(type(data)) 153 | 154 | test_dataset = EbayDataset(dir_root=root,train=False) 155 | data = test_dataset[0] 156 | print(data) 157 | 158 | 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /train_mc_npair.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.optim as optim 3 | from data import EbayDataset 4 | import os 5 | from configs import opt 6 | from models import ModGoogLeNet,NpairLoss 7 | from torch.utils.data import DataLoader 8 | from tqdm import tqdm 9 | from utils import Visulizer 10 | import csv 11 | import numpy as np 12 | 13 | def train(**kwargs): 14 | print("run train") 15 | opt.parse(kwargs) 16 | os.environ['CUDA_VISIBLE_DEVICES'] = str(opt.gpu_id) 17 | 18 | if opt.debug: 19 | import ipdb 20 | ipdb.set_trace() 21 | model =ModGoogLeNet(embeding_size=opt.embeding_size) 22 | if opt.dml_model_path: 23 | model.load(opt.dml_model_path) 24 | if opt.use_gpu: 25 | model = model.cuda() 26 | #model.freeze_model(level=opt.freeze_level) 27 | 28 | if opt.use_viz: 29 | viz = Visulizer(host=opt.vis_host,port=opt.vis_port,env='dml'+opt.vis_env) 30 | viz.log("start to train dml npair mc model") 31 | 32 | #loss function 33 | criterion = NpairLoss(l2_reg=opt.l2_reg) 34 | lr = opt.lr 35 | m = opt.momentum 36 | optimizer = optim.SGD([{'params':model.level1_2.parameters()}, 37 | {'params': model.level_3_4.parameters()}, 38 | {'params': model.level_5_6.parameters()}, 39 | {'params': model.level_7.parameters()}, 40 | {'params':model.fc.parameters(),'lr':10*lr}],lr=lr,momentum=m) 41 | #optimizer = optim.SGD(model.parameters(),lr=lr,momentum=m) 42 | lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,1,0.2) 43 | 44 | # data and dataloader 45 | train_data = EbayDataset(dir_root=opt.ebay_dir, train=True, 
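        # NOTE (editor): each EbayDataset index yields a whole n-pair batch: the first
        # half of the stacked tensor are anchors, the second half the matching positives,
        # which is why the loop below builds target = arange(batch_size / 2).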
batch_size=opt.batch_size)
    cycle_length = len(train_data)
    #val_data = EbayDataset(dir_root=opt.ebay_dir, train=False, batch_size=opt.batch_size)
    train_dataloader = DataLoader(train_data, batch_size=1, shuffle=True, num_workers=opt.num_workers)
    #val_dataloader = DataLoader(val_data, batch_size=60, shuffle=False, num_workers=opt.num_workers)

    print("dataloader set up, begin to train")

    #f = open('dml_log.out','w')
    for epoch in range(opt.max_epoch):
        lr_scheduler.step()
        train_loss = 0

        for i, data in enumerate(train_dataloader):
            # if i in [200, 800, 1500]:
            #     lr_scheduler.step()

            data = data[0]
            if opt.use_gpu:
                data = data.cuda()
            optimizer.zero_grad()
            feature = model(data)
            batch_size = data.size(0)
            # the first half of the batch are anchors, the second half their positives
            target = torch.arange(0, int(batch_size / 2), dtype=torch.int64).cuda()
            loss = criterion(feature, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            freq = int(opt.print_freq)
            if i % freq == (freq - 1):
                average_loss = train_loss / opt.print_freq
                #f.write("iteration:{0},dml_loss:{1}\n".format(i + epoch*cycle_length, average_loss))
                #f.flush()
                if opt.use_viz:
                    viz.plot('dml_loss', average_loss)
                train_loss = 0
            if opt.debug:
                break
        #f.write("epoch:{0} finished,begin to valid test".format(epoch))
        model.save()
        # if epoch>1 and epoch%5==0:
        #     val(model,val_dataloader,epoch)
        if opt.debug:
            #f.write("finish one iter")
            break
    #f.write("finish train epoch {0}".format(opt.max_epoch))
    #f.close()


def val(model, dataloder, epoch):
    """
    Validate the model by computing the NMI (normalized mutual information) index.
    :param model: the embedding model
    :param dataloder: the validation dataloader
    :return:
    """
    # prepare a csv file to hold the extracted features
    file_name = 'checkpoints/online_product_{0}.csv'.format(epoch)
    f = open(file_name, 'w')
    writer = csv.writer(f, dialect='excel')
    model.eval()
    # feature extraction: first, extract the feature vector of every (image, image_id, class_id)
    for i, (data, image_id, class_id) in enumerate(dataloder):
        if opt.use_gpu:
            data = data.cuda()
        feature = model(data)  # feature is a [batch, 512] tensor
        vector = feature.cpu().detach().numpy() if opt.use_gpu else feature.numpy()
        image_id = image_id.numpy().reshape(-1, 1)
        class_id = class_id.numpy().reshape(-1, 1)
        result = np.hstack([image_id, class_id, vector])  # np.hstack takes one sequence; the original passed three positional args

        # write the rows out
        writer.writerows(result)
        if opt.debug:
            print("test one batch of val data and save to csv file")
            break
    f.close()
    # clustering to centroids: second, run kmeans over all feature vectors into |class_id| clusters
    #featuredata = pd.read_csv(file_name,header=None)

    # finally compute the NMI between the original and clustered distributions (sklearn metrics)

    model.train()
    print("finished cluster and evaluation")


def compute(**kwargs):
    print("run compute_vector")
    opt.parse(kwargs)
    os.environ['CUDA_VISIBLE_DEVICES'] = str(opt.gpu_id)

    if opt.debug:
        import ipdb
        ipdb.set_trace()
    model = ModGoogLeNet(embeding_size=opt.embeding_size)
    if opt.dml_model_path:
        model.load(opt.dml_model_path)
    if opt.use_gpu:
        model = model.cuda()

    val_data = 
EbayDataset(dir_root=opt.ebay_dir, train=False, batch_size=opt.batch_size) 148 | val_dataloader = DataLoader(val_data, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers) 149 | 150 | file_name = 'checkpoints/online_product_compute.csv' 151 | f = open(file_name, 'w') 152 | writer = csv.writer(f, dialect='excel') 153 | model.eval() 154 | # feature extreat,fisrt for all image,image_id,class_id extract the feature vector 155 | for i, (data, image_id, class_id) in enumerate(val_dataloader): 156 | if opt.use_gpu: 157 | data = data.cuda() 158 | feature = model(data) # the feature is [batch,512] vector 159 | vector = feature.cpu().detach().numpy() if opt.use_gpu else feature.numpy() 160 | image_id = image_id.numpy().reshape(-1, 1) 161 | class_id = class_id.numpy().reshape(-1, 1) 162 | result = np.hstack([image_id, class_id, vector]) 163 | 164 | # write the data to dataframe file 165 | writer.writerows(result) 166 | if opt.debug: 167 | print("test one batch of val data and save to csv file") 168 | break 169 | f.close() 170 | 171 | 172 | def help(): 173 | """print function use information""" 174 | print("""this file help to train product train: 175 | exanple --: 176 | python train_mc_npair.py help 177 | python train_mc_npair.py train --gpu_id=3 --debug=True 178 | python train_mc_npair.py train --gpu_id=2 --batch_size=72 179 | python train_mc_npair.py train --gpu_id=3 --lr=0.0003 --batch_size=72 180 | python train_mc_npair.py train --gpu_id=0 --debug=True --dml_model_path=checkpoints/DMLGoogle_0710_20\:24\:04.pth 181 | python train_mc_npair.py train --batch_size=120 --gpu_id=3 --lr=0.0001 --debug=True --dml_model_path=checkpoints/DMLGoogle_0710_20\:24\:04.pth 182 | python train_mc_npair.py compute --batch_size=300 --gpu_id=2 --dml_model_path=checkpoints/DMLGoogle_0714_07:51:44.pth --num_workers=6 183 | """) 184 | 185 | if __name__=='__main__': 186 | import fire 187 | fire.Fire() -------------------------------------------------------------------------------- /data/mxdata/online_products.py: -------------------------------------------------------------------------------- 1 | from mxnet.image import * 2 | from mxnet.gluon.data import Dataset,DataLoader 3 | from mxnet.image import * 4 | import numpy as np 5 | import mxnet as mx 6 | from mxnet.gluon import nn 7 | import mxnet.gluon.data.vision.transforms as T 8 | import pandas as pd 9 | 10 | normalize=T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225]) 11 | default_transform = T.Compose([ 12 | T.Resize(256), 13 | T.RandomResizedCrop(size=224,scale=(1.0,1.0),ratio=(1.0,1.0)),# just crop,not scale 14 | T.RandomFlipLeftRight(), 15 | T.ToTensor(), # last to swap channel to c,w,h 16 | normalize 17 | ]) 18 | 19 | test_transform = T.Compose([ 20 | T.Resize(256), 21 | T.CenterCrop(224), 22 | T.ToTensor(), 23 | normalize 24 | ]) 25 | 26 | class MxEbayInClass(Dataset): 27 | """this is an mxnet edition of Ebay dataset""" 28 | def __init__(self,dir_root,batch_k=4,batch_size=40,is_train=True,transform =default_transform): 29 | self.batch_size=batch_size 30 | self.batch_k = batch_k 31 | self.root = dir_root 32 | self._trans = transform 33 | self.is_train = is_train 34 | 35 | self.test_image_files =[] 36 | self.test_labels =[] 37 | self.train_length = 0 38 | 39 | if self.is_train: 40 | table_name = os.path.join(self.root,'Ebay_train.txt') 41 | table_data = pd.read_table(table_name, header=0, delim_whitespace=True) 42 | min_super_id, max_super_id = min(table_data.super_class_id), max(table_data.super_class_id) 43 | self.super_ids = 
np.arange(min_super_id, max_super_id + 1) 44 | self.super2class = {} 45 | for super_id in self.super_ids: 46 | self.super2class[super_id] = table_data[table_data.super_class_id == super_id].class_id.tolist() 47 | 48 | min_class_id,max_class_id = min(table_data.class_id),max(table_data.class_id) 49 | self.class_ids = list(np.arange(min_class_id,max_class_id+1)) 50 | self.train_length = max_class_id+1-min_class_id 51 | self.super_id_dist = [len(v) for k,v in self.super2class.items()] 52 | total = sum(self.super_id_dist) 53 | self.super_id_dist = [v*1.0/total for v in self.super_id_dist] 54 | self.class2imagefiless = [[]] # placeholder for class_id = 0 55 | for class_id in self.class_ids: 56 | one_class_paths = table_data[table_data.class_id==class_id].path.tolist() # type list 57 | self.class2imagefiless.append(one_class_paths) 58 | else: 59 | table_name = os.path.join(self.root,'Ebay_test.txt') 60 | table_data = pd.read_table(table_name,header=0,delim_whitespace=True) 61 | 62 | self.test_image_files = table_data.path.tolist() 63 | self.test_labels = table_data.class_id.tolist() 64 | 65 | 66 | 67 | def __len__(self): 68 | if self.is_train: 69 | return 800 70 | else: 71 | return 4000 72 | 73 | def sample_train_batch(self): 74 | batch =[] 75 | labels =[] 76 | num_groups = self.batch_size // self.batch_k # for every sample count k 77 | super_id = np.random.choice(list(self.super2class.keys()), size=1,p=self.super_id_dist)[0] # the super class id 78 | sampled_class = np.random.choice(self.super2class[super_id], num_groups*2, replace=False) 79 | for i in sampled_class: 80 | try: 81 | img_fnames = np.random.choice(self.class2imagefiless[i], 82 | self.batch_k, 83 | replace=False) 84 | except Exception as e: # just has not enough data to choose 85 | continue 86 | batch += img_fnames.tolist() 87 | labels += [i]*self.batch_k 88 | if len(batch)>=self.batch_size: 89 | break 90 | return batch,labels 91 | 92 | 93 | def __getitem__(self, index): 94 | """get data batch like pytorch, 95 | only smaple same super class_id,not cross sample""" 96 | if self.is_train: 97 | imagelist =[] 98 | batch,labels = self.sample_train_batch() 99 | for file in batch: 100 | file_path = os.path.join(self.root,file) 101 | img = image.imread(file_path,to_rgb=1,flag=1) 102 | img = self._trans(img) 103 | imagelist.append(img) 104 | return nd.stack(*imagelist,axis=0),nd.array(labels) 105 | else: 106 | file = self.test_image_files[index] 107 | label = self.test_labels[index] 108 | img = image.imread(os.path.join(self.root,file),flag=1,to_rgb=1) 109 | img = self._trans(img) 110 | return img,label 111 | 112 | 113 | 114 | def getEbayInClassData(root,batch_k,batch_size): 115 | train_dataset = MxEbayInClass(root,batch_k=batch_k,batch_size=batch_size,is_train=True,transform=default_transform) 116 | test_dataset = MxEbayInClass(root,batch_k=batch_k,batch_size=batch_size,is_train=False,transform=test_transform) 117 | train_loader = DataLoader(train_dataset,batch_size=1,shuffle=False,num_workers=6) 118 | test_loader = DataLoader(test_dataset,batch_size=test_dataset.batch_size,shuffle=False,num_workers=6) 119 | return train_loader,test_loader 120 | 121 | 122 | class MxEbayCrossClass(MxEbayInClass): 123 | """the cross class edition of StanfordOnlineProducts""" 124 | def __init__(self,dir_root,batch_k=4,batch_size=40,is_train=True,transform =default_transform): 125 | super(MxEbayCrossClass,self).__init__(dir_root=dir_root,batch_k=batch_k,batch_size=batch_size,is_train=is_train,transform=transform) 126 | self.datatype="CrossClass" 127 | 128 | def 
sample_train_batch(self): 129 | """rewrite the sample strategy""" 130 | batch = [] 131 | labels = [] 132 | num_groups = self.batch_size // self.batch_k # for every sample count k 133 | 134 | #directly choose the class_id 135 | sampled_class = np.random.choice(self.class_ids, num_groups * 2, replace=False) 136 | for i in sampled_class: 137 | try: 138 | img_fnames = np.random.choice(self.class2imagefiless[i], 139 | self.batch_k, 140 | replace=False) 141 | except: 142 | print("class id:{0},instance count small than {1}".format(i, self.batch_k)) 143 | continue 144 | batch += img_fnames.tolist() 145 | labels += [i] * self.batch_k 146 | if len(batch) >= self.batch_size: 147 | break 148 | return batch, labels 149 | 150 | def getEbayCrossClassData(root,batch_k,batch_size): 151 | train_dataset = MxEbayCrossClass(root, batch_k=batch_k, batch_size=batch_size, is_train=True, transform=default_transform) 152 | test_dataset = MxEbayCrossClass(root, batch_k=batch_k, batch_size=batch_size, is_train=False, transform=test_transform) 153 | train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False, num_workers=6) 154 | test_loader = DataLoader(test_dataset, batch_size=test_dataset.batch_size, shuffle=False, num_workers=6) 155 | return train_loader, test_loader 156 | 157 | if __name__=='__main__': 158 | # construct the dataset and get data in train and test mode 159 | 160 | train_data = MxEbayInClass(dir_root='data/Stanford_Online_Products',\ 161 | batch_k=4,batch_size=40,is_train=True,\ 162 | transform=default_transform) 163 | 164 | data = train_data[0] 165 | 166 | train_crossdata = MxEbayCrossClass(dir_root='data/Stanford_Online_Products',\ 167 | batch_k=4,batch_size=40,is_train=True,\ 168 | transform=default_transform) 169 | data2 = train_crossdata[0] 170 | import ipdb 171 | ipdb.set_trace() 172 | test_data = MxEbayInClass(dir_root='data/Stanford_Online_Products',\ 173 | batch_k=4,batch_size=40,is_train=False,\ 174 | transform=test_transform) 175 | data = test_data[0] 176 | 177 | 178 | 179 | 180 | 181 | 182 | -------------------------------------------------------------------------------- /train_margin_cub.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import logging 4 | import numpy as np 5 | import torch 6 | 7 | from torch.utils.data import DataLoader 8 | import os 9 | from data import CUB200DataSet 10 | from models import Margin_Loss,SampleModel 11 | 12 | 13 | parser = argparse.ArgumentParser(description="train a margin based loss model") 14 | parser.add_argument('--data_path',type=str,default="data/cub200_2011", 15 | help='path of the cub_data') 16 | parser.add_argument('--embed_dim',type=int,default=128, 17 | help='dimensionality of image embeding,times of 8') 18 | parser.add_argument('--batch_size',type=int,default=70, 19 | help='training batch size per device') 20 | parser.add_argument('--batch_k',type=int,default=5, 21 | help='number of images per class in a batch,can be divided by batch_size') 22 | parser.add_argument('--gpu_id',type=str,default='0', 23 | help='the gpu_id of the runing batch') 24 | parser.add_argument('--epochs',type=int,default=100, 25 | help='number of training epochs,default is 100') 26 | parser.add_argument('--optimizer',type=str,default='adam', 27 | help='optimizer,default is adam') 28 | parser.add_argument('--lr',type=float,default=0.0001, 29 | help='learning rate of the resnet and dense layer') 30 | parser.add_argument('--lr_beta',type=float,default=0.1, 31 | help='learning rate for the 
beta in margin based loss')
parser.add_argument('--margin', type=float, default=0.2,
                    help='margin for the margin based loss,default is 0.2')
parser.add_argument('--beta', type=float, default=1.2,
                    help='the class specific beta parameter')
parser.add_argument('--nu', type=float, default=0.0,
                    help='regularization parameter for beta,default is 0')
parser.add_argument('--steps', type=str, default='30,50,100,300',
                    help='epochs at which to update the learning rate')
parser.add_argument('--wd', type=float, default=0.0001,
                    help='weight decay rate,default is 0.0001')
parser.add_argument('--seed', type=int, default=123,
                    help='random seed to use,default=123')
parser.add_argument('--factor', type=float, default=0.5,
                    help='learning rate schedule factor,default is 0.5')
parser.add_argument('--print_freq', type=int, default=20,
                    help='interval for printing the accumulated training loss')
parser.add_argument('--debug', action='store_true', default=False)


opt = parser.parse_args()
logging.info(opt)
torch.random.manual_seed(opt.seed)
np.random.seed(opt.seed)
batch_size = opt.batch_size
os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id
steps = [int(step) for step in opt.steps.split(',')]


def train():
    """
    Train the margin-based-loss model.
    """
    # prepare the dataloaders (note: the path is hard-coded here instead of using opt.data_path)
    train_data = CUB200DataSet(data_path='data/cub200_2011/CUB_200_2011', batch_k=opt.batch_k, batch_size=opt.batch_size, is_train=True)
    test_data = CUB200DataSet(data_path='data/cub200_2011/CUB_200_2011', is_train=False)

    train_loader = DataLoader(train_data, batch_size=1, shuffle=False, num_workers=6)
    test_loader = DataLoader(test_data, batch_size=60, shuffle=False, num_workers=6)

    # set up the model, loss, optimizers and lr schedule
    model = SampleModel(embeding_dim=opt.embed_dim)
    device = torch.device('cuda') if int(opt.gpu_id) >= 0 else torch.device('cpu')
    # beta must be created as a leaf tensor on its final device: rebinding it with
    # beta.cuda() later (as the original did) hands the loss a stale copy that
    # optimizer_beta never updates
    beta = torch.tensor(np.ones(100) * opt.beta, requires_grad=True, dtype=torch.float32, device=device)

    loss_criterion = Margin_Loss(batch_k=opt.batch_k,
                                 margin=opt.margin, nu=opt.nu)

    conv_params = []
    non_conv_param = []
    for name, param in model.base_model.named_parameters():
        if 'conv' in name:
            conv_params.append({'params': param, 'lr': opt.lr * 0.01})  # pretrained conv layers train 100x slower
        else:
            non_conv_param.append({'params': param, 'lr': opt.lr})
    total_param = []
    total_param.append({'params': model.dense.parameters(), 'lr': opt.lr})
    total_param.extend(conv_params)
    total_param.extend(non_conv_param)
    #optimizer = torch.optim.Adam(total_param,lr=opt.lr,weight_decay=opt.wd)
    optimizer = torch.optim.SGD(total_param, lr=opt.lr, momentum=0.89)
    optimizer_beta = torch.optim.SGD([{'params': beta}], lr=opt.lr_beta, momentum=0.9)

    lr_schedule = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                       milestones=steps, gamma=opt.factor)

    if int(opt.gpu_id) >= 0:
        model = model.cuda()
        loss_criterion = loss_criterion.cuda()
        loss_criterion.convert_param(to_cuda=True)  # the margin and nu constants live on the GPU too

    # fetch data and train the model
    for epoch in range(opt.epochs):
        print("begin to train epoch: {0}".format(epoch))  # was print("...{0}", epoch), which never formatted
        cumulative_loss = 0
        prev_loss = 0
        lr_schedule.step()
        for i, data in enumerate(train_loader):
            images, label = data[0][0], data[1][0]
            if int(opt.gpu_id) >= 0:
                images = images.cuda()
                label = label.cuda()
            optimizer.zero_grad()       # both zero_grad() calls were missing in the original,
            optimizer_beta.zero_grad()  # so gradients accumulated across iterations
            features = model(images)
            loss = loss_criterion(features, label, beta)
            loss.backward()
            optimizer.step()
            optimizer_beta.step()
            cumulative_loss += loss.item()
            if (i + 1) % (opt.print_freq) == 0:
                print("[Epoch %d,Iter %d] training loss=%f" % (epoch, i + 1, cumulative_loss - prev_loss))
                prev_loss = cumulative_loss
            if opt.debug:
                break

        print("[Epoch %d] training loss=%f" % (epoch, cumulative_loss))
        # report the validation recall
        names, val_accs = val_model(model, test_loader)
        for name, val_acc in zip(names, val_accs):
            print("Epoch %d,validation:%s=%f" % (epoch, name, val_acc))
    print("job finished")


def val_model(model, test_loader):
    """
    Validate the model and return the Recall@K indices.
    :param model: margin-based model extracting 128-dimensional features
    :param test_loader: test dataloader
    :return: the Recall@K indices
    """
    model.eval()
    outputs = []
    labels = []
    with torch.no_grad():
        for data, label in test_loader:
            if int(opt.gpu_id) >= 0:
                data = data.cuda()
            feature = model(data)
            outputs += feature.detach().cpu().numpy().tolist()
            labels += label.numpy().tolist()
    model.train()

    # evaluate recall@k
    features = np.array(outputs)
    labels = np.array(labels)

    return evaluate_emb(features, labels)

def evaluate_emb(features, labels):
    """
    Evaluate the embedding by Recall@K.
    :param features:
    :param labels:
    :return:
    """
    d_mat = get_distance_matrix(features)
    names = []
    accs = []
    for k in [1, 2, 4, 8, 16]:
        names.append('Recall@%d' % k)
        correct, cnt = 0.0, 0.0
        for i in range(features.shape[0]):
            d_mat[i, i] = 1e10
            nns = d_mat[i].argpartition(k)[:k]
            if any(labels[i] == labels[nn] for nn in nns):
                correct += 1
            cnt += 1
        accs.append(correct / cnt)
    return names, accs  # names is a list of "Recall@K" strings, accs the matching float values


def get_distance_matrix(x):
    """
    Compute the squared distance matrix of the features.
    :param x: np.ndarray of shape (n, d), d is 128
    :return: an [n, n] matrix of squared pairwise distances
    """
    square = np.sum(x * x, axis=1, keepdims=True)
    distance_square = square + square.transpose() - 2 * np.dot(x, x.transpose())
    return distance_square


if __name__ == '__main__':
    print("begin to train the model of margin based loss")
    train()
--------------------------------------------------------------------------------
/server/copy_nn.py:
--------------------------------------------------------------------------------
# import pymongo
# import mxnet
# from mxnet import nd
#
# # every time yield 20 items, read the io bytes, extract features, then insert into the new nn index
#
# import asyncio
# import aiohttp
# from io import BytesIO
# import time
# import requests
#
#
# @asyncio.coroutine
# def get_image(img_url):
#     resp = yield from requests.get(img_url)
#     return resp.content
#
# def save_image(img,fobj):
#     fobj.write(img)
#
# @asyncio.coroutine
# def download_one(img_url,fobj):
#     image = yield from get_image(img_url)
#     save_image(image,fobj)

# !/usr/bin/env python
# 
import asyncio 29 | # import aiohttp 30 | # 31 | # async def fetch_img(session, url): 32 | # with aiohttp.Timeout(10): 33 | # async with session.get(url) as response: 34 | # assert response.status == 200 35 | # return await response.read() 36 | # 37 | # loop = asyncio.get_event_loop() 38 | # with aiohttp.ClientSession(loop=loop) as session: 39 | # img = loop.run_until_complete( 40 | # fetch_img(session, 'https://cdn.aidigger.com/images/instagram/f95f00da22a2e143e6e457b10544a120.jpeg')) 41 | # with open("img.png", "wb") as f: 42 | # f.write(img) 43 | 44 | # if __name__ == '__main__': 45 | # url_list = ['https://cdn.aidigger.com/images/instagram/e2452f9daaad3ef7070adb22ee70958a.jpeg', 46 | # 'https://cdn.aidigger.com/images/instagram/bd717eaa4c351b842a497e8907b69855.jpeg', 47 | # 'https://cdn.aidigger.com/images/instagram/189a2af5d9661500b32271ca9b1865be.jpeg', 48 | # 'https://cdn.aidigger.com/images/instagram/6e70c94dd3fac214c5d7e6c061df2b2f.jpeg', 49 | # 'https://cdn.aidigger.com/images/instagram/f95f00da22a2e143e6e457b10544a120.jpeg'] 50 | # fobj_list =[BytesIO() for _ in range(len(url_list))] 51 | # start = time.time() 52 | # loop = asyncio.get_event_loop() 53 | # to_do_tasks = [download_one(url,f) for url,f in zip(url_list,fobj_list)] 54 | # res,= loop.run_until_complete(asyncio.wait(to_do_tasks)) 55 | # print(len(res)) 56 | # print(time.time()-start) 57 | 58 | 59 | import asyncio 60 | import logging 61 | from contextlib import closing 62 | import aiohttp # $ pip install aiohttp 63 | from io import BytesIO 64 | from PIL import Image 65 | import numpy as np 66 | from pymongo import MongoClient 67 | from mxnet import nd 68 | import mxnet as mx 69 | import mxnet.gluon.data.vision.transforms as T 70 | import mxnet.gluon.model_zoo.vision as vision_model 71 | from models import MarginNet 72 | import mxnet 73 | from mxnet.image import imread 74 | 75 | logging.basicConfig(level=logging.WARNING, format='%(asctime)s %(message)s') 76 | import requests 77 | import json 78 | import binascii 79 | import numpy as np 80 | from pymongo import MongoClient 81 | from requests import ReadTimeout 82 | from pprint import pprint 83 | 84 | 85 | 86 | 87 | #image transform 88 | normalize=T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225]) 89 | test_transform = T.Compose([ 90 | T.Resize(256), 91 | T.CenterCrop(224), 92 | T.ToTensor(), 93 | normalize 94 | ]) 95 | 96 | # define mongodb connect 97 | def get_db(): 98 | mongdb={} 99 | mongdb['host']='cc.com' 100 | mongdb['port']=3717 101 | client=MongoClient(host=mongdb['host'],port=mongdb['port']) 102 | dev=client.get_database('dev') 103 | dev.authenticate(name='cc',password='cc') 104 | return dev 105 | 106 | 107 | @asyncio.coroutine 108 | def download(url, session, semaphore, chunk_size=1<<15): 109 | with (yield from semaphore): # limit number of concurrent downloads 110 | file = BytesIO() 111 | logging.info('downloading %s', file) 112 | response = yield from session.get(url) 113 | with closing(response): 114 | while True: # save file 115 | chunk = yield from response.content.read(chunk_size) 116 | if not chunk: 117 | break 118 | file.write(chunk) 119 | logging.info('done %s', file) 120 | return file, (response.status, tuple(response.headers.items())) 121 | 122 | def get_net(gpu_id): 123 | param_path = 'checkpoints/Fashion_In.params' 124 | base_net = vision_model.get_model('resnet50_v2') 125 | net = MarginNet(base_net.features, 128, batch_k=5) 126 | context = [mxnet.gpu(gpu_id)] 127 | net.initialize() 128 | net.collect_params().reset_ctx(context) 129 | 
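    # NOTE (editor): load_parameters() overwrites the randomly initialized values set by
    # initialize() above; for serving, MarginNet.extract() (models/mx_margin_model.py)
    # returns the L2-normalized embedding directly, skipping the sampling layer.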
net.load_parameters(filename=param_path, ctx=context[0]) 130 | return net,context 131 | 132 | def get_cursor(db,collection_name,batch_size): 133 | #define source nn_prod data fetch 134 | nn_prod = db.get_collection(collection_name) 135 | cursor = nn_prod.find({},{'vector':0,},batch_size=batch_size) 136 | return cursor 137 | 138 | def get_target_colection(db): 139 | colletion_name = 'image_metric_taobao128' 140 | target_collection = db.get_collection(colletion_name) 141 | return target_collection 142 | 143 | 144 | def convert_vector_to_ascii(vector): 145 | """convert a numpy array or a list to bytes, and to make it can be dumped by json, we convert the bytes to string 146 | """ 147 | if isinstance(vector, (list, np.ndarray, np.generic)): 148 | vector = np.asarray(vector, dtype=np.float32) 149 | else: 150 | raise ValueError("vector must be list or numpy array") 151 | # add decode to convert base64 bytes to string 152 | return binascii.b2a_base64(vector.tobytes()).decode() 153 | 154 | def get_nn_config(model_name ='image_metric_taobao128'): 155 | 156 | host = 'https://alpha-nnsearch.aidigger.com/api/v1/' 157 | path = 'model/'+model_name+'/' 158 | return host,path 159 | 160 | # begin to set basic paramter 161 | batch_size=20 162 | urls= [] 163 | records = [] 164 | db = get_db() 165 | cursor = get_cursor(db,'image_nn_prod',batch_size) 166 | net,context = get_net(0) 167 | host,path = get_nn_config('image_metric_taobao128') 168 | # set basic parameter finished 169 | 170 | targe_collection = get_target_colection(db) 171 | 172 | loop = asyncio.get_event_loop() 173 | session = aiohttp.ClientSession() 174 | semaphore = asyncio.Semaphore(20) 175 | 176 | for item in cursor: 177 | if len(urls)==batch_size: 178 | #process 179 | #with closing(asyncio.get_event_loop()) as loop, closing(aiohttp.ClientSession()) as session: 180 | try: 181 | download_tasks = (download(url, session, semaphore) for url in urls) 182 | result = loop.run_until_complete(asyncio.gather(*download_tasks)) 183 | except Exception as e: 184 | print(e) 185 | urls = [] 186 | records = [] 187 | continue 188 | 189 | nd_img_list = [] 190 | succeed_ids = [] 191 | docs = [] 192 | for i,(f_ret,rec) in enumerate(zip(result,records)): 193 | try: 194 | pil_img = Image.open(f_ret[0]) 195 | nd_img_list.append(test_transform(nd.array(np.asarray(pil_img)))) 196 | new_rec = {} 197 | new_rec['_id'] = rec['_id'] 198 | new_rec['_int_id'] = rec['int_id'] 199 | new_rec.update(rec['_source']) 200 | docs.append(new_rec) 201 | except Exception as e: 202 | print(urls[i]) 203 | print(e) 204 | 205 | 206 | #nd_img_list = [test_transform(nd.array(np.asarray(Image.open(f_ret[0])))) for f_ret in result ] 207 | if len(nd_img_list)!=len(records) or len(nd_img_list)< 2: 208 | if len(nd_img_list)<2: 209 | print(urls[0]) 210 | print("caution,failed to download all pictures") 211 | print(result[0][1][0],result[0][1][1]) 212 | 213 | records.clear() 214 | urls.clear() 215 | docs.clear() 216 | for f_ret in result: 217 | try: 218 | if not f_ret[0].closed: 219 | f_ret[0].close() 220 | except Exception as e: 221 | print(e) 222 | continue 223 | 224 | nd_tensor_img = nd.stack(*nd_img_list,axis=0) 225 | nd_tensor_img = nd_tensor_img.as_in_context(context[0]) 226 | data = net.extract(nd_tensor_img) 227 | data = data.asnumpy() 228 | 229 | 230 | 231 | doc_types =['image']*len(records) 232 | vectors = [convert_vector_to_ascii(v) for v in data ] 233 | 234 | ret = requests.post(host + path + "add/batch", json={"docs": docs, "doc_types": doc_types, "vectors": vectors}) 235 | 
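        # NOTE (editor): the three lists in the add/batch payload must stay index-aligned;
        # the body has the shape {"docs": [...], "doc_types": ["image", ...],
        # "vectors": [base64-float32, ...]}, with each vector produced by
        # convert_vector_to_ascii above.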
print(ret.json()) 236 | 237 | #for annother loop 238 | doc_types=[] 239 | vectors =[] 240 | doc_types=[] 241 | records = [] 242 | urls=[] 243 | for f_ret in result: 244 | try: 245 | if not f_ret[0].closed: 246 | f_ret[0].close() 247 | except Exception as e: 248 | print(e) 249 | else: 250 | records.append(item) 251 | urls.append(item['_source']['cdn_url']) 252 | 253 | 254 | 255 | -------------------------------------------------------------------------------- /models/mx_margin_model.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | 19 | from mxnet import gluon 20 | from mxnet.gluon import nn, Block, HybridBlock 21 | import numpy as np 22 | 23 | class L2Normalization(HybridBlock): 24 | r"""Applies L2 Normalization to input. 25 | 26 | Parameters 27 | ---------- 28 | mode : str 29 | Mode of normalization. 30 | See :func:`~mxnet.ndarray.L2Normalization` for available choices. 31 | 32 | Inputs: 33 | - **data**: input tensor with arbitrary shape. 34 | 35 | Outputs: 36 | - **out**: output tensor with the same shape as `data`. 37 | """ 38 | def __init__(self, mode, **kwargs): 39 | self._mode = mode 40 | super(L2Normalization, self).__init__(**kwargs) 41 | 42 | def hybrid_forward(self, F, x): 43 | return F.L2Normalization(x, mode=self._mode, name='l2_norm') 44 | 45 | def __repr__(self): 46 | s = '{name}({_mode})' 47 | return s.format(name=self.__class__.__name__, 48 | **self.__dict__) 49 | 50 | 51 | def get_distance(F, x): 52 | """Helper function for margin-based loss. Return a distance matrix given a matrix.""" 53 | n = x.shape[0] 54 | 55 | square = F.sum(x ** 2.0, axis=1, keepdims=True) 56 | distance_square = square + square.transpose() - (2.0 * F.dot(x, x.transpose())) 57 | 58 | # Adding identity to make sqrt work. 59 | return F.sqrt(distance_square + F.array(np.identity(n))) 60 | 61 | class DistanceWeightedSampling(HybridBlock): 62 | r"""Distance weighted sampling. See "sampling matters in deep embedding learning" 63 | paper for details. 64 | 65 | Parameters 66 | ---------- 67 | batch_k : int 68 | Number of images per class. 69 | 70 | Inputs: 71 | - **data**: input tensor with shape (batch_size, embed_dim). 72 | Here we assume the consecutive batch_k examples are of the same class. 73 | For example, if batch_k = 5, the first 5 examples belong to the same class, 74 | 6th-10th examples belong to another class, etc. 75 | 76 | Outputs: 77 | - a_indices: indices of anchors. 78 | - x[a_indices]: sampled anchor embeddings. 79 | - x[p_indices]: sampled positive embeddings. 80 | - x[n_indices]: sampled negative embeddings. 81 | - x: embeddings of the input batch. 
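    Sampling weights are proportional to the inverse of the pairwise-distance
    density on the unit sphere, q(d) ~ d^(D-2) * (1 - d^2/4)^((D-3)/2) for embedding
    dimension D; hybrid_forward computes (2-D)*log d - ((D-3)/2)*log(1 - d^2/4)
    accordingly (editor's note).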
82 | """ 83 | def __init__(self, batch_k, cutoff=0.5, nonzero_loss_cutoff=1.4, **kwargs): 84 | self.batch_k = batch_k 85 | self.cutoff = cutoff 86 | 87 | # We sample only from negatives that induce a non-zero loss. 88 | # These are negatives with a distance < nonzero_loss_cutoff. 89 | # With a margin-based loss, nonzero_loss_cutoff == margin + beta. 90 | self.nonzero_loss_cutoff = nonzero_loss_cutoff 91 | super(DistanceWeightedSampling, self).__init__(**kwargs) 92 | 93 | def hybrid_forward(self, F, x): 94 | k = self.batch_k 95 | n, d = x.shape 96 | 97 | distance = get_distance(F, x) 98 | # Cut off to avoid high variance. 99 | distance = F.maximum(distance, self.cutoff) 100 | 101 | # Subtract max(log(distance)) for stability. 102 | log_weights = ((2.0 - float(d)) * F.log(distance) 103 | - (float(d - 3) / 2) * F.log(1.0 - 0.25 * (distance ** 2.0))) 104 | weights = F.exp(log_weights - F.max(log_weights)) 105 | 106 | # Sample only negative examples by setting weights of 107 | # the same-class examples to 0. 108 | mask = np.ones(weights.shape) 109 | for i in range(0, n, k): 110 | mask[i:i+k, i:i+k] = 0 111 | mask_uniform_probs = mask * (1.0/(n-k)) 112 | 113 | weights = weights * F.array(mask) * (distance < self.nonzero_loss_cutoff) 114 | weights_sum = F.sum(weights, axis=1, keepdims=True) 115 | weights = weights / weights_sum 116 | 117 | a_indices = [] 118 | p_indices = [] 119 | n_indices = [] 120 | 121 | np_weights = weights.asnumpy() 122 | for i in range(n): 123 | block_idx = i // k 124 | 125 | if weights_sum[i] != 0: 126 | n_indices += np.random.choice(n, k-1, p=np_weights[i]).tolist() 127 | else: 128 | # all samples are above the cutoff so we sample uniformly 129 | n_indices += np.random.choice(n, k-1, p=mask_uniform_probs[i]).tolist() 130 | for j in range(block_idx * k, (block_idx + 1) * k): 131 | if j != i: 132 | a_indices.append(i) 133 | p_indices.append(j) 134 | 135 | return a_indices, x[a_indices], x[p_indices], x[n_indices], x 136 | 137 | def __repr__(self): 138 | s = '{name}({batch_k})' 139 | return s.format(name=self.__class__.__name__, 140 | **self.__dict__) 141 | 142 | 143 | class MarginNet(Block): 144 | r"""Embedding network with distance weighted sampling. 145 | It takes a base CNN and adds an embedding layer and a 146 | sampling layer at the end. 147 | 148 | Parameters 149 | ---------- 150 | base_net : Block 151 | Base network. 152 | emb_dim : int 153 | Dimensionality of the embedding. 154 | batch_k : int 155 | Number of images per class in a batch. Used in sampling. 156 | 157 | Inputs: 158 | - **data**: input tensor with shape (batch_size, channels, width, height). 159 | Here we assume the consecutive batch_k images are of the same class. 160 | For example, if batch_k = 5, the first 5 images belong to the same class, 161 | 6th-10th images belong to another class, etc. 162 | 163 | Outputs: 164 | - The output of DistanceWeightedSampling. 
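    Typical use (editor's sketch)::

        a_idx, anchors, positives, negatives, emb = net(batch)   # training path
        emb = net.extract(batch)                                  # inference path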
165 | """ 166 | def __init__(self, base_net, emb_dim, batch_k=5, **kwargs): 167 | super(MarginNet, self).__init__(**kwargs) 168 | with self.name_scope(): 169 | self.base_net = base_net 170 | self.dense = nn.Dense(emb_dim) 171 | self.normalize = L2Normalization(mode='instance') 172 | self.sampled = DistanceWeightedSampling(batch_k=batch_k) 173 | 174 | def forward(self, x): 175 | z = self.base_net(x) 176 | z = self.dense(z) 177 | z = self.normalize(z) 178 | z = self.sampled(z) 179 | return z 180 | 181 | def extract(self,x): 182 | z = self.base_net(x) 183 | z = self.dense(z) 184 | z = self.normalize(z) 185 | return z # just return feature vector 186 | 187 | 188 | class MarginLoss(gluon.loss.Loss): 189 | r"""Margin based loss. 190 | 191 | Parameters 192 | ---------- 193 | margin : float 194 | Margin between positive and negative pairs. 195 | nu : float 196 | Regularization parameter for beta. 197 | 198 | Inputs: 199 | - anchors: sampled anchor embeddings. 200 | - positives: sampled positive embeddings. 201 | - negatives: sampled negative embeddings. 202 | - beta_in: class-specific betas. 203 | - a_indices: indices of anchors. Used to get class-specific beta. 204 | 205 | Outputs: 206 | - Loss. 207 | """ 208 | def __init__(self, margin=0.2, nu=0.0, weight=None, batch_axis=0, **kwargs): 209 | super(MarginLoss, self).__init__(weight, batch_axis, **kwargs) 210 | self._margin = margin 211 | self._nu = nu 212 | 213 | def hybrid_forward(self, F, anchors, positives, negatives, beta_in, a_indices=None): 214 | if a_indices is not None: 215 | # Jointly train class-specific beta. 216 | beta = beta_in.data()[a_indices] 217 | beta_reg_loss = F.sum(beta) * self._nu 218 | else: 219 | # Use a constant beta. 220 | beta = beta_in 221 | beta_reg_loss = 0.0 222 | 223 | d_ap = F.sqrt(F.sum(F.square(positives - anchors), axis=1) + 1e-8) 224 | d_an = F.sqrt(F.sum(F.square(negatives - anchors), axis=1) + 1e-8) 225 | 226 | pos_loss = F.maximum(d_ap - beta + self._margin, 0.0) 227 | neg_loss = F.maximum(beta - d_an + self._margin, 0.0) 228 | 229 | pair_cnt = F.sum((pos_loss > 0.0) + (neg_loss > 0.0)) 230 | if pair_cnt == 0.0: 231 | # When poss_loss and neg_loss is zero then total loss is zero as well 232 | loss = F.sum(pos_loss + neg_loss) 233 | else: 234 | # Normalize based on the number of pairs. 235 | loss = (F.sum(pos_loss + neg_loss) + beta_reg_loss) / pair_cnt 236 | return gluon.loss._apply_weighting(F, loss, self._weight, None) 237 | -------------------------------------------------------------------------------- /train_mx_margin.py: -------------------------------------------------------------------------------- 1 | # copy from mxnet/examples/gluon/emebding_learning example code 2 | 3 | from __future__ import division 4 | 5 | import argparse 6 | import logging 7 | import time 8 | 9 | import numpy as np 10 | from bottleneck import argpartition 11 | 12 | import mxnet as mx 13 | from data import cub200_iterator 14 | from mxnet import gluon 15 | from mxnet.gluon.model_zoo import vision as models 16 | from mxnet import autograd as ag, nd 17 | from models.mx_margin_model import MarginNet, MarginLoss 18 | 19 | logging.basicConfig(level=logging.INFO) 20 | 21 | # CLI 22 | parser = argparse.ArgumentParser(description='train a model for image classification.') 23 | parser.add_argument('--data-path', type=str, default='data/CUB_200_2011', 24 | help='path of data.') 25 | parser.add_argument('--embed-dim', type=int, default=128, 26 | help='dimensionality of image embedding. 
default is 128.') 27 | parser.add_argument('--batch-size', type=int, default=70, 28 | help='training batch size per device (CPU/GPU). default is 70.') 29 | parser.add_argument('--batch-k', type=int, default=5, 30 | help='number of images per class in a batch. default is 5.') 31 | parser.add_argument('--gpus', type=str, default='', 32 | help='list of gpus to use, e.g. 0 or 0,2,5. empty means using cpu.') 33 | parser.add_argument('--epochs', type=int, default=20, 34 | help='number of training epochs. default is 20.') 35 | parser.add_argument('--optimizer', type=str, default='adam', 36 | help='optimizer. default is adam.') 37 | parser.add_argument('--lr', type=float, default=0.0001, 38 | help='learning rate. default is 0.0001.') 39 | parser.add_argument('--lr-beta', type=float, default=0.1, 40 | help='learning rate for the beta in margin based loss. default is 0.1.') 41 | parser.add_argument('--margin', type=float, default=0.2, 42 | help='margin for the margin based loss. default is 0.2.') 43 | parser.add_argument('--beta', type=float, default=1.2, 44 | help='initial value for beta. default is 1.2.') 45 | parser.add_argument('--nu', type=float, default=0.0, 46 | help='regularization parameter for beta. default is 0.0.') 47 | parser.add_argument('--factor', type=float, default=0.5, 48 | help='learning rate schedule factor. default is 0.5.') 49 | parser.add_argument('--steps', type=str, default='12,14,16,18', 50 | help='epochs to update learning rate. default is 12,14,16,18.') 51 | parser.add_argument('--wd', type=float, default=0.0001, 52 | help='weight decay rate. default is 0.0001.') 53 | parser.add_argument('--seed', type=int, default=123, 54 | help='random seed to use. default=123.') 55 | parser.add_argument('--model', type=str, default='resnet50_v2', 56 | help='type of model to use. see vision_model for options.') 57 | parser.add_argument('--save-model-prefix', type=str, default='margin_loss_model', 58 | help='prefix of models to be saved.') 59 | parser.add_argument('--use_pretrained', action='store_true', 60 | help='enable using pretrained model from gluon.') 61 | parser.add_argument('--kvstore', type=str, default='device', 62 | help='kvstore to use for trainer.') 63 | parser.add_argument('--log-interval', type=int, default=20, 64 | help='number of batches to wait before logging.') 65 | opt = parser.parse_args() 66 | 67 | logging.info(opt) 68 | 69 | # Settings. 70 | mx.random.seed(opt.seed) 71 | np.random.seed(opt.seed) 72 | 73 | batch_size = opt.batch_size 74 | 75 | gpus = [] if opt.gpus is None or opt.gpus is '' else [ 76 | int(gpu) for gpu in opt.gpus.split(',')] 77 | num_gpus = len(gpus) 78 | 79 | batch_size *= max(1, num_gpus) 80 | context = [mx.gpu(i) for i in gpus] if num_gpus > 0 else [mx.cpu()] 81 | steps = [int(step) for step in opt.steps.split(',')] 82 | 83 | # Construct model. 84 | kwargs = {'ctx': context, 'pretrained': opt.use_pretrained} 85 | net = models.get_model(opt.model, **kwargs) 86 | 87 | if opt.use_pretrained: 88 | # Use a smaller learning rate for pre-trained convolutional layers. 89 | for v in net.collect_params().values(): 90 | if 'conv' in v.name: 91 | setattr(v, 'lr_mult', 0.01) 92 | 93 | net.hybridize() 94 | net = MarginNet(net.features, opt.embed_dim, opt.batch_k) 95 | beta = mx.gluon.Parameter('beta', shape=(100,)) 96 | 97 | # Get iterators. 98 | train_data, val_data = cub200_iterator(opt.data_path, opt.batch_k, batch_size, (3, 224, 224)) 99 | 100 | 101 | def get_distance_matrix(x): 102 | """Get distance matrix given a matrix. 
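    Uses ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, so only one matrix product is needed (editor's note).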
102 |     """Get distance matrix given a matrix. Used in testing."""
103 |     square = nd.sum(x ** 2.0, axis=1, keepdims=True)
104 |     distance_square = square + square.transpose() - (2.0 * nd.dot(x, x.transpose()))
105 |     return nd.sqrt(distance_square)
106 | 
107 | 
108 | def evaluate_emb(emb, labels):
109 |     """Evaluate embeddings based on Recall@k."""
110 |     d_mat = get_distance_matrix(emb)
111 |     d_mat = d_mat.asnumpy()
112 |     labels = labels.asnumpy()
113 | 
114 |     names = []
115 |     accs = []
116 |     for k in [1, 2, 4, 8, 16]:
117 |         names.append('Recall@%d' % k)
118 |         correct, cnt = 0.0, 0.0
119 |         for i in range(emb.shape[0]):
120 |             d_mat[i, i] = 1e10
121 |             nns = argpartition(d_mat[i], k)[:k]
122 |             if any(labels[i] == labels[nn] for nn in nns):
123 |                 correct += 1
124 |             cnt += 1
125 |         accs.append(correct/cnt)
126 |     return names, accs
127 | 
128 | 
129 | def test(ctx):
130 |     """Test a model."""
131 |     val_data.reset()
132 |     outputs = []
133 |     labels = []
134 |     for batch in val_data:
135 |         data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
136 |         label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
137 |         for x in data:
138 |             outputs.append(net(x)[-1])
139 |         labels += label
140 | 
141 |     outputs = nd.concatenate(outputs, axis=0)[:val_data.n_test]
142 |     labels = nd.concatenate(labels, axis=0)[:val_data.n_test]
143 |     return evaluate_emb(outputs, labels)
144 | 
145 | 
146 | def get_lr(lr, epoch, steps, factor):
147 |     """Get learning rate based on schedule."""
148 |     for s in steps:
149 |         if epoch >= s:
150 |             lr *= factor
151 |     return lr
152 | 
153 | 
154 | def train(epochs, ctx):
155 |     """Training function."""
156 |     if isinstance(ctx, mx.Context):
157 |         ctx = [ctx]
158 |     net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
159 | 
160 |     opt_options = {'learning_rate': opt.lr, 'wd': opt.wd}
161 |     if opt.optimizer == 'sgd':
162 |         opt_options['momentum'] = 0.9
163 |     if opt.optimizer == 'adam':
164 |         opt_options['epsilon'] = 1e-7
165 |     trainer = gluon.Trainer(net.collect_params(), opt.optimizer,
166 |                             opt_options,
167 |                             kvstore=opt.kvstore)
168 |     if opt.lr_beta > 0.0:
169 |         # Jointly train class-specific beta.
170 |         # See the "Sampling Matters in Deep Embedding Learning" paper for details.
171 |         beta.initialize(mx.init.Constant(opt.beta), ctx=ctx)
172 |         trainer_beta = gluon.Trainer([beta], 'sgd',
173 |                                      {'learning_rate': opt.lr_beta, 'momentum': 0.9},
174 |                                      kvstore=opt.kvstore)
175 | 
176 |     loss = MarginLoss(margin=opt.margin, nu=opt.nu)
177 | 
178 |     best_val = 0.0
179 |     for epoch in range(epochs):
180 |         tic = time.time()
181 |         prev_loss, cumulative_loss = 0.0, 0.0
182 | 
183 |         # Learning rate schedule.
184 |         trainer.set_learning_rate(get_lr(opt.lr, epoch, steps, opt.factor))
185 |         logging.info('Epoch %d learning rate=%f', epoch, trainer.learning_rate)
186 |         if opt.lr_beta > 0.0:
187 |             trainer_beta.set_learning_rate(get_lr(opt.lr_beta, epoch, steps, opt.factor))
188 |             logging.info('Epoch %d beta learning rate=%f', epoch, trainer_beta.learning_rate)
189 | 
190 |         # Inner training loop.
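        # Note: an "epoch" here is a fixed budget of 200 sampled batches, since the
        # iterator draws random class-balanced batches rather than sweeping a finite
        # dataset; net(x) returns (a_indices, anchors, positives, negatives, embeddings)
        # from the distance-weighted sampler.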
191 |         for i in range(200):
192 |             batch = train_data.next()
193 |             data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
194 |             label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
195 | 
196 |             Ls = []
197 |             with ag.record():
198 |                 for x, y in zip(data, label):
199 |                     a_indices, anchors, positives, negatives, _ = net(x)
200 | 
201 |                     if opt.lr_beta > 0.0:
202 |                         L = loss(anchors, positives, negatives, beta, y[a_indices])
203 |                     else:
204 |                         L = loss(anchors, positives, negatives, opt.beta, None)
205 | 
206 |                     # Store the loss and do backward after we have done forward
207 |                     # on all GPUs for better speed on multiple GPUs.
208 |                     Ls.append(L)
209 |                     cumulative_loss += nd.mean(L).asscalar()
210 | 
211 |             for L in Ls:
212 |                 L.backward()
213 | 
214 |             # Update.
215 |             trainer.step(batch.data[0].shape[0])
216 |             if opt.lr_beta > 0.0:
217 |                 trainer_beta.step(batch.data[0].shape[0])
218 | 
219 |             if (i+1) % opt.log_interval == 0:
220 |                 logging.info('[Epoch %d, Iter %d] training loss=%f' % (
221 |                     epoch, i+1, cumulative_loss - prev_loss))
222 |                 prev_loss = cumulative_loss
223 | 
224 |         logging.info('[Epoch %d] training loss=%f' % (epoch, cumulative_loss))
225 |         logging.info('[Epoch %d] time cost: %f' % (epoch, time.time()-tic))
226 | 
227 |         names, val_accs = test(ctx)
228 |         for name, val_acc in zip(names, val_accs):
229 |             logging.info('[Epoch %d] validation: %s=%f' % (epoch, name, val_acc))
230 | 
231 |         if val_accs[0] > best_val:
232 |             best_val = val_accs[0]
233 |             logging.info('Saving %s.' % opt.save_model_prefix)
234 |             net.save_parameters('%s.params' % opt.save_model_prefix)
235 |     return best_val
236 | 
237 | 
238 | if __name__ == '__main__':
239 |     best_val_recall = train(opt.epochs, context)
240 |     print('Best validation Recall@1: %.2f.' % best_val_recall)
241 | 
--------------------------------------------------------------------------------
/data/mxdata/deep_fashion.py:
--------------------------------------------------------------------------------
1 | print("program begin")
2 | from mxnet.gluon.data import DataLoader, Dataset
3 | from mxnet import nd
4 | from mxnet.image import imread
5 | import os
6 | import numpy as np
7 | import mxnet as mx
8 | import mxnet.gluon.data.vision.transforms as T
9 | from collections import Counter
10 | 
11 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
12 | default_transform = T.Compose([
13 |     T.Resize(256),
14 |     T.RandomResizedCrop(size=224, scale=(1.0, 1.0), ratio=(1.0, 1.0)),  # just crop, no rescaling
15 |     T.RandomFlipLeftRight(),
16 |     T.ToTensor(),  # last, to swap the layout to (c, h, w)
17 |     normalize
18 | ])
19 | 
20 | test_transform = T.Compose([
21 |     T.Resize(256),
22 |     T.CenterCrop(224),
23 |     T.ToTensor(),
24 |     normalize
25 | ])
26 | 
27 | 
28 | # DeepFashion In-Shop dataset wrappers.
29 | class DeepInClassFashion(Dataset):
30 |     """
31 |     The DeepFashion In-Shop dataset; reads item ids from list_item_inshop.txt.
32 | 
33 |     """
34 |     def __init__(self, dir_root, batch_k=4, batch_size=80, is_train=True, transform=default_transform):
35 |         self.root = dir_root
36 |         self.batch_k = batch_k
37 |         self.batch_size = batch_size
38 |         self._transform = transform
39 |         self.is_train = is_train
40 |         self.train_ids = []
41 |         self.boxes = {}  # a dictionary storing {path: bbox}
42 |         self.test_ids = set()  # for super_type subtraction
43 |         self.test_images2id = []  # a list storing [(path, id), (path, id), ...]
44 |         with open(os.path.join(self.root, 'Anno', 'list_item_inshop.txt'), 'r') as f_instance:
45 |             self.instance_count = int(f_instance.readline().strip())
46 |             # self.instance_ids = list(f_instance.readlines())
47 |             self.images_files = [[] for _ in range(self.instance_count + 1)]
48 | 
49 |         with open(os.path.join(self.root, 'Anno', 'list_eval_partition.txt'), 'r') as f_parti:
50 |             f_parti.readline()  # skip the picture count
51 |             f_parti.readline()  # skip the header line
52 |             train_ids = []  # Counter is used below for duplicate checking
53 |             for line in f_parti.readlines():
54 |                 path, item_id, status = [i for i in filter(lambda x: x != '', line.strip().split(' '))]
55 |                 int_id = int(item_id.split('_')[-1])
56 |                 path = str(path)
57 |                 if status == 'train':
58 |                     self.images_files[int_id].append(path)
59 |                     self.train_ids.append(int_id)
60 |                 else:
61 |                     self.test_images2id.append((path, int_id))
62 |                     self.test_ids.add(int_id)
63 |         # count train_ids and their distribution
64 |         # post-processing for train_ids
65 |         self.train_ids_list = list(set(self.train_ids))
66 |         count = Counter(self.train_ids)
67 |         self.train_ids_count = np.array([count[int_id] for int_id in self.train_ids_list])
68 |         self.train_ids_dist = self.train_ids_count / sum(self.train_ids_count)
69 | 
70 |         with open(os.path.join(self.root, 'Anno', 'list_bbox_inshop.txt'), 'r') as f_bbox:
71 |             f_bbox.readline()  # skip the count
72 |             f_bbox.readline()  # skip the description
73 |             for line in f_bbox.readlines():
74 |                 list_info = line.strip().split(' ')
75 |                 path, box = str(list_info[0]), list_info[-4:]
76 |                 self.boxes[path] = [i for i in map(lambda x: int(x), box)]  # convert to int
77 |         # instance list, split sets and bbox data are now loaded
78 | 
79 |         # sub_list_test = self.images_files[list(self.test_ids)]
80 |         # self.test_len = 0
81 |         # for small_list in sub_list_test:
82 |         #     self.test_len += len(small_list)
83 |         self.build_structure()
84 | 
85 | 
86 |     def write_test_files(self):
87 |         """Write the test file paths and label ids."""
88 |         import csv
89 |         f = open('checkpoints/fashion_test.txt', 'w')
90 |         writer = csv.writer(f, dialect='excel')
91 |         print(len(self.test_images2id))
92 |         print("begin to write")
93 |         writer.writerows(self.test_images2id)
94 |         f.close()
95 | 
96 | 
97 | 
98 |     def build_structure(self):
99 |         """Build the folder-to-id structure of the dataset and
100 |         construct the super-class structure used for sampling."""
101 |         print("the img_root:%s" % (self.root))
102 |         img_root = os.path.join(self.root, 'img')
103 |         self.super_types = {}  # super_type to ids, e.g. {'men_shorts': [1, 23, 4, 5]}
104 |         for sexual in os.listdir(img_root):
105 |             for clothe_type in os.listdir(os.path.join(self.root, 'img', sexual)):
106 |                 ids = os.listdir(os.path.join(self.root, 'img', sexual, clothe_type))
107 |                 origin_ids = [int(instance_id.split('_')[-1]) for instance_id in ids]
108 |                 split_test = set(origin_ids) - self.test_ids
109 |                 self.super_types[sexual + '_' + clothe_type] = list(split_test)  # after removing the test ids
110 |         self.super_type_list = list(self.super_types.keys())
111 | 
112 |         self.super_type_count = np.array([len(self.super_types[k]) for k in self.super_types.keys()])
113 |         # number of classes contained in each super type
114 | 
115 |         self.super_type_distri = self.super_type_count / sum(self.super_type_count)  # the sampling distribution; assumes every id has four or five images
116 | 
117 |     def __len__(self):
118 |         if self.is_train:
119 |             return 1000
120 |         else:
121 |             return len(self.test_images2id)  # otherwise too many pictures to validate
122 | 
123 |     def sampled_batch_data(self):
124 |         """Choose a super type, then build a batch of batch_k
125 |         bbox-cropped images per sampled id."""
126 |         # sample based on the distribution
127 |         batch = []
128 |         labels = []
129 |         num_groups = self.batch_size // self.batch_k
130 |         super_id = np.random.choice(self.super_type_list, size=1, replace=False,
131 |                                     p=self.super_type_distri)[0]
132 |         try:
133 |             sampled_ids = np.random.choice(self.super_types[super_id],
134 |                                            size=num_groups, replace=False)
135 |         except Exception:
136 |             sampled_ids = self.super_types[super_id]  # the id list is smaller than num_groups (e.g. 25)
137 | 
138 |         # the sampled_ids look like [1, 2, 5, 45, 23] within one super type
139 |         for i in sampled_ids:
140 |             try:
141 |                 img_fname = np.random.choice(
142 |                     self.images_files[i],
143 |                     size=self.batch_k,
144 |                     replace=False
145 |                 )
146 |             except Exception:
147 |                 continue
148 |             batch += img_fname.tolist()
149 |             labels += [i] * self.batch_k
150 |         return batch, labels  # paths formatted like img/man/short/id_xxxx01/01_shorts.jpg
151 | 
152 |     def __getitem__(self, index):
153 |         if self.is_train:
154 |             imagelist = []
155 |             batch, labels = self.sampled_batch_data()
156 |             for file in batch:
157 |                 file_path = os.path.join(self.root, file)
158 |                 image = imread(file_path, to_rgb=True, flag=1)
159 |                 if image.shape[2] == 1:
160 |                     print("has gray file", file)
161 |                     image = nd.tile(image, (1, 1, 3))
162 |                 box = self.boxes.get(file, [0, 0, 256, 256])
163 |                 image = image[box[1]:box[3], box[0]:box[2]]  # crop the image in width and height
164 |                 image = self._transform(image)
165 |                 imagelist.append(image)
166 |             return nd.stack(*imagelist, axis=0), nd.array(labels)
167 |         else:
168 |             path, class_id = self.test_images2id[index]
169 |             box = self.boxes.get(path, [0, 0, 256, 256])  # fetch path, id and box
170 |             file_path = os.path.join(self.root, path)
171 |             image = imread(file_path, to_rgb=True, flag=1)
172 |             if image.shape[2] == 1:
173 |                 image = nd.tile(image, (1, 1, 3))
174 | 
175 |             image = image[box[1]:box[3], box[0]:box[2]]  # crop the test image
176 |             image = self._transform(image)
177 |             return image, class_id
178 | 
179 | 
180 | 
181 | 
182 | 
183 | 
184 | def getDeepInClassFashion(dir_root, batch_k, batch_size):
185 |     """Three main parameters: dir_root, batch_k and batch_size."""
186 |     train_data = DeepInClassFashion(dir_root=dir_root, batch_k=batch_k, batch_size=batch_size, is_train=True,
187 |                                     transform=default_transform)
188 |     test_data = DeepInClassFashion(dir_root=dir_root, batch_k=batch_k, batch_size=batch_size, is_train=False,
189 |                                    transform=test_transform)
190 |     train_loader = DataLoader(train_data, batch_size=1, shuffle=False, num_workers=6)
191 |     test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=6)
192 |     return train_loader, test_loader
193 | 
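# Design note: the training DataLoader above uses batch_size=1 because each
# __getitem__ call already returns one complete sampled batch of shape
# (batch_size, 3, 224, 224); the training loop unwraps the leading axis with
# batch_data[0][0] (see train_mx_ebay_margin.py).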
194 | 
195 | class DeepCrossClassFashion(DeepInClassFashion):
196 |     def __init__(self, dir_root, batch_k=4, batch_size=80, is_train=True, transform=default_transform):
197 |         super(DeepCrossClassFashion, self).__init__(dir_root, batch_k, batch_size, is_train, transform)
198 |         self.datatype = 'CrossClass'
199 | 
200 |     def sampled_batch_data(self):
201 |         batch = []
202 |         labels = []
203 |         num_groups = self.batch_size // self.batch_k
204 |         sampled_ids = np.random.choice(self.train_ids_list, size=num_groups, replace=False, p=self.train_ids_dist)
205 |         for i in sampled_ids:
206 |             try:
207 |                 img_fnames = np.random.choice(self.images_files[i],
208 |                                               size=self.batch_k, replace=False)
209 |             except Exception:
210 |                 continue
211 |             batch += img_fnames.tolist()
212 |             labels += [i] * self.batch_k
213 |         return batch, labels
214 | 
215 | def getDeepCrossClassFashion(dir_root, batch_k, batch_size):
216 |     train_data = DeepCrossClassFashion(dir_root, batch_k, batch_size=batch_size,
217 |                                        is_train=True, transform=default_transform)
218 |     test_data = DeepCrossClassFashion(dir_root, batch_k=batch_k, batch_size=batch_size,
219 |                                       is_train=False, transform=test_transform)
220 |     train_loader = DataLoader(train_data, batch_size=1, shuffle=False, num_workers=6)
221 |     test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=6)
222 |     return train_loader, test_loader
223 | 
224 | 
225 | if __name__ == '__main__':
226 |     train_data = DeepInClassFashion(dir_root='data/DeepInShop', batch_k=4, batch_size=80, is_train=True,
227 |                                     transform=default_transform)
228 |     # test_data = DeepCrossClassFashion(dir_root='data/DeepInShop', batch_k=4, batch_size=80, is_train=False,
229 |     #                                   transform=test_transform)
230 |     #
231 |     # data = train_data[0]
232 |     # print('train data x shape', data[0].shape, 'training data y shape ', data[1].shape)
233 |     # data = test_data[0]
234 |     # print('test data x shape', data[0].shape, 'training data y shape', data[1])
235 |     train_data.write_test_files()
236 | 
237 | 
238 | 
239 | 
--------------------------------------------------------------------------------
/README.MD:
--------------------------------------------------------------------------------
1 | # Deep Embedding Learning for Image Retrieval
2 | ---
3 | # Deep Embedding Introduction
4 | DeepEmbedding uses deep learning to map several kinds of media into a shared vector space so that search can happen in that unified space.
5 | This project evaluates general multimedia retrieval through visual-level search, fine-grained category (instance) retrieval, and image-text cross search.
6 | # A General Recipe for Image Retrieval
7 | DeepEmbedding aims to learn a relation-preserving mapping with deep metric learning (DeepMetric) or deep hashing (DeepHash) that projects the visual space into a low-dimensional embedding space, which a vector search engine can then query. The first problem is feature extraction, which is what this project studies; the second is feature search. For the latter, see ANNS (approximate nearest neighbor search) in [NNSearchService](https://github.com/EigenLab/NNSearchService).
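As a concrete illustration of the two stages, the sketch below embeds a gallery and a query batch with this repo's `MarginNet.extract` and ranks by exact L2 distance (`base_net`, the image batches and the checkpoint path are placeholders; at scale the second stage would be delegated to an ANN engine such as faiss):

```python
import mxnet as mx
from mxnet import nd
from models.mx_margin_model import MarginNet

# Stage 1: feature extraction with a trained embedding network.
net = MarginNet(base_net.features, 128, batch_k=5)      # base_net: any gluon CNN
net.load_parameters('checkpoints/Fashion_In.params', ctx=mx.cpu())
gallery = net.extract(gallery_images)                   # (N, 128), L2-normalized
query = net.extract(query_images)                       # (M, 128)

# Stage 2: feature search; same expansion as get_distance_matrix in the train scripts.
sq_q = nd.sum(query ** 2.0, axis=1, keepdims=True)      # (M, 1)
sq_g = nd.sum(gallery ** 2.0, axis=1, keepdims=True)    # (N, 1)
d2 = sq_q + sq_g.transpose() - 2.0 * nd.dot(query, gallery.transpose())
topk = nd.argsort(d2, axis=1)[:, :5]                    # 5 nearest gallery ids per query
```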
8 | ## Note
9 | - This project implements retrieval with metric learning based on the multi-class N-pair loss and on the sampling-based margin loss
10 | - For the reproduced methods, see the papers: Triplet loss [FaceNet](http://arxiv.org/abs/1503.03832)
11 | [N-pair loss](http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/papers/nips16_npairmetriclearning.pdf)
12 | [Margin loss: Sampling Matters in Deep Embedding Learning](https://www.cs.utexas.edu/~cywu/projects/sampling_matters/)
13 | [BatchHard](https://arxiv.org/abs/1703.07737)
14 | 
15 | # Experimental Results
16 | (Click the Baidu net-disk links to download and view the images.)
17 | - Trained on StanfordOnlineProducts; the NMI clustering metric is nmi=0.866. A T-SNE projection of the validation-set embeddings (an image of about 43M) can be inspected via the links below:
18 | - [margin-based loss: DeepFashion](https://pan.baidu.com/s/1zLZX24qBb_Op1vsry4LX6w) https://pan.baidu.com/s/1zLZX24qBb_Op1vsry4LX6w
19 | metric: nmi=0.866
20 | - [Mc-n-pair loss: StanfordOnlineProducts](https://pan.baidu.com/s/12eNTVsRFu--SYMW8P8HPfQ) https://pan.baidu.com/s/12eNTVsRFu--SYMW8P8HPfQ
21 | metric: nmi=0.830
22 | 
23 | 
24 | # Using This Project
25 | 1. Download the corresponding dataset
26 | 2. Train the model with one of the loss types
27 | run the cub200 training
28 | ```angular2html
29 | nohup python train_mx_ebay_margin.py --gpus=1 --batch-k=5 --use_viz --epochs=30 --use_pretrained --steps=12,16,20,24 --name=CUB_200_2011 --save-model-prefix=cub200 > mycub200.out 2>&1 &
30 | ```
31 | run the stanford_online_product training
32 | ```angular2html
33 | nohup python train_mx_ebay_margin.py --batch-k=2 --batch-size=80 --use_pretrained --use_viz --gpus=0 --name=Inclass_ebay --data=EbayInClass --save-model-prefix=ebayinclass > mytraininclass_ebay.log 2>&1
34 | ```
35 | 
36 | 
37 | 3. Future work:
38 | - Evaluate networks such as R-MAC and NetVLAD for visual retrieval
39 | - Try GAN-based methods to strengthen retrieval
40 | ```angularjs
41 | 
42 | __Deep Adversarial Metric Learning__
43 | Deep metric learning cannot make full use of the easy negative examples, so [Deep Adversarial Metric Learning](http://openaccess.thecvf.com/content_cvpr_2018/papers/Duan_Deep_Adversarial_Metric_CVPR_2018_paper.pdf) proposes a new framework called DAML
44 | __DeepMetric and Deep Hashing Applications__
45 | apply the methods to the fashion, vehicle and person re-ID domains
46 | __Construct a dataset__ crawl application-domain data
47 | ```
48 | # Dataset
49 | 
50 | [CUB200_2011](http://www.vision.caltech.edu/visipedia/CUB-200.html): a small part of ImageNet
51 | [LFW](http://vis-www.cs.umass.edu/lfw/): face dataset
52 | [StanfordOnlineProducts](http://cvgl.stanford.edu/projects/lifted_struct/): many types of products (furniture, bicycles, cups)
53 | [Street2Shop](http://www.tamaraberg.com/street2shop/): products dataset from eBay
54 | [DeepFashion](http://mmlab.ie.cuhk.edu.hk/projects/DeepFashion.html): all clothes
55 | 
56 | # Applications of Image Retrieval
57 | - Face identification: deep metric learning for face clustering, from [FaceNet](http://arxiv.org/abs/1503.03832) to [SphereFace](http://ieeexplore.ieee.org/document/8100196/)
58 | - Person re-identification: deep metric learning for pedestrian re-ID, from [MARS](https://pdfs.semanticscholar.org/c038/7e788a52f10bf35d4d50659cfa515d89fbec.pdf) to [NPSM&SVDNet](https://blog.csdn.net/u013982164/article/details/79608100)
59 | - Vehicle search: deep metric learning for fake-licensed car detection and vehicle retrieval.
60 | - Street2Products: search fashion clothing from street or in-shop photos, namely visual search, from [DeepRanking](https://users.eecs.northwestern.edu/~jwa368/pdfs/deep_ranking.pdf) to [DAML](http://openaccess.thecvf.com/content_cvpr_2018/papers/Duan_Deep_Adversarial_Metric_CVPR_2018_paper.pdf)
61 | ## Deep Metric Learning milestone papers:
62 | 1.[DrLIM: Dimensionality Reduction by Learning an Invariant Mapping](http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf)
63 | 2.[DeepRanking: Learning Fine-grained Image Similarity with Deep Ranking](https://users.eecs.northwestern.edu/~jwa368/pdfs/deep_ranking.pdf)
64 | 3.[DeepID2: Deep Learning Face Representation by Joint Identification-Verification](http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf)
65 | 4.[FaceNet: A Unified Embedding for Face Recognition and Clustering](http://arxiv.org/abs/1503.03832)
66 | 5.[Defense: In Defense of the Triplet Loss for Person Re-Identification](http://arxiv.org/abs/1703.07737)
67 | 6.[N-pair: Improved Deep Metric Learning with Multi-class N-pair Loss Objective](http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/papers/nips16_npairmetriclearning.pdf)
68 | 7.[Sampling: Sampling Matters in Deep Embedding Learning](https://arxiv.org/abs/1706.07567)
69 | 8.[DAML: Deep Adversarial Metric Learning](http://openaccess.thecvf.com/content_cvpr_2018/papers/Duan_Deep_Adversarial_Metric_CVPR_2018_paper.pdf)
70 | 9.[SphereFace: Deep Hypersphere Embedding for Face Recognition](http://ieeexplore.ieee.org/document/8100196/)
71 | 
72 | 
73 | # Some DeepHash Work
74 | DeepHash maps an image directly to a Hamming code; search is then accelerated with faiss's IVF binary index family, and storage shrinks dramatically. A small sketch of this pipeline follows the HashNet block below.
75 | 
76 | ## Re-implementation of HashNet
77 | ```angular2html
78 | python train_hash.py --params
79 | ```
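A minimal Hamming-search sketch with faiss's binary indexes (the 128-bit codes here are random stand-ins for the output of a trained hashing network):

```python
import numpy as np
import faiss

d = 128                                                      # code length in bits
codes = np.packbits(np.random.rand(10000, d) > 0.5, axis=1)  # (10000, d/8) uint8 codes
index = faiss.IndexBinaryFlat(d)                             # exhaustive Hamming search
# an IVF variant (faiss.IndexBinaryIVF) trades recall for speed, but needs train() first
index.add(codes)
dist, idx = index.search(codes[:5], 10)                      # Hamming distances and ids
```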
80 | 
81 | 
82 | ## Deep Hash Learning milestone papers:
83 | 1.[CNNH: Supervised Hashing for Image Retrieval via Image Representation Learning](https://www.aaai.org/ocs/index.php/AAAI/AAAI14/paper/view/8137/8861)
84 | 2.[DNNH: Simultaneous Feature Learning and Hash Coding with Deep Neural Networks](http://ieeexplore.ieee.org/document/7298947/)
85 | 3.[DLBHC: Deep Learning of Binary Hash Codes for Fast Image Retrieval](http://www.iis.sinica.edu.tw/~kevinlin311.tw/cvprw15.pdf)
86 | 4.[DSH: Deep Supervised Hashing for Fast Image Retrieval](https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Liu_Deep_Supervised_Hashing_CVPR_2016_paper.pdf)
87 | 5.[SUBIC: A Supervised, Structured Binary Code for Image Search](http://ieeexplore.ieee.org/document/8237358/)
88 | 6.[HashNet: Deep Learning to Hash by Continuation](https://arxiv.org/abs/1702.00758)
89 | 7.[DCH: Deep Cauchy Hashing for Hamming Space Retrieval](http://openaccess.thecvf.com/content_cvpr_2018/html/Cao_Deep_Cauchy_Hashing_CVPR_2018_paper.html)
90 | 
91 | 
92 | ## Visual-Semantic Aligned Embedding (cross-modal retrieval)
93 | 1.[VSE++: Improving Visual-Semantic Embeddings with Hard Negatives](http://arxiv.org/abs/1707.05612)
94 | 2.[Dual-Path Convolutional Image-Text Embedding with Instance Loss](http://arxiv.org/abs/1711.05535)
95 | ```bash
96 | python train_vse.py --params
97 | ```
98 | 
99 | ## Other Kinds of Search
100 | 1.Sketch based: [Deep Sketch Hashing: Fast Free-hand Sketch-Based Image Retrieval](https://github.com/ymcidence/DeepSketchHashing)
101 | 2.Text cross-modal based: [Deep Cross-Modal Hashing](https://github.com/jiangqy/DCMH-CVPR2017)
102 | 
103 | ## Accelerating Approximate Nearest Neighbor Search
104 | ANNS (Approximate Nearest Neighbor Search) finds the neighbors of a query vector in a gallery database.
105 | Benchmark datasets:
106 | - [SIFT1M](http://corpus-texmex.irisa.fr/) typical 128-dim SIFT vectors
107 | - [DEEP1B](http://sites.skoltech.ru/compvision/noimi/), proposed by Yandex; deep descriptors
108 | - [GIST1M](http://corpus-texmex.irisa.fr/) typical 960-dim GIST vectors
109 | 
110 | ### papers
111 | -- PQ based (a quantization sketch follows this list)
112 | 1.Turns classical scalar quantization into segment-wise product quantization: [Product Quantization for Nearest Neighbor Search](http://ieeexplore.ieee.org/document/5432202/)
113 | 2.Similar to Cartesian quantization: rotate the vectors as a whole so that the per-segment axes align with the clustered data; a smaller reconstruction error between centroids and data means a smaller compression loss: [Optimized Product Quantization](http://ieeexplore.ieee.org/document/6678503/)
114 | 
115 | 3.[Revisiting the Inverted Indices for Billion-Scale Approximate Nearest Neighbors](http://arxiv.org/abs/1802.02422) proposes anchor points, i.e. line-quantization points, to split each group region and prune part of the regions at search time for speed. It overlaps heavily with the RobustiQ paper, and it also builds an HNSW over the first level to speed up the selection of candidate center points.
116 | 4.Coarse quantization with a double inverted index lowers the clustering dimensionality while increasing the number of centroids; the Multi-Sequence algorithm speeds up the coarse lookup: [The Inverted Multi-Index](http://cache-ash04.cdn.yandex.net/download.yandex.ru/company/cvpr2012.pdf)
117 | 5.Polysemous codes tie Hamming distances to the distances between quantization centroids, which helps filter entry points: [Polysemous codes](https://arxiv.org/abs/1609.01882)
118 | 6.Additive Quantization, two papers: Additive Quantization for Extreme Vector Compression. Sparse additive quantization has a lower quantization error, but needs extra storage and computation.
119 | 7.Composite Quantization. Overlaps with Additive Quantization above; additionally proposes NOCQ as an APQ substitute to speed up computation.
120 | 8.RobustiQ: A Robust ANN Search Method for Billion-scale Similarity Search on GPUs. Very close to paper 3 above: it adds Line Quantization (grouped pruning points) on top of classical IVF to improve PQ search quality.
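To make the product-quantization idea in item 1 concrete, a small faiss sketch (dimensions and segment counts are illustrative):

```python
import numpy as np
import faiss

d, m = 128, 8                            # split 128 dims into 8 segments of 16 dims
xb = np.random.rand(100000, d).astype('float32')   # stand-in gallery vectors

pq = faiss.IndexPQ(d, m, 8)              # 8 bits/segment -> 256 centroids per codebook
pq.train(xb)                             # learn one k-means codebook per segment
pq.add(xb)                               # each vector is stored as m one-byte codes
dist, idx = pq.search(xb[:5], 10)        # asymmetric distances computed on the codes
```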
121 | 
122 | -- Index layering:
123 | 1.Zoom: Multi-View Vector Search for Optimizing Accuracy, Latency and Memory. Runs k-means over the full data and uses millions of centroids to build a first-layer HNSW navigation graph in place of the usual IVF; the second layer ranks candidates with quantized distances (not limited to PQ or APQ); the third layer re-ranks at full precision. This three-layer design lets each layer be improved by swapping in different components.
124 | 2.Pyramid: A General Framework for Distributed Similarity Search. From CUHK (Shangce Zheng's group); compares naive HNSW sharding against a meta-HNSW/sub-HNSW split, searching with two HNSW layers. The upper meta-HNSW solves cross-machine routing. Compared with naive sharding, it adds meta-HNSW construction, full-data point assignment and per-shard building, so indexing is slower, but during search the meta-HNSW acts like a hash and yields much higher throughput than fanning a query out to the whole cluster and merging the results.
125 | 3.Combining index layering with graph and quantization optimizations opens up further index designs.
126 | 
127 | -- Subset indexing
128 | 1.Reconfigurable Inverted Index: targets queries that need both vector similarity and label filtering. It formalizes the subset-search problem and gives two solutions. Clever as they are, it is unclear how they scale to billions of vectors.
129 | 
130 | 
131 | 
132 | -- Graph based
133 | 1.See the NSW (navigable small world) paper, an index built on a skip-list-like structure: [Approximate nearest neighbor algorithm based on navigable small world graphs](https://linkinghub.elsevier.com/retrieve/pii/S0306437913001300)
134 | 2.[Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs](http://arxiv.org/abs/1603.09320)
135 | 3.[EFANNA: An Extremely Fast Approximate Nearest Neighbor Search Algorithm Based on kNN Graph](http://arxiv.org/abs/1609.07228)
136 | 4.[A Revisit on Deep Hashings for Large-scale Content Based Image Retrieval](http://arxiv.org/abs/1711.06016)
137 | 5.[RobustiQ: A Robust ANN Search Method for Billion-scale Similarity Search on GPUs](http://users.monash.edu/~yli/assets/pdf/icmr19-sigconf.pdf)
138 | 6.[GGNN: Graph-based GPU Nearest Neighbor Search](https://arxiv.org/pdf/1912.01059.pdf)
139 | 7.[Zoom: Multi-View Vector Search for Optimizing Accuracy, Latency and Memory](https://www.microsoft.com/en-us/research/uploads/prod/2018/08/zoom-multi-view-tech-report.pdf)
140 | 8.[Vector and Line Quantization for Billion-scale Similarity Search on GPUs](https://arxiv.org/pdf/1901.00275.pdf)
141 | 9.[DiskANN: Fast Accurate Billion-point Nearest Neighbor Search on a Single Node](https://suhasjs.github.io/files/diskann_neurips19.pdf). Proposes the Vamana graph, which makes billion-scale graph construction and search feasible on a single machine.
142 | 
143 | -- Hamming code
144 | 1.[Fast Exact Search in Hamming Space with Multi-Index Hashing](http://arxiv.org/abs/1307.2982)
145 | 2.[Fast Nearest Neighbor Search in the Hamming Space](http://link.springer.com/10.1007/978-3-319-27671-7_27)
146 | 3.[Web-Scale Responsive Visual Search at Bing](http://arxiv.org/abs/1802.04914)
147 | 4.[Recurrent Binary Embedding for GPU-Enabled Exhaustive Retrieval from Billion-Scale Semantic Vectors](http://arxiv.org/abs/1802.06466)
148 | ### library
149 | 1.[faiss] currently ships index engines of many kinds: IVF, IMI, PQ, OPQ, PCA, two-level residual re-ranking (ReRank-PQ), HNSW, Link and Code, and more (see the sketch after this section)
150 | 2.Choosing an index layout: for high capacity at lower precision, IMI+OPQ+reRank;
151 |   for high precision, choose HNSW; the NSG index does not support incremental inserts yet, so it was not adopted
152 | 3.sptag: builds the graph by spatial partitioning
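The two layouts suggested in item 2 can be spelled out with faiss directly (the factory strings and parameters are illustrative, not tuned):

```python
import numpy as np
import faiss

d = 128
xb = np.random.rand(100000, d).astype('float32')

# High capacity, lower precision: OPQ rotation + inverted multi-index + PQ codes
# (a full-precision re-ranking stage can be stacked on top).
imi = faiss.index_factory(d, "OPQ16,IMI2x8,PQ16")
imi.train(xb)
imi.add(xb)

# High precision: an HNSW graph over the raw vectors, no training phase required.
hnsw = faiss.IndexHNSWFlat(d, 32)        # 32 graph neighbors per node
hnsw.add(xb)
dist, idx = hnsw.search(xb[:5], 10)
```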
153 | ### framework
154 | 1.vearch: by JingDong AI; tightly integrated with deep learning models
155 | 2.milvus: by Zilliz; built with a database mindset
156 | 
157 | 
158 | 
--------------------------------------------------------------------------------
/train_mx_ebay_margin.py:
--------------------------------------------------------------------------------
1 | # mxnet training on the ebay dataset; the model is copied from the mxnet deep embedding learning example
2 | 
3 | import argparse
4 | import logging
5 | import time
6 | import numpy as np
7 | 
8 | from bottleneck import argpartition
9 | import mxnet as mx
10 | from data import getCUB200, getEbayCrossClassData, getEbayInClassData
11 | from data import getDeepInClassFashion, getDeepCrossClassFashion
12 | import os
13 | from mxnet import gluon
14 | import mxnet.gluon.model_zoo.vision as vision
15 | from mxnet import autograd as ag
16 | from mxnet import nd
17 | from models.mx_margin_model import MarginLoss, MarginNet
18 | from utils import Visulizer
19 | from configs import opt as opt_conf
20 | import ipdb
21 | from tqdm import tqdm
22 | logging.basicConfig(level=logging.INFO)
23 | 
24 | parser = argparse.ArgumentParser(description='train a model for image classification.')
25 | parser.add_argument('--data', type=str, default='CUB_200_2011',
26 |                     help='path of data.')
27 | parser.add_argument('--embed-dim', type=int, default=128,
28 |                     help='dimensionality of image embedding. default is 128.')
29 | parser.add_argument('--batch-size', type=int, default=70,
30 |                     help='training batch size per device (CPU/GPU). default is 70.')
31 | parser.add_argument('--batch-k', type=int, default=5,
32 |                     help='number of images per class in a batch. default is 5.')
33 | parser.add_argument('--gpus', type=str, default='',
34 |                     help='list of gpus to use, e.g. 0 or 0,2,5. empty means using cpu.')
35 | parser.add_argument('--epochs', type=int, default=20,
36 |                     help='number of training epochs. default is 20.')
37 | parser.add_argument('--optimizer', type=str, default='adam',
38 |                     help='optimizer. default is adam.')
39 | parser.add_argument('--lr', type=float, default=0.0001,
40 |                     help='learning rate. default is 0.0001.')
41 | parser.add_argument('--lr-beta', type=float, default=0.1,
42 |                     help='learning rate for the beta in margin based loss. default is 0.1.')
43 | parser.add_argument('--margin', type=float, default=0.2,
44 |                     help='margin for the margin based loss. default is 0.2.')
45 | parser.add_argument('--beta', type=float, default=1.2,
46 |                     help='initial value for beta. default is 1.2.')
47 | parser.add_argument('--nu', type=float, default=0.0,
48 |                     help='regularization parameter for beta. default is 0.0.')
49 | parser.add_argument('--factor', type=float, default=0.5,
50 |                     help='learning rate schedule factor. default is 0.5.')
51 | parser.add_argument('--steps', type=str, default='12,14,16,18',
52 |                     help='epochs to update learning rate. default is 12,14,16,18.')
53 | parser.add_argument('--wd', type=float, default=0.0001,
54 |                     help='weight decay rate. default is 0.0001.')
55 | parser.add_argument('--seed', type=int, default=123,
56 |                     help='random seed to use. default=123.')
57 | parser.add_argument('--model', type=str, default='resnet50_v2',
58 |                     help='type of model to use. see vision_model for options.')
59 | parser.add_argument('--save-model-prefix', type=str, default='margin_loss_model',
60 |                     help='prefix of models to be saved.')
61 | parser.add_argument('--use_pretrained', action='store_true',
62 |                     help='enable using pretrained model from gluon.')
63 | parser.add_argument('--kvstore', type=str, default='device',
64 |                     help='kvstore to use for trainer.')
65 | parser.add_argument('--log-interval', type=int, default=20,
66 |                     help='number of batches to wait before logging.')
67 | parser.add_argument('--debug', action='store_true',
68 |                     help='enable debug to run through the model pipeline')
69 | parser.add_argument('--use_viz', action='store_true',
70 |                     help='enable visualization of the loss curve')
71 | parser.add_argument('--name', type=str, default='cub200',
72 |                     help='the train instance name')
73 | parser.add_argument('--load_model_path', type=str, default='checkpoints/Fashion_In.params',
74 |                     help='the trained model')
75 | 
76 | opt = parser.parse_args()
77 | opt.save_model_prefix = opt.name  # force the save-model prefix to the run name
78 | logging.info(opt)
79 | # Settings.
80 | mx.random.seed(opt.seed)
81 | np.random.seed(opt.seed)
82 | 
83 | batch_size = opt.batch_size
84 | 
85 | os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
86 | gpus = [] if opt.gpus is None or opt.gpus == '' else [
87 |     int(gpu) for gpu in opt.gpus.split(',')]
88 | num_gpus = len(gpus)
89 | 
90 | batch_size *= max(1, num_gpus)
91 | context = [mx.gpu(i) for i in gpus] if num_gpus > 0 else [mx.cpu()]
92 | steps = [int(step) for step in opt.steps.split(',')]
93 | 
94 | # Construct model.
95 | kwargs = {'ctx': context, 'pretrained': opt.use_pretrained}
96 | net = vision.get_model(opt.model, **kwargs)
97 | 
98 | if opt.use_pretrained:
99 |     # Use a smaller learning rate for pre-trained convolutional layers.
100 |     for v in net.collect_params().values():
101 |         if 'conv' in v.name:
102 |             setattr(v, 'lr_mult', 0.01)
103 | 
104 | net.hybridize()
105 | net = MarginNet(net.features, opt.embed_dim, opt.batch_k)
106 | beta = mx.gluon.Parameter('beta', shape=(100000,))
107 | data_dict = {'CUB_200_2011': {'data_dir': 'CUB_200_2011', 'func': getCUB200},
108 |              'EbayInClass': {'data_dir': 'Stanford_Online_Products', 'func': getEbayInClassData},
109 |              'EbayCrossClass': {'data_dir': 'Stanford_Online_Products', 'func': getEbayCrossClassData},
110 |              'DeepFashionInClass': {'data_dir': 'DeepInShop', 'func': getDeepInClassFashion},
111 |              'DeepFashionCrossClass': {'data_dir': 'DeepInShop', 'func': getDeepCrossClassFashion}}
112 | if opt.debug:
113 |     ipdb.set_trace()
114 | train_dataloader, val_dataloader = data_dict[opt.data]['func'](os.path.join('data/', data_dict[opt.data]['data_dir']),
115 |                                                                batch_k=opt.batch_k, batch_size=opt.batch_size)
116 | # if opt.data=='Ebay':
117 | #     train_dataloader,val_dataloader = getEbayData(os.path.join('data/',opt.data),batch_k=opt.batch_k,batch_size=batch_size )
118 | # elif opt.data=='CUB_200_2011':
119 | #     train_dataloader,val_dataloader = getCUB200(os.path.join('data/',opt.data),batch_k=opt.batch_k,batch_size=batch_size )
120 | # train_dataloader yields image data of shape [1, batch_size, channel, W, H] and labels of shape [1, batch_size, 1]
121 | # test_dataloader yields image data of shape [batch_size, channel, W, H] and labels of shape [batch_size, 1]
122 | # use viz
123 | if opt.use_viz:
124 |     viz = Visulizer(host=opt_conf.vis_host, port=opt_conf.vis_port, env='mx_margin' + opt.name)
125 |     viz.log(str(opt))
126 |     viz.log("start to train the mxnet margin model, name:%s" % (opt.name))
127 | 
128 | def get_distance_matrix(x):
129 |     """Get distance matrix given a matrix. Used in testing."""
130 |     if opt.use_viz:
131 |         viz.log("begin to compute distance matrix")
132 |     # ||a - b||^2 = ||a||^2 + ||b||^2 - 2ab, computed for the whole matrix at once
133 |     square = nd.sum(x ** 2.0, axis=1, keepdims=True)
134 |     distance_square = square + square.transpose() - (2.0 * nd.dot(x, x.transpose()))
135 |     return nd.sqrt(distance_square)
136 | 
137 | def evaluate_emb(emb, labels):
138 |     """Evaluate embeddings based on Recall@k."""
139 |     d_mat = get_distance_matrix(emb)
140 |     # d_mat = d_mat.asnumpy()
141 |     # labels = labels.asnumpy()  # operate directly on mxnet.ndarray; converting to numpy would cause a memory error
142 | 
143 |     names = []
144 |     accs = []
145 |     for i in range(emb.shape[0]):
146 |         d_mat[i, i] = 1e10
147 |     index_mat = nd.argsort(d_mat)
148 |     nd.waitall()
149 |     if opt.use_viz:
150 |         viz.log("nd all dist mat")
151 |     for k in [1, 2, 4, 8, 16]:
152 |         names.append('Recall@%d' % k)
153 |         correct, cnt = 0.0, 0.0
154 |         index_mat_part = index_mat[:, :k]
155 |         for i in range(emb.shape[0]):
156 |             if any(labels[i] == labels[nn] for nn in index_mat_part[i]):
157 |                 correct += 1
158 |             cnt += 1
159 |         # for i in range(emb.shape[0]):
160 |         #     d_mat[i, i] = 1e10
161 |         #     nns = argpartition(d_mat[i], k)[:k]
162 |         #     if any(labels[i] == labels[nn] for nn in nns):
163 |         #         correct += 1
164 |         #     cnt += 1
165 |         accs.append(correct/cnt)
166 |     return names, accs
167 | 
168 | 
169 | def test(ctx):
170 |     """Test a model."""
171 |     if opt.use_viz:
172 |         viz.log("begin to validate")
173 | 
174 |     outputs = []
175 |     labels = []
176 |     for i, batch in enumerate(val_dataloader):
177 |         data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
178 |         label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
179 |         # after the split, data is a list of per-device batches
180 |         for x in data:
181 |             outputs.append(net(x)[-1])
182 |         labels += label
183 |         if (i+1) % (opt.log_interval*2) == 0:
184 |             viz.log("valid iter {0}".format(i))
185 |     outputs = nd.concatenate(outputs, axis=0)
186 |     labels = nd.concatenate(labels, axis=0)
187 |     viz.log("begin to eval embedding search")
188 |     return evaluate_emb(outputs, labels)
189 | 
190 | def get_lr(lr, epoch, steps, factor):
191 |     """Get learning rate based on schedule."""
192 |     for s in steps:
193 |         if epoch >= s:
194 |             lr *= factor
195 |     return lr
196 | 
197 | def train(epochs, ctx):
198 |     """Training function."""
199 |     if isinstance(ctx, mx.Context):
200 |         ctx = [ctx]
201 |     net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
202 | 
203 |     opt_options = {'learning_rate': opt.lr, 'wd': opt.wd}
204 |     if opt.optimizer == 'sgd':
205 |         opt_options['momentum'] = 0.9
206 |     if opt.optimizer == 'adam':
207 |         opt_options['epsilon'] = 1e-7
208 |     trainer = gluon.Trainer(net.collect_params(), opt.optimizer,
209 |                             opt_options,
210 |                             kvstore=opt.kvstore)
211 |     if opt.lr_beta > 0.0:
212 |         # Jointly train class-specific beta.
213 |         # See the "Sampling Matters in Deep Embedding Learning" paper for details.
214 |         beta.initialize(mx.init.Constant(opt.beta), ctx=ctx)
215 |         trainer_beta = gluon.Trainer([beta], 'sgd',
216 |                                      {'learning_rate': opt.lr_beta, 'momentum': 0.9},
217 |                                      kvstore=opt.kvstore)
218 | 
219 |     loss = MarginLoss(margin=opt.margin, nu=opt.nu)
220 | 
221 | 
222 |     best_val = 0.0
223 |     for epoch in range(epochs):
224 |         tic = time.time()
225 |         prev_loss, cumulative_loss = 0.0, 0.0
226 | 
227 |         # Learning rate schedule.
228 |         trainer.set_learning_rate(get_lr(opt.lr, epoch, steps, opt.factor))
229 |         if opt.use_viz:
230 |             viz.log("Epoch {0} learning rate = {1}".format(epoch, trainer.learning_rate))
231 |         if opt.lr_beta > 0:
232 |             trainer_beta.set_learning_rate(get_lr(opt.lr_beta, epoch, steps, opt.factor))
233 |             viz.log("Epoch {0} beta learning rate={1}".format(epoch, trainer_beta.learning_rate))
234 | 
235 |         # Inner training loop.
236 |         for i, batch_data in enumerate(train_dataloader):
237 |             batch = batch_data[0][0]  # batch_data is a tuple (x, y); x has shape [1, 70, 3, 227, 227]
238 |             label = batch_data[1][0]
239 |             data = gluon.utils.split_and_load(batch, ctx_list=ctx, batch_axis=0)
240 |             label = gluon.utils.split_and_load(label, ctx_list=ctx, batch_axis=0)
241 | 
242 |             # After the split, data and label are lists of per-device arrays.
243 |             Ls = []
244 |             with ag.record():
245 |                 for x, y in zip(data, label):
246 |                     a_indices, anchors, positives, negatives, _ = net(x)
247 | 
248 |                     if opt.lr_beta > 0.0:
249 |                         L = loss(anchors, positives, negatives, beta, y[a_indices])
250 |                     else:
251 |                         L = loss(anchors, positives, negatives, opt.beta, None)
252 | 
253 |                     # Store the loss and do backward after we have done forward
254 |                     # on all GPUs for better speed on multiple GPUs.
255 |                     Ls.append(L)
256 |                     cumulative_loss += nd.mean(L).asscalar()
257 | 
258 |             for L in Ls:
259 |                 L.backward()
260 | 
261 |             # Update.
262 |             trainer.step(batch.shape[0])
263 |             if opt.lr_beta > 0.0:
264 |                 trainer_beta.step(batch.shape[0])
265 | 
266 |             if (i + 1) % opt.log_interval == 0:
267 |                 viz.log('[Epoch {0}, Iter {1}] training loss={2}'.format(
268 |                     epoch, i + 1, cumulative_loss - prev_loss))
269 |                 if opt.use_viz:
270 |                     viz.plot('margin_loss', cumulative_loss - prev_loss)
271 |                 prev_loss = cumulative_loss
272 |             if opt.debug:
273 |                 import ipdb
274 |                 ipdb.set_trace()
275 |                 break
276 | 
277 |         viz.log('[Epoch {0}] training loss={1}'.format(epoch, cumulative_loss))
278 |         viz.log('[Epoch {0}] time cost: {1}'.format(epoch, time.time() - tic))
279 | 
280 |         names, val_accs = test(ctx)
281 |         for name, val_acc in zip(names, val_accs):
282 |             viz.log('[Epoch {0}] validation: {1}={2}'.format(epoch, name, val_acc))
283 |         viz.plot('recall@1', val_accs[0])
284 | 
285 |         if val_accs[0] > best_val:
286 |             best_val = val_accs[0]
287 |             viz.log('Saving {0}'.format(opt.save_model_prefix))
288 |             net.save_parameters('checkpoints/%s.params' % opt.save_model_prefix)
289 |     return best_val
290 | 
291 | 
292 | def extract_feature():
293 |     """
294 |     Extract feature vectors for the validation data and save them to a CSV file.
295 |     Uses the module-level net, val_dataloader and context.
296 |     """
297 |     global net
298 |     deepfashion_csv = 'checkpoints/deepfashion.csv'  # write the vectors to this file
299 |     net.initialize()
300 |     net.collect_params().reset_ctx(context)
301 |     net.load_parameters(opt.load_model_path, ctx=context)
302 |     import csv
303 |     f = open(deepfashion_csv, 'w')
304 |     writer = csv.writer(f, dialect='excel')
305 | 
306 |     for i, batch in tqdm(enumerate(val_dataloader)):
307 |         batch_size = batch[0].shape[0]
308 |         data = gluon.utils.split_and_load(batch[0], ctx_list=context, batch_axis=0)
309 |         label = gluon.utils.split_and_load(batch[1], ctx_list=context, batch_axis=0)
310 |         # after the split, data is a list of per-device batches
311 |         small_batch_feature = []
312 |         for x in data:
313 |             feature = net.extract(x)
314 |             small_batch_feature.append(feature)
315 |         image_id = np.arange(i*batch_size, (i+1)*batch_size).reshape(-1, 1)  # prepare the image ids
316 |         vector = nd.concatenate(small_batch_feature, axis=0).asnumpy()  # concatenate the features
317 |         label = np.array([x.asnumpy() for x in label]).reshape(-1, 1)
318 |         result = np.hstack((image_id, label, vector))
319 |         writer.writerows(result)
320 |     print("finished extracting features")
321 |     f.close()
322 |     return "True finished"
323 | 
324 | 
325 | 
326 | 
327 | 
328 | if __name__ == '__main__':
329 |     best_val_recall = train(opt.epochs, context)
330 |     print("Best validation Recall@1:%.2f" % (best_val_recall))
331 | 
332 |     # result = extract_feature()
333 |     # print(result)
--------------------------------------------------------------------------------