├── main.py
├── __init__.py
├── models
    ├── visnet.py
    ├── __init__.py
    ├── loss.py
    ├── vgg_classify.py
    ├── dml_model.py
    ├── hashnet.py
    ├── sample_dml.py
    └── mx_margin_model.py
├── checkpoints
    └── deep_test.txt
├── data
    ├── classify
    │   ├── __init__.py
    │   └── ClassifyData.py
    ├── hashdata
    │   ├── __init__.py
    │   └── hash_tri_files.py
    ├── n_pair_mc
    │   ├── __init__.py
    │   ├── deep_in_fashion.py
    │   └── npair_dataset.py
    ├── margin_cub200
    │   ├── __init__.py
    │   └── cub200_margin.py
    ├── mxdata
    │   ├── basic_module
    │   │   ├── __init__.py
    │   │   └── basic_transform.py
    │   ├── __init__.py
    │   ├── composedataset.py
    │   ├── mxcub_simple.py
    │   ├── mxcub200.py
    │   ├── online_products.py
    │   └── deep_fashion.py
    └── __init__.py
├── utils
    ├── train_mx_margin.py
    ├── __init__.py
    ├── mxnet_server_client.py
    ├── extract_Ebaytxt_fromDeepFashion.py
    ├── sku_viewer.py
    ├── log_config.py
    ├── parse_deepinshopdata.py
    ├── visulization.py
    └── vis_tsne_images.py
├── server
    ├── __init__.py
    ├── ab_test.py
    └── copy_nn.py
├── requiremetns.txt
├── extract_feature.sh
├── train_cub.sh
├── train_fashion_inclass.sh
├── train_In_classEbay.sh
├── train_cross_classEbay.sh
├── train_fashion_inclass2.sh
├── manage_visdom.py
├── .gitignore
├── configs.py
├── docs
    └── deep-metric-learning.MD
├── train_classify.py
├── train_hash.py
├── train_mc_npair.py
├── train_margin_cub.py
├── train_mx_margin.py
├── README.MD
└── train_mx_ebay_margin.py


/main.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/models/visnet.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/checkpoints/deep_test.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data/classify/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data/hashdata/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data/n_pair_mc/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/utils/train_mx_margin.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data/margin_cub200/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data/mxdata/basic_module/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/server/__init__.py:
--------------------------------------------------------------------------------
1 | from models import MarginNet


--------------------------------------------------------------------------------
/requiremetns.txt:
--------------------------------------------------------------------------------
1 | visdom
2 | fire
3 | numpy
4 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .visulization import Visulizer
2 | from .log_config import logger


--------------------------------------------------------------------------------
/data/mxdata/__init__.py:
--------------------------------------------------------------------------------
1 | from data.mxdata.basic_module.basic_transform import default_transform,test_transform


--------------------------------------------------------------------------------
/utils/mxnet_server_client.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import numpy as np
3 | 
4 | host = 'http://hpc5.yud.io:8080/ping'
5 | ret = requests.post(host)
6 | print(ret)


--------------------------------------------------------------------------------
/extract_feature.sh:
--------------------------------------------------------------------------------
1 | python train_mx_ebay_margin.py --gpus=1 --batch-size=100 --use_viz --name=ExtractDeepFashion --data=DeepFashionInClass --load_model_path=checkpoints/Fashion_In.params


--------------------------------------------------------------------------------
/train_cub.sh:
--------------------------------------------------------------------------------
1 | nohup python train_mx_ebay_margin.py --gpus=0 --batch-k=5 --batch-size=70 --use_pretrained --use_viz --epochs=30 --name=CUB_200_2011 --data=CUB_200_2011 >mytraincub200.log 2>&1 &
2 | 


--------------------------------------------------------------------------------
/train_fashion_inclass.sh:
--------------------------------------------------------------------------------
1 | nohup python train_mx_ebay_margin.py --gpus=2 --batch-k=4 --batch-size=100 --use_pretrained --use_viz --name=Fashion_In --data=DeepFashionInClass >mytrainDeep_Inclass.log 2>&1 &


--------------------------------------------------------------------------------
/train_In_classEbay.sh:
--------------------------------------------------------------------------------
1 | nohup python train_mx_ebay_margin.py --gpus=3 --batch-k=2 --batch-size=80 --use_pretrained --use_viz --epochs=55 --name=Ebay_Inclass --data=EbayInClass >mytrainEbay_Inclass.log 2>&1 &
2 | 


--------------------------------------------------------------------------------
/train_cross_classEbay.sh:
--------------------------------------------------------------------------------
1 | nohup python train_mx_ebay_margin.py --gpus=0 --batch-k=2 --batch-size=80 --epochs=55 --use_pretrained --use_viz --name=Ebay_Crossclass --data=EbayCrossClass >mytrainEbay_Crossclass.log 2>&1 &
2 | 


--------------------------------------------------------------------------------
/train_fashion_inclass2.sh:
--------------------------------------------------------------------------------
1 | nohup python train_mx_ebay_margin.py --gpus=2 --epochs=40 --steps=14,16,20,30 --batch-k=4 --batch-size=100 --use_pretrained --use_viz --name=Fashion_In2 --data=DeepFashionInClass >mytrainDeep_Inclass2.log 2>&1 &


--------------------------------------------------------------------------------
/manage_visdom.py:
--------------------------------------------------------------------------------
1 | from utils import Visulizer
2 | from configs import opt
3 | from visdom import Visdom
4 | from utils import Visulizer
5 | 
6 | viz = Visulizer(opt.vis_host,opt.vis_port,env='main')
7 | print(viz)
8 | viz.delete_env('dmldml3')
9 | print("finished")


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .vgg_classify import BaseModule
2 | from .vgg_classify import VggClassify
3 | from .dml_model import ModGoogLeNet
4 | from .loss import NpairLoss
5 | from .vgg_classify import BaseModule
6 | from .sample_dml import Margin_Loss,SampleModel
7 | from .mx_margin_model import MarginNet
8 | from .hashnet import HashNetRes50,HashLoss


--------------------------------------------------------------------------------
/utils/extract_Ebaytxt_fromDeepFashion.py:
--------------------------------------------------------------------------------
 1 | # We want to extract Ebay like txt file for DeepFashion Inshop dataset
 2 | # From ImageFolder files to construct a pandas table to store in simple file
 3 | 
 4 | # Then we can Instance Dataset from Stanford Online Products from DeepFashion
 5 | 
 6 | import numpy as np
 7 | import pandas as pd
 8 | import csv
 9 | import os
10 | import shutil
11 | import sys
12 | 
13 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.out
 2 | *.log
 3 | checkpoints/*.pth
 4 | checkpoints/*.csv
 5 | checkpoints/*00.jpg
 6 | data/Stanford_Online_Products
 7 | data/DeepInShop
 8 | data/cub200_2011
 9 | data/Stanford_Online_Products
10 | data/CUB_200_2011
11 | data/hashdata/coco
12 | data/hashdata/nus_wide
13 | *.png
14 | *.npy
15 | *.csv
16 | .ipynb_checkpoints/
17 | __pycache__/
18 | *.pyc
19 | *.params
20 | *.jpg
21 | *.jpeg


--------------------------------------------------------------------------------
/server/ab_test.py:
--------------------------------------------------------------------------------
 1 | # this is a A/B test from image_nn_prod and image_metric_taobao128
 2 | 
 3 | old_index = 'image_nn_prod'
 4 | new_index = 'image_metric_taobao128'
 5 | 
 6 | from .copy_nn import get_net()
 7 | from .copy_nn import get_target_colection
 8 | from .copy_nn import get_nn_config
 9 | from .copy_nn import get_db
10 | 
11 | if __name__=='__main__':
12 |     dev = get_db()
13 |     host,path = get_nn_config()
14 |     net = get_net(0)
15 |     nn_128 = get_target_colection(db)
16 | 
17 | 


--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
 1 | import torchvision.transforms as T
 2 | 
 3 | from .classify.ClassifyData import my_collate_fn,Street2shop
 4 | from .n_pair_mc.npair_dataset import EbayDataset
 5 | from .margin_cub200.cub200_margin import CUB200DataSet
 6 | from .mxdata.mxcub200 import cub200_iterator
 7 | 
 8 | from .mxdata.online_products import getEbayCrossClassData,getEbayInClassData
 9 | from .mxdata.mxcub_simple import getCUB200
10 | from .mxdata.deep_fashion import getDeepCrossClassFashion,getDeepInClassFashion
11 | from .hashdata.hash_tri_files import get_hash_dataloader


--------------------------------------------------------------------------------
/utils/sku_viewer.py:
--------------------------------------------------------------------------------
 1 | import pymongo
 2 | from pymongo import MongoClient
 3 | from pprint import pprint
 4 | from pymongo import MongoClient
 5 | mongdb={}
 6 | mongdb['host']=''
 7 | mongdb['port']=3717
 8 | client=MongoClient(host=mongdb['host'],port=mongdb['port'])
 9 | dev=client.get_database('dev')
10 | dev.authenticate(name='',password='')
11 | print(dev.collection_names())
12 | 
13 | tao_bao_collection = dev.get_collection('image_faiss_dual_taobao')
14 | print(tao_bao_collection.count())
15 | 
16 | item = tao_bao_collection.find_one()
17 | pprint(item['_source'])
18 | 


--------------------------------------------------------------------------------
/utils/log_config.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os.path
 3 | import time
 4 | 
 5 | logger = logging.getLogger()
 6 | logger.setLevel(logging.INFO)
 7 | rq = time.strftime('%Y%m%H%M',time.localtime(time.time()))
 8 | log_path = os.path.join(os.getcwd() ,'Logs')
 9 | log_name = os.path.join(log_path,rq+'.log')
10 | logfile = log_name
11 | file_handler = logging.FileHandler(logfile,'w')
12 | file_handler.setLevel(logging.DEBUG)
13 | 
14 | formatter = logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s")
15 | file_handler.setFormatter(formatter)
16 | 
17 | # add the logger to handler
18 | 
19 | logger.addHandler(file_handler)
20 | 


--------------------------------------------------------------------------------
/data/mxdata/basic_module/basic_transform.py:
--------------------------------------------------------------------------------
 1 | import mxnet as mx
 2 | from mxnet.gluon import nn
 3 | import mxnet.gluon.data.vision.transforms as T
 4 | 
 5 | 
 6 | class RandomCrop(nn.Block):
 7 |     def __init__(self,size):
 8 |         self.size = size
 9 |     def forward(self,x):
10 |         return mx.image.random_crop(x,(size,size))
11 | 
12 | normalize=T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
13 | default_transform = T.Compose([
14 |     T.Resize(256),
15 |     RandomCrop(224),
16 |     T.RandomFlipLeftRight(),
17 |     T.ToTensor(), # last to swap  channel to c,w,h
18 |     normalize
19 | ])
20 | 
21 | test_transform = T.Compose([
22 |     T.Resize(256),
23 |     T.CenterCrop(224),
24 |     T.ToTensor(),
25 |     normalize
26 | ])


--------------------------------------------------------------------------------
/data/n_pair_mc/deep_in_fashion.py:
--------------------------------------------------------------------------------
 1 | # this is an implementation of pytorch deep_fashion_in dataset,
 2 | # aim to train an multi-class-n-pair model as base line
 3 | 
 4 | import torchvision.transforms as T
 5 | #from configs import opt
 6 | from torch.utils.data import Dataset
 7 | import os
 8 | import csv
 9 | import fnmatch
10 | from PIL import Image
11 | import numpy as np
12 | import pandas as pd
13 | import torch
14 | 
15 | #normalize = T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
16 | normalize = T.Normalize(mean=[0.5, 0.5, 0.5],std=[0.5, 0.5, 0.5])
17 | default_transform  = T.Compose([
18 |          T.Resize(256),
19 |          T.RandomCrop(227),
20 |          T.RandomHorizontalFlip(),
21 |          T.ToTensor(),
22 |          normalize,
23 | ])
24 | 
25 | 
26 | 
27 | 


--------------------------------------------------------------------------------
/models/loss.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from torch.nn.functional import cross_entropy
 3 | import torch
 4 | 
 5 | class NpairLoss(nn.Module):
 6 |     """the multi-class n-pair loss"""
 7 |     def __init__(self,l2_reg=3e-3):
 8 |         super(NpairLoss,self).__init__()
 9 |         self.l2_reg = l2_reg
10 | 
11 |     def forward(self,feature,target):
12 |         """
13 |         compute the feature pair loss,the first half is anchor
14 |         the last half is pair feature
15 |         :param feature:
16 |         :return:
17 |         """
18 | 
19 |         batch_size = feature.size(0)
20 |         fa = feature[:int(batch_size/2)]
21 |         fp = feature[int(batch_size/2):]
22 |         logit = torch.matmul(fa,torch.transpose(fp,0,1))
23 |         loss_sce = cross_entropy(logit,target)
24 |         l2_loss = sum(torch.norm(feature,p=2,dim=1))/batch_size
25 |         loss = loss_sce + self.l2_reg*l2_loss
26 |         return loss
27 | 
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/utils/parse_deepinshopdata.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | from mxnet.image import imread
 4 | import os
 5 | bboxfile = r'../Logs/Anno/list_bbox_inshop.txt'
 6 | line = None
 7 | skip=40006
 8 | with open(bboxfile,'r') as f_box:
 9 |     f_box.readline() #
10 |     f_box.readline() #
11 |     for i in range(skip):
12 |         f_box.readline()
13 |     line = f_box.readline()
14 | 
15 | img_dir = r'C:\download\In-shop-clothes'
16 | line_list  = line.strip().split(' ')
17 | path,bbox = line_list[0],line_list[-4:]
18 | print('path:',path,"bbox",bbox)
19 | fig = plt.figure()
20 | plt.subplot(2,1,1)
21 | image = imread(os.path.join(img_dir,path))
22 | bbox=[int(x) for x in bbox]
23 | plt.imshow(image.asnumpy())
24 | plt.subplot(2,1,2)
25 | plt.imshow(image[bbox[1]:bbox[3],bbox[0]:bbox[2]].asnumpy())
26 | plt.show()
27 | 
28 | img_path= r'C:\Users\Dengjun\Pictures\a.jpg'
29 | img = imread(img_path)
30 | print(img.shape)
31 | 
32 | 


--------------------------------------------------------------------------------
/configs.py:
--------------------------------------------------------------------------------
 1 | import warnings
 2 | class DefaultConfig(object):
 3 |     num_classes = 13 # street2shop classes
 4 |     train_classify_dir = "/data/jh/notebooks/hudengjun/VisNet/classify"
 5 |     valid_classify_dir = "/data/jh/notebooks/hudengjun/VisNet/"
 6 |     persist = 'data/persist.csv'
 7 |     lr=0.001
 8 |     lr_step = 50
 9 |     use_gpu = True
10 |     gpu_id = 2
11 |     load_model_path=None
12 |     num_workers = 4
13 |     momentum=0.89
14 |     max_epoch = 800
15 |     print_freq = 40
16 |     batch_size = 32
17 |     vis_host="http://hpc3.yud.io"
18 |     vis_port=8088
19 |     vis_env='Street2shop'
20 |     debug = False
21 | 
22 | 
23 |     #tripplet dataset config
24 |     ebay_dir = '/data/jh/notebooks/hudengjun/DML/deep_metric_learning/lib/online_products/Stanford_Online_Products/'
25 |     n_pair_train = 'Ebay_train.txt'
26 |     n_pair_test = 'Ebay_test.txt'
27 |     embeding_size=512
28 |     dml_model_path=None
29 |     l2_reg=3e-3
30 |     use_viz=True
31 |     freeze_level=0
32 | 
33 | 
34 | def parse(self,kwargs):
35 |     """update dict with kwargs params"""
36 |     for k,v in kwargs.items():
37 |         if not hasattr(self,k):
38 |             warnings.warn("does not has attribute",k)
39 |         setattr(self,k,v)
40 |     print("use config:")
41 |     for k,v in self.__class__.__dict__.items():
42 |         if not k.startswith('__'):
43 |             print(k,':',getattr(self,k))
44 | DefaultConfig.parse = parse
45 | opt = DefaultConfig()
46 | 


--------------------------------------------------------------------------------
/docs/deep-metric-learning.MD:
--------------------------------------------------------------------------------
 1 | - 从度量学习，监督或非监督降维理论及流型学习理论以来，诸多学习算法力求学习一种数据从原始空间和低维嵌入空间的映射，在嵌入空间中能保持
 2 | 原生空间数据的相似距离关系，这种空间转换函数的学习可称之为度量学习；相对于传统的分类问题而言，其种类数目比较固定，如ImageNet根据WordNet分为1000类别，
 3 | 然而很多现实中的问题不是**closeset**（闭集）问题，是openset（开放集合）问题，即在泛化测试时有很多样本和种类类别是从未遇见的，典型的特征是种类多，单个种类样本少，最为典型的是人脸识别（亿级别的类别，单个人最多就抓拍十几张头像）。
 4 | - 将分类和度量对比，分类学习的是一种概念，粗糙的概观，类似孤立看待问题的观点，度量学习的是精细的尺度，强调的是对隐含属性概念的整体布局，一种尺度。
 5 | 也可以认为，分类问题对事物的编码是一种**one-hot**编码格式，而度量是对事物的一种**denseVector**编码格式。
 6 | 概念在未见过的种类无法进行描述，而尺度在未见过的种类依然可以按照尺度测量规范进行测量。（举例，前者学习的是每一个的人名，后者学习的是对人的测量方法，遇到向别人描述一个他不认识的人时，通过属性描述的方式，更容易让别人形成画像）
 7 | 
 8 | - 现实世界中，很多时候并不需要对所有见过的事物说名道姓，我们不关心他是谁，关心的是他和其他事物的距离关系。这也就是从DeepID2的分类loss转换到FaceNet时triplet loss之后人脸识别效果提升的原因，当然这也是人脸识别，行人重识别，车辆检索（车辆套牌分析），商品检索应用落地的原因。  
 9 | - 通过度量学习，我们可以把现实世界中的搜索问题（视觉搜索，VisualSearch or Visual Recommendation）定义为简单的两步，将原生数据空间通过学习得来的映射函数映射到嵌入空间，在嵌入空间通过近似临近搜索算法搜索最近点，通过最近点id反查原始空间的数据。
10 | 其核心技术在于 __关系高保持的空间映射函数__ 和 __高速高容量的向量搜索引擎__。前者可以保证从原生数据空间映射到嵌入空间之后，数据的相似关系保持的精细，数据在嵌入空间的距离与原生数据的观测属性成正相关。
11 | 后者保证能在广泛的搜索空间中高效的查找嵌入空间中距离相近的点（可以在亿级的库中以毫秒级时间搜索出近似最近点）。
12 | 
13 | -传统的LDA，MDS，IsoMap等方法是一种浅层次的映射函数或关系保持的数据转换，而且MDS实际上无法对未见的数据点进行低维度嵌入，只能使用拟合函数进行转换。而SNE算法是一种已指数据样本高维空间距离之后，根据数据距离分布近似的目标，将高位数据点降维，并不适合视觉图片。
14 | 深度卷积神经网络通过层层递进特征提取的方式，具备深层次语义特征提取的能力，通过合适的数据加载和目标函数的梯度引导，该卷积神经网络函数能学习一种深层次的特征提取。该方法又被称为深度度量学习，即Deep Metric Learning
15 | 以人们分辨狗和猫为例，如果让一个婴儿来学习分辨，是不可能的；必须得是大脑发育完整的儿童，如果让儿童整天看白猫和黑狗，久而久之，儿童会以为白颜色的是猫，黑颜色的是狗，遇到一个黑猫，也认为是狗。当然有的是每次给一对动物来分辨，有的是给一批动物来分辨。
16 | 如果一直让儿童去分辨容易分辨的狗和猫，儿童没有经过艰难的判别隔离训练，对于之后遇到的困难样本，就变得无所适从。
17 | 从上述例子可知：深度度量学习函数要训练好的三个前提条件分别是：模型结构，原始数据采样方式，目标函数，困难样本。深度度量学习很多提升改进都是基于基本要素的考虑。
18 | 


--------------------------------------------------------------------------------
/models/vgg_classify.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import time
 4 | from torchvision.models import vgg16_bn
 5 | 
 6 | class BaseModule(nn.Module):
 7 |     """model save and load"""
 8 |     def __init__(self):
 9 |         super(BaseModule,self).__init__()
10 |         self.model_name = str(type(self))
11 |         self.model_name='basemodel'
12 | 
13 |     def load(self,path):
14 |         """
15 |         加载模型
16 |         :param path: reload model path
17 |         :return: None
18 |         """
19 |         self.load_state_dict(torch.load(path))
20 | 
21 |     def save(self,name=None):
22 |         """default modelname and time"""
23 |         if name is None:
24 |             prefix = 'checkpoints/'+self.model_name+'_'
25 |             name = time.strftime(prefix + '%m%d_%H:%M:%S.pth')
26 |         torch.save(self.state_dict(), name)
27 |         return name
28 | 
29 | class VggClassify(BaseModule):
30 |     """a model viaries from vgg_16"""
31 |     def __init__(self,num_classes):
32 |         super(VggClassify, self).__init__()
33 |         vgg16_model = vgg16_bn(pretrained=False)
34 |         features,classifier = vgg16_model.features,vgg16_model.classifier
35 |         classifier = list(classifier)
36 |         del classifier[-1]
37 |         classifier.append(nn.Linear(4096,num_classes))
38 |         self.features = features
39 |         self.classifier = nn.Sequential(*classifier)
40 |         self.model_name = 'vgg_bn'
41 |     def forward(self,x):
42 |         features = self.features(x)
43 |         features = features.view(features.size(0), -1)
44 |         output = self.classifier(features)
45 |         return output
46 | 
47 | if __name__=='__main__':
48 |     model = BaseModule()
49 |     model.save()
50 | 
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/utils/visulization.py:
--------------------------------------------------------------------------------
 1 | from configs import opt
 2 | import visdom
 3 | import torch as t
 4 | import time
 5 | import numpy as np
 6 | class Visulizer(object):
 7 |     """the object interface to store train trace to website"""
 8 |     def __init__(self,host=opt.vis_host,port=opt.vis_port,env=opt.vis_env):
 9 |         self.vis = visdom.Visdom(server=host,port=port,env=env)
10 | 
11 |         self.index ={}
12 |         self.log_text=""
13 | 
14 |     def reinit(self,env='default'):
15 |         self.vis = visdom.Visdom(server=opt.vis_host,port=opt.vis_port,env=opt.vis_env)
16 |         return self
17 | 
18 |     def plot(self,name,y):
19 |         """plot loss:1.0"""
20 |         x = self.index.get(name,0)
21 |         self.vis.line(Y=np.array([y]),X=np.array([x]),
22 |                       win=name,
23 |                       opts=dict(title=name),
24 |                       update=None if x==0 else 'append')
25 |         self.index[name] = x+1
26 | 
27 |     def img(self,name,img_,**kwargs):
28 |         """
29 |         :param name: the window name
30 |         :param img_: img shape and data type,t.Tensor(64,64),Tensor(3,64,64),Tensor(100,1,64,64)
31 |         :param kwargs:
32 |         :return:
33 |         """
34 |         self.vis.images(t.Tensor(img_).cpu().numpy(),
35 |                         win=name,
36 |                         opts=dict(title=name),
37 |                         **kwargs)
38 | 
39 |     def log(self,info,win='log_text'):
40 |         """self.log({loss:1,'lr':0.0001}"""
41 |         self.log_text += ('[{time}] {info} <br>'.format(
42 |             time=time.strftime('%m%d_%H%M%S'), \
43 |             info=info))
44 |         self.vis.text(self.log_text, win)
45 | 
46 |     def delete_env(self,env):
47 |         self.vis.delete_env(env)
48 | 
49 | if __name__=='__main__':
50 |     """nohup python -m visdom.server --port-8088 & 
51 |     this to start visdom server"""
52 |     viz = Visulizer(host='http://192.168.3.13',port=8088,env='street')
53 |     viz.log("this is a start")
54 |     viz.plot('loss',2.3)
55 |     viz.plot('loss',2.2)
56 |     viz.plot('loss',2.1)
57 | 
58 |     viz.img('origin',np.random.random((10,3,224,224)))


--------------------------------------------------------------------------------
/data/mxdata/composedataset.py:
--------------------------------------------------------------------------------
 1 | import mxnet.gluon.data.vision.transforms as T
 2 | from mxnet.gluon.data import DataLoader,Dataset
 3 | from mxnet import nd
 4 | import numpy as np
 5 | import os
 6 | from mxnet.image import imread
 7 | import pandas as pd
 8 | 
 9 | 
10 | normalize=T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
11 | default_transform = T.Compose([
12 |     T.Resize(256),
13 |     T.RandomResizedCrop(224),
14 |     T.RandomFlipLeftRight(),
15 |     T.ToTensor(),
16 |     normalize
17 | ])
18 | 
19 | class ComposeDataSet(Dataset):
20 |     """ an Ebay and DeepFashion Composite Dataset to metric learning"""
21 |     def __init__(self,ebay_dir,fashion_dir,batch_k,batch_size,is_train,transform=default_transform):
22 |         self.ebay_dir = ebay_dir
23 |         self.fashion_dir = fashion_dir
24 |         self.batch_k = batch_k
25 |         self.batch_size = batch_size
26 |         self.is_train = is_train
27 |         self._transform = transform
28 | 
29 |         #begin to resolve ebay data
30 | 
31 |         if self.is_train:
32 |             #start ebay data
33 |             table_name = os.path.join(self.ebay_dir,'Ebay_train.txt')
34 |             table_data = pd.read_table(table_name,header=0,delim_whitespace=True)
35 |             min_super_id, max_super_id = min(table_data.super_class_id), max(table_data.super_class_id)
36 | 
37 |             #this is the super id for ebaydata
38 |             self.super_ids = np.arange(min_super_id, max_super_id + 1)
39 |             self.super2class = {} #store a dict for {super_id:[class_id1,class_id2]}
40 |             for super_id in self.super_ids:
41 |                 self.super2class[super_id] = table_data[table_data.super_class_id == super_id].class_id.tolist()
42 | 
43 |             min_class_id, max_class_id = min(table_data.class_id), max(table_data.class_id)
44 |             self.class_ids = list(np.arange(min_class_id, max_class_id + 1))
45 |             self.train_length = max_class_id + 1 - min_class_id
46 |             self.super_id_dist = [len(v) for k, v in self.super2class.items()]
47 |             for class_id in self.class_ids:
48 |                 one_class_paths = table_data[table_data.class_id == class_id].path.tolist()  # type list
49 |                 self.class2imagefiless.append(one_class_paths)
50 | 
51 |             #Process deepfashion data
52 |             extract_super_ids_to_class_ids
53 |         else:
54 | 


--------------------------------------------------------------------------------
/models/dml_model.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torchvision
 3 | import torch.nn as nn
 4 | 
 5 | import torch.nn.functional as F
 6 | from torchvision.models.inception import  inception_v3
 7 | from .vgg_classify import BaseModule
 8 | from collections import OrderedDict
 9 | class ModGoogLeNet(BaseModule):
10 |     """the change the head from googlenet"""
11 |     def __init__(self,embeding_size=512,with_drop=False):
12 |         super(ModGoogLeNet,self).__init__()
13 |         basic_model = inception_v3(pretrained=True, transform_input=False)
14 |         basic_model.aux_logits=False
15 |         feature = list(basic_model.named_children())
16 |         def aux(name_module):
17 |             return 'AuxLogits' not in name_module[0]
18 | 
19 |         del feature[-1]
20 |         feature = filter(aux, feature) #generator
21 |         feature = [m for m in feature]
22 |         self.level1_2 = nn.Sequential(OrderedDict(feature[0:3]))
23 |         self.level_3_4 = nn.Sequential(OrderedDict(feature[3:5]))
24 |         self.level_5_6 = nn.Sequential(OrderedDict(feature[5:13]))
25 |         self.level_7 = nn.Sequential(OrderedDict(feature[13:16]))
26 |         self.fc = nn.Linear(in_features=2048,out_features=embeding_size)
27 |         self.model_name = 'DMLGoogle'
28 |         self.with_drop = with_drop
29 | 
30 |     def freeze_model(self,level=5):
31 |         """
32 | 
33 |         :param level: the freeze level,all the model split in (
34 |         Conv2d_1a_3x3
35 |         Conv2d_2a_3x3,Conv2d_2b_3x3,
36 |         Conv2d_3b_1x1,Conv2d_4a_3x3,
37 |         Mixed_5b,Mixed_5c,Mixed_5d,
38 |         Mixed_6a,Mixed_6b,Mixed_6c,Mixed_6d,Mixed_6e,AuxLogits
39 |         Mixed_7a,Mixed_7b,Mixed_7c
40 | 
41 |         :return:
42 |         """
43 |         for i,(name,module) in enumerate(self.basic_model.named_children()):
44 |             if i<10 and int(name.split('_')[1][0])<=level:
45 |                 for param in module.parameters():
46 |                     param.requried_grad = False
47 | 
48 | 
49 |     def forward(self,x,normalize=False):
50 |         """
51 |         forward data data shape (32,2,227,227)
52 |         :param x: torch.tensor
53 |         :return: feature embeding
54 |         """
55 |         x = self.level1_2(x)
56 |         x = F.max_pool2d(x, kernel_size=3, stride=2)
57 |         x = self.level_3_4(x)
58 |         x = F.max_pool2d(x, kernel_size=3, stride=2)
59 |         x = self.level_5_6(x)
60 |         x = self.level_7(x)
61 | 
62 |         x = F.avg_pool2d(x, kernel_size=x.size(-1)) #default 8*8,another 5*5
63 |         # 1 x 1 x 2048
64 |         if self.with_drop:
65 |             x = F.dropout(x, training=self.training)
66 |         # 1 x 1 x 2048
67 |         x = x.view(x.size(0), -1)
68 |         embeding = self.fc(x)
69 |         return embeding
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------
/train_classify.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.optim as optim
 3 | from data import Street2shop
 4 | import os
 5 | from configs import opt
 6 | from models import VggClassify
 7 | from torch.utils.data import DataLoader
 8 | from tqdm import tqdm
 9 | from utils import Visulizer
10 | 
11 | def val(model,dataloader):
12 |     """run model with data,"""
13 |     model.eval()
14 |     num_total =0
15 |     num_correct =0
16 |     for i,(data,label) in tqdm(enumerate(dataloader)):
17 |         if opt.use_gpu:
18 |             data = data.cuda()
19 |             label = label.cuda()
20 |         score = model(data)
21 |         num_total += data.size(0)
22 |         pred = torch.argmax(score,dim=1)
23 |         acc = torch.eq(pred, label)
24 |         num_correct += acc.sum().item()
25 |         if opt.debug:
26 |             break
27 |     print("valid, correct rate",1.0*num_correct/num_total)
28 |     model.train()
29 | 
30 | def train(**kwargs):
31 |     opt.parse(kwargs)
32 |     os.environ['CUDA_VISIBLE_DEVICES'] = str(opt.gpu_id)
33 |     #data
34 |     train_data = Street2shop(opt.train_classify_dir,train=True,persist=opt.persist)
35 |     val_data = Street2shop(opt.train_classify_dir,train=False,persist=opt.persist)
36 | 
37 |     #model
38 |     model = VggClassify(num_classes=opt.num_classes)
39 |     if opt.load_model_path:
40 |         model.load(opt.load_model_path)
41 |     if opt.use_gpu:
42 |         model = model.cuda()
43 | 
44 |     #data loader
45 |     train_dataloader= DataLoader(train_data,batch_size=opt.batch_size,shuffle=True,num_workers=opt.num_workers)
46 |     val_dataloader = DataLoader(val_data,batch_size=opt.batch_size,shuffle=True,num_workers=opt.num_workers)
47 | 
48 |     #visulizer
49 |     viz = Visulizer(host=opt.vis_host,port=opt.vis_port,env=opt.vis_env)
50 |     viz.log("start to train")
51 |     #loss function
52 |     criterion = torch.nn.CrossEntropyLoss()
53 |     lr = opt.lr
54 |     m = opt.momentum
55 |     optimizer = optim.SGD(model.parameters(),lr=lr,momentum=m)
56 |     lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,20,0.1)
57 |     for epoch in range(opt.max_epoch):
58 |         lr_scheduler.step()
59 |         train_loss = 0
60 |         for i,(data,label) in tqdm(enumerate(train_dataloader)):
61 |             if opt.use_gpu:
62 |                 data = data.cuda()
63 |                 label = label.cuda()
64 |             optimizer.zero_grad()
65 |             score = model(data)
66 |             loss = criterion(score,label)
67 |             loss.backward()
68 |             optimizer.step()
69 |             train_loss = train_loss + loss.item()
70 |             if i%opt.print_freq == opt.print_freq-1:
71 |                 average_loss = train_loss/opt.batch_size
72 |                 viz.plot('loss',average_loss)
73 |                 train_loss =0
74 |             if opt.debug:
75 |                 break
76 |         print("epoch :{0} finished,begin to valid test".format(epoch))
77 |         model.save()
78 |         val(model,val_dataloader)
79 |         if opt.debug:
80 |             print("finished one iter")
81 |             break
82 | def help():
83 |     """print information"""
84 |     print("""
85 |     useage: python file.py <function> --args=value
86 |     function := train help
87 |     example:
88 |             python {0} train
89 |             python {0} help""")
90 | 
91 | if __name__=='__main__':
92 |     import fire
93 |     fire.Fire()
94 | 
95 | 


--------------------------------------------------------------------------------
/data/classify/ClassifyData.py:
--------------------------------------------------------------------------------
  1 | import torchvision.transforms as T
  2 | from configs import opt
  3 | from torchvision.datasets import ImageFolder
  4 | from torch.utils.data import Dataset
  5 | import os
  6 | import csv
  7 | import fnmatch
  8 | from PIL import Image
  9 | import numpy as np
 10 | from torch.utils.data.dataloader import default_collate
 11 | 
 12 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
 13 | default_transform  = T.Compose([
 14 |          T.RandomResizedCrop(224),
 15 |          T.RandomHorizontalFlip(),
 16 |          T.ToTensor(),
 17 |          normalize,
 18 | ])
 19 | 
 20 | 
 21 | #origin_dataset = ImageFolder(opt.train_classify_dir,target_transform=transform)
 22 | def my_collate_fn(batch):
 23 |     '''
 24 |     batch中每个元素形如(data, label)
 25 |     '''
 26 |     # 过滤为None的数据
 27 |     batch = list(filter(lambda x:hasattr(x[0],'size'), batch))
 28 |     if len(batch) == 0: return t.Tensor()
 29 |     return default_collate(batch) # 用默认方式拼接过滤后的batch数据
 30 | 
 31 | class Street2shop(Dataset):
 32 |     """ dataset split to train and test
 33 |     root is a ln -s link like this
 34 |     --root
 35 |        --bags
 36 |        --tops
 37 |        --skirts
 38 |        --hats
 39 |        --;;
 40 |        have 13 catergory consumer"""
 41 |     def __init__(self,root,train=True,persist = opt.persist,trans = default_transform):
 42 |         self.train = train
 43 |         self.root = root
 44 | 
 45 |         self.names_idx = {}
 46 |         self.transform = trans
 47 | 
 48 |         if persist is None and not os.path.exists(persist):
 49 |             folders = os.listdir(root)
 50 |             folders.sort()  # from a to x sort
 51 |             self.names_idx={fold:i for i,fold in enumerate(folders)}
 52 |             with open('data/persist.csv','w') as f:
 53 |                 writer = csv.writer(f)
 54 |                 for fold in folders:
 55 |                     index = self.names_idx[fold]
 56 |                     imgs = os.listdir(os.path.join(self.root,fold))
 57 |                     for img in imgs:
 58 |                         writer.writerow([fold+'/{0}'.format(img),index])
 59 | 
 60 |         # start to read data
 61 |         with open(persist, 'r') as f:
 62 |             reader = csv.reader(f)
 63 |             self.imgs = [row for row in reader]
 64 |         print("dataset size",len(self.imgs))
 65 | 
 66 |         np.random.shuffle(self.imgs)
 67 |         if self.train:
 68 |             self.imgs = self.imgs[:int(0.7 * len(self.imgs))]
 69 |         else:
 70 |             self.imgs = self.imgs[int(0.7 * len(self.imgs)):]
 71 | 
 72 |     def __getitem__(self, index):
 73 |         """get data and transform"""
 74 |         img_path,label = self.imgs[index]
 75 |         img_path = os.path.join(self.root,img_path)
 76 |         try:
 77 |             data = Image.open(img_path)
 78 |             if not hasattr(data,'size'):
 79 |                 raise Exception("no size or data channel problem")
 80 |             if self.transform:
 81 |                 data = self.transform(data)
 82 |             if not data.size(0) is 3:
 83 |                 print("channel not 3,img_path is :{0}".format(img_path))
 84 |                 raise Exception("channel not 3")
 85 |             return data,int(label)
 86 |         except Exception as e:
 87 |             print(e,img_path)
 88 |             img_path,label = self.imgs[0]
 89 |             data = Image.open(os.path.join(self.root,img_path))
 90 |             label = int(label)
 91 |             if self.transform:
 92 |                 data = self.transform(data)
 93 |             return data,label
 94 | 
 95 | 
 96 | 
 97 | 
 98 |     def __len__(self):
 99 |         return len(self.imgs)
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 


--------------------------------------------------------------------------------
/data/hashdata/hash_tri_files.py:
--------------------------------------------------------------------------------
  1 | # hash data from three files,contain coco,nus_wide and imangenet
  2 | 
  3 | from torch.utils.data import Dataset,DataLoader
  4 | import torchvision.transforms as T
  5 | from PIL import Image
  6 | import os
  7 | import numpy as np
  8 | 
  9 | 
 10 | root_path = '/data/jh/notebooks/hudengjun/DeepEmbeding/data/hashdata'
 11 | 
 12 | def image_train(resize_size=256, crop_size=224):
 13 |     normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
 14 |                                        std=[0.229, 0.224, 0.225])
 15 |     return  T.Compose([
 16 |         T.Resize(resize_size),
 17 |         T.RandomResizedCrop(crop_size),
 18 |         T.RandomHorizontalFlip(),
 19 |         T.ToTensor(),
 20 |         normalize])
 21 | 
 22 | def image_test(resize_size = 256,crop_size=224):
 23 |     normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
 24 |                                        std=[0.229, 0.224, 0.225])
 25 |     #start_first = 0
 26 |     start_center = (resize_size - crop_size - 1) / 2
 27 |     #start_last = resize_size - crop_size - 1
 28 | 
 29 |     return T.Compose([
 30 |         T.Resize(resize_size),
 31 |         PlaceCrop(crop_size,start_center,start_center),
 32 |         T.ToTensor(),
 33 |         normalize])
 34 | class PlaceCrop(object):
 35 |     """Crops the given PIL.Image at the particular index.
 36 |     Args:
 37 |         size (sequence or int): Desired output size of the crop. If size is an
 38 |             int instead of sequence like (w, h), a square crop (size, size) is
 39 |             made.
 40 |     """
 41 | 
 42 |     def __init__(self, size, start_x, start_y):
 43 |         if isinstance(size, int):
 44 |             self.size = (int(size), int(size))
 45 |         else:
 46 |             self.size = size
 47 |         self.start_x = start_x
 48 |         self.start_y = start_y
 49 | 
 50 |     def __call__(self, img):
 51 |         """
 52 |         Args:
 53 |             img (PIL.Image): Image to be cropped.
 54 |         Returns:
 55 |             PIL.Image: Cropped image.
 56 |         """
 57 |         th, tw = self.size
 58 |         return img.crop((self.start_x, self.start_y, self.start_x + tw, self.start_y + th))
 59 | 
 60 | 
 61 | class ImageList(Dataset):
 62 |     def __init__(self,file,transform=None):
 63 |         if transform is None:
 64 |             self._transform = image_train(256,224) if 'train.txt' in file else image_test(256,224)
 65 |         else:
 66 |             self._transform = transform
 67 |         if not os.path.exists(file):
 68 |             raise Exception("file not exist")
 69 |         self.file = file
 70 | 
 71 |         self.images = []
 72 |         with open(self.file,'r') as f:
 73 |             for line in f.readlines():
 74 |                 items =  line.strip().split(' ')
 75 |                 self.images.append((items[0],np.array([int(la) for la in items[1:]],dtype=np.float32)))
 76 | 
 77 |     def __len__(self):
 78 |         return len(self.images)
 79 | 
 80 |     def __getitem__(self, index):
 81 |         path,target = self.images[index]
 82 |         img = Image.open(path).convert('RGB')
 83 |         if self._transform:
 84 |             img = self._transform(img)
 85 |         return img,target
 86 | 
 87 | def get_hash_dataloader(dataset_name,train_batch,test_batch,database_batch):
 88 |     """
 89 |     return the double train dataset
 90 |     :param dataset:
 91 |     :return:
 92 |     """
 93 |     file_names = ['train.txt','test.txt','database.txt']
 94 |     files = [os.path.join(root_path,dataset_name,file_name) for file_name in file_names]
 95 |     datasets = [ImageList(file) for file in files]
 96 |     train1 = DataLoader(datasets[0],batch_size=train_batch,shuffle=True,num_workers=6)
 97 |     train2 = DataLoader(datasets[0],batch_size=train_batch,shuffle=True,num_workers=6)
 98 |     test = DataLoader(datasets[1],batch_size=test_batch,shuffle=False,num_workers=4)
 99 |     database = DataLoader(datasets[2],batch_size=database_batch,shuffle=False,num_workers=4)
100 |     return train1,train2,test,database
101 | 
102 | 
103 | 
104 | if __name__== '__main__':
105 |     coco_train = ImageList('/data/jh/notebooks/hudengjun/DeepEmbeding/data/hashdata/coco/train.txt')
106 |     print("size of cocotrain",len(coco_train))
107 |     print("start to get data",coco_train[0][0].shape,coco_train[0][1].shape)
108 | 
109 | 
110 | 
111 | 
112 | 


--------------------------------------------------------------------------------
/data/margin_cub200/cub200_margin.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch.utils.data import Dataset
  3 | from PIL import Image
  4 | 
  5 | import torchvision.transforms as T
  6 | import numpy as np
  7 | import os
  8 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
  9 | default_transform  = T.Compose([
 10 |          T.Resize(256),
 11 |          T.RandomCrop(224),
 12 |          T.RandomHorizontalFlip(),
 13 |          T.ToTensor(),
 14 |          normalize,
 15 | ])
 16 | 
 17 | class CUB200DataSet(Dataset):
 18 |     """
 19 |     the cub200 bird dataset,dataset description:
 20 |     200 catagory bird, 100 for train ,100 for test ,each catagory hase 60 images
 21 | 
 22 |     """
 23 |     def __init__(self,data_path,batch_k=5,batch_size=70,is_train=True,transform = default_transform):
 24 |         self.is_train = is_train
 25 |         self.batch_k = batch_k                #sample numbers in every calsses,for example,5
 26 |         self.batch_size = batch_size          #the whole batch samples to fetch ,for example,70,so the sampled classes is 12
 27 |         self.train_image_files = [[]for _ in range(100)]
 28 |         self.test_image_files =[]
 29 |         self.test_labels =[]
 30 |         self.boxes = {}
 31 |         self.transform = transform
 32 | 
 33 |         with open(os.path.join(data_path,'images.txt'),'r') as f_img,\
 34 |             open(os.path.join(data_path,'image_class_labels.txt'),'r') as f_label,\
 35 |             open(os.path.join(data_path,'bounding_boxes.txt'),'r') as f_box:
 36 |             for line_img,line_label,line_box in zip(f_img,f_label,f_box):
 37 |                 fname = os.path.join(data_path,'images',line_img.strip().split()[-1])
 38 |                 label = int(line_label.strip().split()[-1])-1
 39 |                 box = [int(float(v)) for v in line_box.split()[-4:]]
 40 |                 self.boxes[fname]=box
 41 | 
 42 |                 if label<100:
 43 |                     self.train_image_files[label].append(fname)
 44 |                 else:
 45 |                     self.test_image_files.append(fname)
 46 |                     self.test_labels.append(label)
 47 | 
 48 |         self.n_test = len(self.test_image_files)
 49 | 
 50 |     def __getitem__(self, index):
 51 |         """
 52 |         get data item in train dataset,all test dataset
 53 |         :param index:  the index of training or test of sample
 54 |         :return: return the origin image data and labels based on sample method,
 55 |                   search batch/batch_k classes ,every class,choose batch_k iamges to compound a batch
 56 |         """
 57 |         if self.is_train:
 58 |             #get train batch
 59 |             images = []
 60 |             labels = []
 61 |             num_groups = self.batch_size//self.batch_k
 62 |             sampled_classes = np.random.choice(100,num_groups,replace=False)
 63 |             for class_id in sampled_classes:
 64 |                 img_fnames = np.random.choice(self.train_image_files[class_id],self.batch_k,replace=False)
 65 |                 for file_path in img_fnames:
 66 |                     x,y,w,h = self.boxes[file_path]
 67 |                     img = Image.open(file_path).convert('RGB').crop((x,y,x+w,y+h))
 68 |                     try:
 69 |                         img_tensor = self.transform(img)
 70 |                         images.append(img_tensor)
 71 |                         labels.append(class_id)
 72 |                     except Exception as e:
 73 |                         print(file_path)
 74 |                         break
 75 | 
 76 |             batch_data = torch.stack(images,dim=0)                   # from list of tensor to batch tensor
 77 |             label_data = torch.tensor(np.array(labels,dtype=np.int32))   # from list to tensor
 78 |             return batch_data,label_data
 79 |         else:
 80 |             #get one sample
 81 |             image = Image.open(self.test_image_files[index]).convert('RGB')
 82 |             label = self.test_labels[index]
 83 |             if self.transform:
 84 |                 image = self.transform(image)
 85 |             return image,label
 86 | 
 87 |     def __len__(self):
 88 |         if self.is_train:
 89 |             return 200  #
 90 |         else:
 91 |             return self.n_test # will return all test_image_files
 92 | 
 93 | 
 94 | 
 95 | if __name__=='__main__':
 96 |     import ipdb
 97 |     ipdb.set_trace()
 98 |     dataset = CUB200DataSet(data_path='data/cub200_2011/CUB_200_2011/')
 99 |     data = dataset[1]
100 |     print(type(data))
101 |     print(data[1])


--------------------------------------------------------------------------------
/models/hashnet.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torchvision.models.resnet import resnet50
  4 | 
  5 | class HashNetRes50(nn.Module):
  6 |     """
  7 |     this is a hash net based on resnet50
  8 |     """
  9 |     def __init__(self,n_bit):
 10 |         super(HashNetRes50,self).__init__()
 11 |         model_resnet = resnet50(pretrained=True)
 12 |         self.conv1 = model_resnet.conv1
 13 |         self.bn1 = model_resnet.bn1
 14 |         self.relu = model_resnet.relu
 15 |         self.maxpool = model_resnet.maxpool
 16 |         self.layer1 = model_resnet.layer1
 17 |         self.layer2 = model_resnet.layer2
 18 |         self.layer3 = model_resnet.layer3
 19 |         self.layer4 = model_resnet.layer4
 20 |         self.avgpool = model_resnet.avgpool
 21 |         self.feature_layers = nn.Sequential(self.conv1,
 22 |                                             self.bn1,
 23 |                                             self.relu,
 24 |                                             self.maxpool,
 25 |                                             self.layer1,
 26 |                                             self.layer2,
 27 |                                             self.layer3,
 28 |                                             self.layer4,
 29 |                                             self.avgpool)
 30 |         self.hash_layer = nn.Linear(model_resnet.fc.in_features,n_bit)
 31 |         self.hash_layer.weight.data.normal_(0,0.01)
 32 |         self.hash_layer.bias.data.fill_(0.0)
 33 |         self.activation = torch.nn.Tanh()
 34 | 
 35 |         self.iter_num =0
 36 |         self.gamma = 0.005
 37 |         self.step_size = 200
 38 |         self.power =0.5
 39 |         self.init_scale = 1.0
 40 |         self.scale = self.init_scale
 41 |         self.__in_features = n_bit
 42 | 
 43 |     def forward(self,x):
 44 |         """ the image x contains x and x' to generate similairty"""
 45 |         if self.training:
 46 |             self.iter_num +=1
 47 |         x = self.feature_layers(x)
 48 |         x = x.view(x.size(0),-1)
 49 |         y = self.hash_layer(x) # just a linear transform
 50 |         if self.iter_num % self.step_size == 0:
 51 |             self.scale = self.init_scale*math.pow((1+self.gamma*self.iter_num),self.power)
 52 |         y = self.activation(self.scale*y)
 53 |         return y
 54 | 
 55 |     def ouput_num(self):
 56 |         return self.__in_features
 57 | 
 58 | class HashLoss(nn.Module):
 59 |     def __init__(self,hash_bit):
 60 |         super(HashLoss,self).__init__()
 61 |         self.hash_bit = hash_bit
 62 | 
 63 | 
 64 |     def forward(self,x,y,sigmoid_param = 1.0,l_threshold=15.0,class_num =1.0):
 65 |         """
 66 | 
 67 |         :param x:
 68 |         :param y:
 69 |         :param sigmoid_param:
 70 |         :param l_threshold:  the big dot_product use the limitation
 71 |         :param class_num: the imbalance data distribution
 72 |         :return:
 73 |         """
 74 |         total_size = x.shape[0]
 75 |         x1 = x.narrow(0,0,total_size//2)
 76 |         x2 = x.narrow(0,total_size//2,total_size//2) # narrow,dimension,start,length
 77 |         y1 = y.narrow(0,0,total_size//2)
 78 |         y2 = y.narrow(0,total_size//2,total_size//2)
 79 | 
 80 |         similarity = torch.mm(y1,y2.t())
 81 |         dot_product = sigmoid_param * torch.mm(x1,x2.t())
 82 |         exp_product = torch.exp(dot_product)
 83 | 
 84 |         mask_dot = dot_product.data>l_threshold
 85 |         mask_exp = dot_product.data<=l_threshold #dot_product  比较小时候，使用log（1+exp(x)) - sij <hi,hj>
 86 | 
 87 |         mask_positive = similarity.data>0
 88 |         mask_negative = similarity.data<=0
 89 | 
 90 |         mask_dp = mask_dot & mask_positive
 91 |         mask_dn = mask_dot & mask_negative
 92 |         mask_ep = mask_exp & mask_positive
 93 |         mask_en = mask_exp & mask_negative
 94 | 
 95 |         dot_loss = dot_product*(1-similarity) # dot_loss 是对exp_loss在dot_product比较大时候的近似，能让dot_loss =0,
 96 |         # 在dot_product 比较大时候使用x近似log（1+exp(x))
 97 |         exp_loss = torch.log(1+exp_product) - similarity*dot_product
 98 | 
 99 |         loss = (torch.sum(torch.mask_select(exp_loss,mask_ep))+
100 |                 torch.sum(torch.mask_select(dot_loss,mask_dp)))*class_num + torch.sum(torch.mask_select(exp_loss,mask_en))+torch.sum(torch.mask_select(dot_loss,mask_dn))
101 | 
102 |         loss = loss /(torch.sum(mask_positive.float())*class_num +torch.sum(mask_negative.float()))
103 |         return loss
104 | 
105 | 
106 | 
107 | 
108 | 
109 | 
110 | if __name__=='__main__':
111 | 
112 |     base_resnet = HashNetRes50(n_bit=48)
113 |     x = torch.rand((10,3,224,224))
114 |     x = base_resnet(x)
115 |     print(x.shape)
116 | 
117 |     #base_resnet.zero_grad()
118 |     torch.save(nn.Sequential(base_resnet),"hashnet.pth.tar")
119 |     print("finished")
120 |     model = torch.load("hashnet.pth.tar")
121 |     print(model)


--------------------------------------------------------------------------------
/utils/vis_tsne_images.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import numpy as np
  3 | from PIL import Image
  4 | from lapjv import lapjv
  5 | from sklearn.manifold import TSNE
  6 | from scipy.spatial.distance import cdist
  7 | import matplotlib as mlp
  8 | import matplotlib.pyplot as plt
  9 | import os
 10 | from tqdm import tqdm
 11 | from sklearn.cluster import KMeans
 12 | from sklearn.metrics import normalized_mutual_info_score
 13 | def load_img(file_list,in_dir):
 14 |     #pred_img = [f for f in os.listdir(in_dir) if os.path.isfile(os.path.join(in_dir, f))]
 15 |     pred_img =file_list
 16 |     img_collection = []
 17 |     for idx, img in enumerate(pred_img):
 18 |         img = os.path.join(in_dir, img)
 19 |         img_collection.append(Image.open(img))
 20 |     return img_collection
 21 | 
 22 | def save_tsne_grid(img_list, x2d, out_res,crop_size,in_dir):
 23 |     """
 24 |     plot all the images in X_2d pictures
 25 |     :param img_collection: the image
 26 |     :param X_2d: the point
 27 |     :param out_res: the output picture resolution
 28 |     :return:
 29 |     """
 30 |     out_img = np.ones((out_res+crop_size,out_res+crop_size,3),dtype='uint8')
 31 |     out_img = out_img*255
 32 | 
 33 |     i=0
 34 |     for img_path,point in tqdm(zip(img_list,x2d)):
 35 |         i +=1
 36 |         point = point*out_res
 37 |         px = int(point[0])
 38 |         py = int(point[1])
 39 |         img = Image.open(os.path.join(in_dir,img_path))
 40 |         img.thumbnail((crop_size,crop_size))
 41 | 
 42 |         a = np.array(img)
 43 | 
 44 |         try:
 45 |             h,w = a.shape[:2]
 46 |             if len(a.shape)==3:
 47 |                 out_img[py:py + h, px:px + w]= a
 48 |         except Exception as e:
 49 |             print(e)
 50 |             print(a.shape)
 51 |             print(img_path)
 52 |         # if i%5000==4999:
 53 |         #     tm = out_img.astype('uint8')
 54 |         #     tm_pl_img = Image.fromarray(tm)
 55 |         #     tm_pl_img.save('checkpoints/tsne_product_{0}.jpg'.format(i+1))
 56 | 
 57 |     out_img = out_img.astype('uint8')
 58 |     pl_img = Image.fromarray(out_img)
 59 |     pl_img.save('checkpoints/tsne_product.jpg')
 60 | 
 61 | 
 62 | def generate_tsne(activations):
 63 |     perplexity=30
 64 |     tsne = TSNE(perplexity=perplexity, n_components=2, init='random')
 65 |     X_2d = tsne.fit_transform(activations) # activations dtype is numpy.ndarray
 66 |     X_2d -= X_2d.min(axis=0)
 67 |     X_2d /= X_2d.max(axis=0)
 68 |     return X_2d
 69 | 
 70 | def visualize(im_files_list,features,data_dir,tsne_size=20000,crop_size=100):
 71 |     """
 72 |     visualize t-sne data
 73 |     :param im_files_list: image file list
 74 |     :param features: image features numpy.ndarray shape (n,512)
 75 |     :return:
 76 |     """
 77 |     print("dimension deduction from  features ...")
 78 |     feature_2d = generate_tsne(features)
 79 |     np.save('fashion.npy',feature_2d)
 80 |     #feature_2d = np.load('x2d.npy')
 81 |     print("build t-sne image ... ...")
 82 |     save_tsne_grid(im_files_list, feature_2d, tsne_size,crop_size,data_dir)
 83 | 
 84 | 
 85 | def nmi(gt_class,features):
 86 |     """
 87 |     normal mutual information,for features
 88 |     :param im_class: np.ndarray,  shape [n,1],dtype=np.int32
 89 |     :param features: image features to clustering ,numpy.ndarray [n,512]
 90 |     :return:
 91 |     """
 92 | 
 93 |     gt_class = gt_class - min(gt_class)
 94 |     n_cluster = len(set(gt_class)) #gt_class from 0 to n_cluster
 95 |     #convert
 96 |     st_class = set(gt_class)
 97 |     kv={}
 98 |     for k in st_class:
 99 |         kv[k]=len(kv)
100 |     gt_class = np.array([kv[k] for k in gt_class])
101 | 
102 |     model = KMeans(n_clusters=n_cluster)
103 |     Y=model.fit(features) # this would take 40 minutes
104 |     cl_class = Y.labels_
105 |     score = normalized_mutual_info_score(gt_class,cl_class)
106 |     print("the normal_mutal_info_score",score)
107 | 
108 | 
109 | 
110 | 
111 | def vis_ebay_n_pair():
112 |     """
113 |     read compute data and visualize t-sne picture,then comput nmi index 
114 |     """
115 |     features_file = 'checkpoints/online_product_compute.csv'
116 |     test_info_file =  'data/Stanford_Online_Products/Ebay_test.txt'
117 | 
118 |     vectors = None
119 |     features = pd.read_csv(features_file,header=None)
120 |     id_class = features.iloc[:,0:2]
121 |     id_class = np.array(id_class)
122 |     vectors = np.array(features.iloc[:,2:])
123 | 
124 |     image_id_path= pd.read_table(test_info_file, header=0, delim_whitespace=True)
125 |     file_list = np.array(image_id_path.path)
126 | 
127 |     visualize(file_list,vectors,'data/Stanford_Online_Products')
128 |     file_class = np.array(image_id_path.class_id)
129 |     file_class = file_class.astype(np.int32)
130 |     nmi(file_class,vectors)
131 | 
132 | 
133 | def vis_deep_fashon_margin():
134 |     feature_file = 'checkpoints/deepfashion.csv'
135 |     test_info_file = 'checkpoints/fashion_test.txt'
136 |     features = pd.read_csv(feature_file,header=None)
137 |     vectors = np.array(features.iloc[:,2:])
138 |     gt_class = np.array(features.iloc[:,1],dtype=np.int32)
139 |     image_path_id = pd.read_table(test_info_file,header=None,sep=',')
140 |     file_list = np.array(image_path_id.iloc[:,0])
141 |     #visualize(file_list,vectors,'data/DeepInShop')
142 |     nmi(gt_class,vectors)
143 | 
144 | 
145 | if __name__=='__main__':
146 |     vis_deep_fashon_margin()
147 | 
148 | 


--------------------------------------------------------------------------------
/data/mxdata/mxcub_simple.py:
--------------------------------------------------------------------------------
  1 | # an implementation of mxnet in vision data dataset similar like pytorch.
  2 | 
  3 | from mxnet.gluon.data import DataLoader,Dataset
  4 | from mxnet import nd
  5 | from mxnet.image import imread
  6 | 
  7 | import os
  8 | import numpy as np
  9 | import mxnet as mx
 10 | from mxnet.gluon import nn
 11 | import mxnet.gluon.data.vision.transforms as T
 12 | 
 13 | 
 14 | class RandomCrop(nn.Block):
 15 |     def __init__(self,size):
 16 |         self.size = size
 17 |     def forward(self,x):
 18 |         return mx.image.random_crop(x,(self.size,self.size))
 19 | 
 20 | normalize=T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
 21 | default_transform = T.Compose([
 22 |     T.Resize(256),
 23 |     T.RandomResizedCrop(size=224,scale=(1.0,1.0),ratio=(1.0,1.0)),# just crop,not scale
 24 |     T.RandomFlipLeftRight(),
 25 |     T.ToTensor(), # last to swap  channel to c,w,h
 26 |     normalize
 27 | ])
 28 | 
 29 | test_transform = T.Compose([
 30 |     T.Resize(256),
 31 |     T.CenterCrop(224),
 32 |     T.ToTensor(),
 33 |     normalize
 34 | ])
 35 | 
 36 | class CUB200Data(Dataset):
 37 |     def __init__(self,dir_path,batch_k,batch_size,is_train,transform = default_transform):
 38 |         self.dir_path = dir_path
 39 |         self.batch_k = batch_k
 40 |         self.batch_size = batch_size
 41 |         self._transform = transform
 42 |         self.is_train = is_train
 43 |         self.train_image_files = [ [] for _ in range(100)]
 44 |         self.test_images_files = [] # to store test image files
 45 |         self.test_labels = [] # to store test iamge and image label
 46 |         self.boxes = {} # to store image bounding box
 47 | 
 48 |         with open(os.path.join(dir_path,'images.txt'),'r') as f_img,\
 49 |             open(os.path.join(dir_path,'image_class_labels.txt'),'r') as f_label,\
 50 |             open(os.path.join(dir_path,'bounding_boxes.txt'),'r') as f_box:
 51 |             for line_img,line_label,line_box in zip(f_img,f_label,f_box):
 52 |                 fname = os.path.join(self.dir_path,'images',line_img.strip().split()[-1])
 53 |                 label = int(line_label.strip().split()[-1])-1
 54 |                 box = [int(float(v)) for v in line_box.split()[-4:]]
 55 |                 self.boxes[fname]=box
 56 | 
 57 |                 if label<100:
 58 |                     self.train_image_files[label].append(fname)
 59 |                 else:
 60 |                     self.test_images_files.append(fname)
 61 |                     self.test_labels.append(label)
 62 |         self.n_test = len(self.test_images_files)
 63 |         self.train_class_ids = list(np.arange(0,100)) #list(self.train_image_files.keys()) # get all train class id list
 64 | 
 65 |     def __len__(self):
 66 |         if self.is_train:
 67 |             return 200
 68 |         else:
 69 |             return self.n_test
 70 | 
 71 |     def __getitem__(self, index):
 72 |         """
 73 |         get the batch //batch_k for train and single for test
 74 |         """
 75 |         if self.is_train:
 76 |             image_names,labels = self.sample_train_batch()
 77 |             # get sampled order image_file names and corresponding label
 78 |             image_list,label_list=[],[]
 79 |             for img,label in zip(image_names,labels):
 80 |                 image = imread(img,flag=1,to_rgb=True)
 81 |                 x,y,w,h = self.boxes[img]
 82 |                 image = image[y:min(y+h,image.shape[0]),x:min(x+w,image.shape[1])]
 83 |                 if image.shape[2]==1:
 84 |                     print("has gray file",img)
 85 |                     image = nd.tile(image,(1,1,3))
 86 |                 image =self._transform(image) # for rgb same value
 87 |                 image_list.append(image)
 88 |                 label_list.append(label)
 89 |             batch_data = nd.stack(*image_list,axis=0)
 90 |             batch_label = nd.array(label_list)
 91 |             return batch_data,batch_label
 92 |         else:
 93 |             img = self.test_images_files[index] # get the file name full path
 94 |             image = imread(img,flag=1,to_rgb=1)
 95 |             x,y,w,h = self.boxes[img]
 96 |             image = image[y:min(y+h,image.shape[0]),x:min(x+w,image.shape[1])]
 97 |             image = self._transform(image)
 98 | 
 99 |             return image,self.test_labels[index]
100 | 
101 |     def sample_train_batch(self):
102 |         """sample batch_size//batch_k and sample small batch_k in each instance"""
103 |         batch = []
104 |         labels =[]
105 |         num_groups = self.batch_size // self.batch_k
106 |         sampleed_classes = np.random.choice(self.train_class_ids,num_groups,replace=False)
107 |         for class_id in sampleed_classes:
108 |             img_fname = np.random.choice(self.train_image_files[class_id],self.batch_k,replace=False)
109 |             batch += img_fname.tolist()
110 |             labels += [class_id]*self.batch_k
111 |         return batch,labels
112 | 
113 | 
114 | def getCUB200(data_path,batch_k,batch_size):
115 |     train_dataset = CUB200Data(data_path,batch_k=batch_k,batch_size=batch_size,is_train=True,transform=default_transform)
116 |     test_dataset = CUB200Data(data_path,batch_k=batch_k,batch_size=batch_size,is_train=False,transform=test_transform)
117 |     train_loader = DataLoader(train_dataset,batch_size=1,shuffle=False,num_workers=6)
118 |     test_loader = DataLoader(test_dataset,batch_size=batch_size,num_workers=6)
119 |     return train_loader,test_loader
120 | 
121 | 
122 | if __name__=='__main__':
123 |     import ipdb
124 |     #ipdb.set_trace()
125 |     train_loader, test_loader = getCUB200('data/CUB_200_2011',batch_k=5,batch_size=10)
126 |     # for train_batch,test_batch in zip(train_loader,test_loader):
127 |     #     print("begin to resolve data from train_loader and test_loader")
128 |     #     ipdb.set_trace()
129 |     #     print("data",train_batch[0][0].shape,train_batch[1][0].shape)
130 |     #     print("test_data",test_batch[0].shape,test_batch[1].shape)
131 |     #     break
132 |     train_dataset = CUB200Data('data/CUB_200_2011', batch_k=5, batch_size=10, is_train=True)
133 |     ipdb.set_trace()
134 |     data = train_dataset[0]
135 |     print(data)
136 |     test_dataset = CUB200Data('data/CUB_200_2011',batch_k=5,batch_size=10,is_train=False)
137 |     data = test_dataset[0]
138 |     print(data)
139 |     # for test_batch in test_loader:
140 |     #     ipdb.set_trace()
141 |     #     print(test_batch[0].shape,test_batch[1].shape)
142 |     #     break
143 | 
144 | 
145 | 
146 | 
147 | 


--------------------------------------------------------------------------------
/train_hash.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.optim as optim
  3 | import argparse
  4 | from data import get_hash_dataloader
  5 | from models import HashNetRes50,HashLoss
  6 | from utils import Visulizer
  7 | import torch.nn as nn
  8 | import os
  9 | import numpy as np
 10 | from pprint import pprint
 11 | args = argparse.ArgumentParser()
 12 | args.add_argument('--gpus',type=str,default='0',help="gpus ids")
 13 | args.add_argument('--dataset',type=str,default='coco',help='the dataset name in coco,nus_wide imagent')
 14 | args.add_argument('--hash_bit',type=int,default=48,help='the hash bit of deephashing output')
 15 | args.add_argument('--iter_nums',type=int,default=10000,help='the max train iter')
 16 | args.add_argument('--train_batch',type=int,default=32,help='the train batch_size')
 17 | args.add_argument('--lr',type=float,default=0.0001,help='the train learning rate')
 18 | args.add_argument('--class_num',type=float,default=1.0,help='the imbalance ratio')
 19 | args.add_argument('--viz_env',type=str,default='cocohash',help='the visdom env name')
 20 | args.add_argument('--log_interval',type=int,default=20,help='the loss print log interval')
 21 | args.add_argument('--snapshot_interval',type=int,default=3000,help='the snapshot archive model interval')
 22 | args.add_argument('--test_interval',type=int,default=500,help='the test hash search interval')
 23 | 
 24 | 
 25 | def test_model(model,test_loader,database_loader,viz):
 26 |     def code_predict(net,loader):
 27 |         code = []
 28 |         label = []
 29 |         for data in loader:
 30 |             x,y = data
 31 |             if torch.cuda.is_available():
 32 |                 x = x.cuda()
 33 |             x = model(x)
 34 |             code.append(x.cpu())
 35 |             label.append(y)
 36 |         code = torch.cat(code,dim=0)
 37 |         code = torch.sign(code) # the quantization sign function
 38 |         label = torch.cat(label,dim=0)
 39 |         return code.numpy(),label.numpy()
 40 |     test_code,test_label = code_predict(model,test_loader)
 41 |     database_code,database_label = code_predict(model,database_loader)
 42 | 
 43 |     #compute the mean average precision--namely map
 44 |     query_num = test_code.shape[0]
 45 |     sim = np.dot(database_code, test_code.T)
 46 |     ids = np.argsort(-sim, axis=0)
 47 |     APx = []
 48 | 
 49 |     for i in range(query_num):
 50 |         label = test_label[i, :]
 51 |         label[label == 0] = -1
 52 |         idx = ids[:, i]
 53 |         imatch = np.sum(database_label[idx[0:R], :] == label, axis=1) > 0
 54 |         relevant_num = np.sum(imatch)
 55 |         Lx = np.cumsum(imatch)
 56 |         Px = Lx.astype(float) / np.arange(1, R + 1, 1)
 57 |         if relevant_num != 0:
 58 |             APx.append(np.sum(Px * imatch) / relevant_num)
 59 |     mAP = np.mean(np.array(APx))
 60 |     viz.plot("mAP",str(mAP))
 61 | 
 62 | 
 63 | 
 64 | 
 65 | 
 66 | 
 67 | if __name__=='__main__':
 68 |     config={}
 69 |     ags = args.parse_args()
 70 |     config['gpus']=int(ags.gpus)
 71 |     os.environ['CUDA_VISIBLE_DEVICES']=ags.gpus
 72 |     config['dataset']=ags.dataset
 73 |     config['hash_bit'] = ags.hash_bit
 74 |     config['iter_nums']= ags.iter_nums
 75 |     config['train_batch'] = ags.train_batch
 76 |     config['lr']=ags.lr
 77 |     config['log_interval'] = ags.log_interval
 78 |     config['snapshot_interval'] = ags.snapshot_interval
 79 |     config['test_interval'] = ags.test_interval
 80 |     config['viz_env'] = ags.viz_env
 81 | 
 82 | 
 83 | 
 84 |     #program setting
 85 |     config['weight_decay']=0.0005
 86 | 
 87 |     config["optimiz_params"] = {"lr": config['lr'], "momentum": 0.9, "weight_decay": 0.0005, "nesterov": True}
 88 |     config['lr_scheduler']={"gamma":0.5, "step":2000}
 89 |     config["loss"] = {"l_weight": 1.0, "q_weight": 0,
 90 |                       "l_threshold": 15.0, "sigmoid_param": 10. / config["hash_bit"],
 91 |                       "class_num": ags.class_num}
 92 | 
 93 |     pprint(config) # print the config data
 94 |     #prepare model and dataset
 95 |     model = HashNetRes50(n_bit=config['hash_bit'])
 96 |     criteria = HashLoss(hash_bit=config['hash_bit'])
 97 | 
 98 |     train1,train2,test_loader,database_loader = get_hash_dataloader(config['dataset'],config['train_batch'],
 99 |                                                       config['train_batch']//2,config['train_batch']//2)
100 |     if torch.cuda.is_available():
101 |         model = model.cuda()
102 |     params_list = [{"params":model.feature_layers.parameters(),'lr':1},
103 |               {"params":model.hash_layer.parameters(),'lr':10}]
104 |     optimizer = optim.SGD(params_list,lr=config['lr'],momentum=0.9,weight_decay=config['weight_decay'],nesterov=True)
105 |     lr_schedualer = optim.lr_scheduler.StepLR(optimizer,step_size=2000,
106 |                                               gamma=0.5,last_epoch=-1)
107 | 
108 |     viz = Visulizer(host='http://hpc3.yud.io',port=8088,env=config['viz_env'])
109 |     viz.log("start the hash learning")
110 |     viz.log(config)
111 |     len_train = len(train1)
112 |     train_loss = 0
113 |     for it in range(config['iter_nums']):
114 | 
115 |         lr_schedualer.step()
116 |         if it % len_train==0:
117 |             iter1 = iter(train1)
118 |             iter2 = iter(train2)
119 |         train_part1 = iter1.next()
120 |         train_part2 = iter2.next() # same train data two different shuffle
121 | 
122 |         x1,y1 = train_part1
123 |         x2,y2 = train_part2
124 |         if torch.cuda.is_available():
125 |             x1 = x1.cuda()
126 |             x2 = x2.cuda()
127 |             y1 = y1.cuda()
128 |             y2 = y2.cuda()
129 |         inputs = torch.cat((x1,x2),dim=0)
130 |         labels = torch.cat((y1,y2),dim=0)
131 |         outputs = model(inputs)
132 |         loss = criteria(outputs,labels,sigmoid_param=config["loss"]["sigmoid_param"], \
133 |                              l_threshold=config["loss"]["l_threshold"], \
134 |                              class_num=config["loss"]["class_num"])
135 |         loss.backward()
136 |         train_loss += loss.item()
137 |         if (it+1)%config['log_interval']==0:
138 |             print("Iter: {:05d}, loss: {:.3f}".format(it,train_loss/config['log_interval']))
139 |             train_loss =0
140 |         optimizer.step()
141 | 
142 |         if it%config['snapshot_interval'] ==0:
143 |             torch.save(nn.Sequential(model),
144 |                        './checkpoints/resnet_{0}_{1}_{2}.pth.tar'.format(config['dataset'],config['hash_bit'],it))
145 |         if it%config['test_interval']==0:
146 |             test_model(model,test_loader,database_loader) # to validate the efficiency of hash code
147 |     viz.log("finish train model")
148 | 
149 | 


--------------------------------------------------------------------------------
/models/sample_dml.py:
--------------------------------------------------------------------------------
  1 | """
  2 | this is a model explemenation for "sampling matters in deep embeding learning" paper
  3 | in this paper,we take model resnet-50 as base model to extract 128 dimension discriminative features,and then use distance weighted sampling ,
  4 | along with margin-based loss as loss function.
  5 | """
  6 | 
  7 | import torch
  8 | from torchvision.models import resnet50
  9 | import torch.nn as nn
 10 | import torch.nn.functional as F
 11 | from .vgg_classify import BaseModule
 12 | from collections import OrderedDict
 13 | import numpy as np
 14 | 
 15 | class SampleModel(BaseModule):
 16 |     """
 17 |     this is the resnet-50 based model
 18 |     """
 19 |     def __init__(self,embeding_dim = 128):
 20 |         super(SampleModel,self).__init__()
 21 |         basic_model = resnet50(pretrained=True)
 22 | 
 23 |         feature = list(basic_model.named_children())[:-1]
 24 |         self.base_model = nn.Sequential(OrderedDict(feature))
 25 |         self.dense = nn.Linear(in_features=2048,out_features=128)
 26 | 
 27 | 
 28 |     def forward(self,x):
 29 |         """
 30 |         extract the 128 dimension feature,for x is a batch of image data with every batch_k image is same images
 31 |         :param x: batch of images in data type of torch Tensor(N,C,W,H) of (70,3,224,224)
 32 |         :return: feature of images,in data type of torch Tensor(N,D) of [70,128]
 33 |         """
 34 |         res_feature = self.base_model(x)
 35 |         res_feature = res_feature.view(res_feature.size(0),-1)
 36 |         embeding = self.dense(res_feature)
 37 |         embeding = F.normalize(embeding,p=2,dim=1)
 38 |         return embeding
 39 | 
 40 | 
 41 | class Margin_Loss(nn.Module):
 42 |     """ the margin losss contain the distane weighted sampling and margin based loss,
 43 |     sampling and margin loss compute based on paper 'Sampling Matters in Deep Embedding Learning' """
 44 |     def __init__(self,batch_k=5,margin=0.2,nu=0.0,cutoff=0.5,nonzero_loss_cutoff=1.4):
 45 |         """
 46 |         this loss function receive batch of image_feature,then compute the distance weighted sampling loss
 47 |         :param batch_k: images count for every class
 48 |         :param margin: margin for alpha in paper
 49 |         :param nu: regularization parameter for beta
 50 |         """
 51 |         super(Margin_Loss,self).__init__()
 52 |         self.margin = torch.tensor(margin,dtype=torch.float32)
 53 |         self.nu = torch.tensor(nu,dtype=torch.float32)
 54 |         self.batch_k = batch_k
 55 |         self.cutoff = cutoff                           # to cut for probbality
 56 |         self.nonzero_loss_cutoff = nonzero_loss_cutoff # to cut the distance upper bound
 57 |         self.relu1 = torch.nn.ReLU()
 58 |         self.relu2 = torch.nn.ReLU()
 59 | 
 60 |     def convert_param(self,to_cuda=True):
 61 |         """
 62 |         convert parameter margin and nu coeff to cuda or to cpu
 63 |         :param to_cuda:
 64 |         :return:
 65 |         """
 66 |         if to_cuda:
 67 |             self.margin = self.margin.cuda()
 68 |             self.nu = self.nu.cuda()
 69 |         else:
 70 |             self.margin = self.margin.cpu()
 71 |             self.nu = self.nu.cpu()
 72 | 
 73 | 
 74 | 
 75 |     def forward(self,x,y,beta_in):
 76 |         """
 77 | 
 78 |         :param x: x is the feature extracted from resnet,data type torch.tensor,data.shape (n.d) typical (70,128)
 79 |         :param y: the label for each small class range from 0-200,so as the same dimension of beta_in
 80 |         :param beta_in: beta_in is a torch variable (tensor) with require_grad = True
 81 |         :return: the loss of beta_reg_loss and margin loss
 82 |         """
 83 |         a_index,p_index,n_index = self.sampling(x) # so the corresponding anchor,postive and negitve has belong the distance weighted distribution
 84 |         beta_work =  beta_in[a_index] # get the coeffient of the beta data
 85 |         beta_reg_loss = torch.sum(beta_work)*self.nu   # loss batckward valid
 86 | 
 87 |         # compute margin loss from feature
 88 |         anchors = x[a_index]
 89 |         postives = x[p_index]
 90 |         negtives = x[n_index]
 91 |         d_ap = torch.sqrt(torch.sum((anchors - postives)*(anchors - postives),1)+1e-8)
 92 |         d_an = torch.sqrt(torch.sum((anchors - negtives)*(anchors - negtives),1)+1e-8)
 93 | 
 94 |         pos_loss = self.relu1(d_ap - beta_work + self.margin)
 95 |         neg_loss = self.relu2(beta_work - d_an + self.margin)
 96 |         pair_cnt = torch.sum((pos_loss>0.0) +(neg_loss>0.0))
 97 | 
 98 |         # normalize based on the number of pairs
 99 |         loss = (torch.sum(pos_loss + neg_loss) + beta_reg_loss)/ pair_cnt.float()#pair_cnt.numpy()[0]
100 |         return loss
101 | 
102 | 
103 |     def sampling(self,x):
104 |         """
105 |         sampling images pairs based on distance of each images
106 |         :param x: x is the [N,128] tensor of the extracted features
107 |         :return: anchors,postives,negtives
108 |         """
109 |         np_feature = x.cpu().detach().numpy()
110 |         k = self.batch_k
111 |         n,d = np_feature.shape
112 | 
113 |         # compute distance
114 |         dis_matrix = self.get_distance(np_feature)
115 | 
116 |         # cut off to avoid hight variance
117 |         dis_matrix = np.maximum(dis_matrix,self.cutoff)
118 | 
119 |         log_weights = ((2.0 - float(d)) * np.log(dis_matrix)
120 |                        - (float(d-3)/2)*np.log(1.0-0.25*(dis_matrix**2)))
121 |         #weights = np.exp(log_weights - log_weights.max(1).reshape(-1,1)) #log_weights-log_weights.max(1).reshape(-1,1), every line subtract the max weight number ,not the total number
122 |         weights = np.exp(log_weights - log_weights.max())
123 |         mask = np.ones(weights.shape)
124 |         for i in range(0,n,k):
125 |             mask[i:i+k,i:i+k] = 0 # to set block in indentity line surrounding box is 0
126 | 
127 |         weights = weights * mask *(dis_matrix<self.nonzero_loss_cutoff)
128 |         weights = weights / weights.sum(axis=1,keepdims=True)
129 | 
130 |         a_index ,p_index,n_index = [],[],[]
131 |         for i in range(n):
132 |             block_idx = i//k    # k is self.batch_k,typical is 5
133 |             try:
134 |                 n_index += np.random.choice(n,k-1,p= weights[i]).tolist()
135 |             except:
136 |                 n_index += np.random.choice(n,k-1).tolist()
137 |             for j in range(block_idx*k,(block_idx+1)*k):
138 |                 if j != i:
139 |                     a_index.append(i)
140 |                     p_index.append(j)
141 | 
142 |         return a_index,p_index,n_index
143 | 
144 |     def get_distance(self,x):
145 |         """
146 |         compute the distance between every two feature vector, construct a distance matrix shaped [N,N] for same vecotr the distance set 1 instead
147 |         :param x: feature vector of N samples
148 |         :return: distance of two feature,set 1 for same vector
149 |         """
150 |         n = x.shape[0]
151 |         square = np.sum(x*x,axis=1,keepdims = True)
152 |         distance_square = square + square.transpose() - (2*np.matmul(x,x.transpose()))
153 |         distance_identity = distance_square + np.identity(n)
154 |         return np.sqrt(distance_identity)
155 | 


--------------------------------------------------------------------------------
/data/mxdata/mxcub200.py:
--------------------------------------------------------------------------------
  1 | # Licensed to the Apache Software Foundation (ASF) under one
  2 | # or more contributor license agreements.  See the NOTICE file
  3 | # distributed with this work for additional information
  4 | # regarding copyright ownership.  The ASF licenses this file
  5 | # to you under the Apache License, Version 2.0 (the
  6 | # "License"); you may not use this file except in compliance
  7 | # with the License.  You may obtain a copy of the License at
  8 | #
  9 | #   http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing,
 12 | # software distributed under the License is distributed on an
 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 14 | # KIND, either express or implied.  See the License for the
 15 | # specific language governing permissions and limitations
 16 | # under the License.
 17 | 
 18 | import os
 19 | import random
 20 | 
 21 | import numpy as np
 22 | 
 23 | import mxnet as mx
 24 | from mxnet import nd
 25 | 
 26 | def transform(data, target_wd, target_ht, is_train, box):
 27 |     """Crop and normnalize an image nd array."""
 28 |     if box is not None:
 29 |         x, y, w, h = box
 30 |         data = data[y:min(y+h, data.shape[0]), x:min(x+w, data.shape[1])]
 31 | 
 32 |     # Resize to target_wd * target_ht.
 33 |     data = mx.image.imresize(data, target_wd, target_ht)
 34 | 
 35 |     # Normalize in the same way as the pre-trained model.
 36 |     data = data.astype(np.float32) / 255.0
 37 |     data = (data - mx.nd.array([0.485, 0.456, 0.406])) / mx.nd.array([0.229, 0.224, 0.225])
 38 | 
 39 |     if is_train:
 40 |         if random.random() < 0.5:
 41 |             data = nd.flip(data, axis=1)
 42 |         data, _ = mx.image.random_crop(data, (224, 224))
 43 |     else:
 44 |         data, _ = mx.image.center_crop(data, (224, 224))
 45 | 
 46 |     # Transpose from (target_wd, target_ht, 3)
 47 |     # to (3, target_wd, target_ht).
 48 |     data = nd.transpose(data, (2, 0, 1))
 49 | 
 50 |     # If image is greyscale, repeat 3 times to get RGB image.
 51 |     if data.shape[0] == 1:
 52 |         data = nd.tile(data, (3, 1, 1))
 53 |     return data.reshape((1,) + data.shape)
 54 | 
 55 | 
 56 | class CUB200Iter(mx.io.DataIter):
 57 |     """Iterator for the CUB200-2011 dataset.
 58 |     Parameters
 59 |     ----------
 60 |     data_path : str,
 61 |         The path to dataset directory.
 62 |     batch_k : int,
 63 |         Number of images per class in a batch.
 64 |     batch_size : int,
 65 |         Batch size.
 66 |     batch_size : tupple,
 67 |         Data shape. E.g. (3, 224, 224).
 68 |     is_train : bool,
 69 |         Training data or testig data. Training batches are randomly sampled.
 70 |         Testing batches are loaded sequentially until reaching the end.
 71 |     """
 72 |     def __init__(self, data_path, batch_k, batch_size, data_shape, is_train):
 73 |         super(CUB200Iter, self).__init__(batch_size)
 74 |         self.data_shape = (batch_size,) + data_shape
 75 |         self.batch_size = batch_size
 76 |         self.provide_data = [('data', self.data_shape)]
 77 |         self.batch_k = batch_k
 78 |         self.is_train = is_train
 79 | 
 80 |         self.train_image_files = [[] for _ in range(100)]
 81 |         self.test_image_files = []
 82 |         self.test_labels = []
 83 |         self.boxes = {}
 84 |         self.test_count = 0
 85 | 
 86 |         with open(os.path.join(data_path, 'images.txt'), 'r') as f_img, \
 87 |              open(os.path.join(data_path, 'image_class_labels.txt'), 'r') as f_label, \
 88 |              open(os.path.join(data_path, 'bounding_boxes.txt'), 'r') as f_box:
 89 |             for line_img, line_label, line_box in zip(f_img, f_label, f_box):
 90 |                 fname = os.path.join(data_path, 'images', line_img.strip().split()[-1])
 91 |                 label = int(line_label.strip().split()[-1]) - 1
 92 |                 box = [int(float(v)) for v in line_box.split()[-4:]]
 93 |                 self.boxes[fname] = box
 94 | 
 95 |                 # Following "Deep Metric Learning via Lifted Structured Feature Embedding" paper,
 96 |                 # we use the first 100 classes for training, and the remaining for testing.
 97 |                 if label < 100:
 98 |                     self.train_image_files[label].append(fname)
 99 |                 else:
100 |                     self.test_labels.append(label)
101 |                     self.test_image_files.append(fname)
102 | 
103 |         self.n_test = len(self.test_image_files)
104 | 
105 |     def get_image(self, img, is_train):
106 |         """Load and transform an image."""
107 |         img_arr = mx.image.imread(img)
108 |         img_arr = transform(img_arr, 256, 256, is_train, self.boxes[img])
109 |         return img_arr
110 | 
111 |     def sample_train_batch(self):
112 |         """Sample a training batch (data and label)."""
113 |         batch = []
114 |         labels = []
115 |         num_groups = self.batch_size // self.batch_k
116 | 
117 |         # For CUB200, we use the first 100 classes for training.
118 |         sampled_classes = np.random.choice(100, num_groups, replace=False)
119 |         for i in range(num_groups):
120 |             img_fnames = np.random.choice(self.train_image_files[sampled_classes[i]],
121 |                                           self.batch_k, replace=False)
122 |             batch += [self.get_image(img_fname, is_train=True) for img_fname in img_fnames]
123 |             labels += [sampled_classes[i] for _ in range(self.batch_k)]
124 | 
125 |         return nd.concatenate(batch, axis=0), labels
126 | 
127 |     def get_test_batch(self):
128 |         """Sample a testing batch (data and label)."""
129 | 
130 |         batch_size = self.batch_size
131 |         batch = [self.get_image(self.test_image_files[(self.test_count*batch_size + i)
132 |                                                       % len(self.test_image_files)],
133 |                                 is_train=False) for i in range(batch_size)]
134 |         labels = [self.test_labels[(self.test_count*batch_size + i)
135 |                                    % len(self.test_image_files)] for i in range(batch_size)]
136 |         return nd.concatenate(batch, axis=0), labels
137 | 
138 |     def reset(self):
139 |         """Reset an iterator."""
140 |         self.test_count = 0
141 | 
142 |     def next(self):
143 |         """Return a batch."""
144 |         if self.is_train:
145 |             data, labels = self.sample_train_batch()
146 |         else:
147 |             if self.test_count * self.batch_size < len(self.test_image_files):
148 |                 data, labels = self.get_test_batch()
149 |                 self.test_count += 1
150 |             else:
151 |                 self.test_count = 0
152 |                 raise StopIteration
153 |         return mx.io.DataBatch(data=[data], label=[labels])
154 | 
155 | def cub200_iterator(data_path, batch_k, batch_size, data_shape):
156 |     """Return training and testing iterator for the CUB200-2011 dataset."""
157 |     return (CUB200Iter(data_path, batch_k, batch_size, data_shape, is_train=True),
158 |             CUB200Iter(data_path, batch_k, batch_size, data_shape, is_train=False))
159 | 


--------------------------------------------------------------------------------
/data/n_pair_mc/npair_dataset.py:
--------------------------------------------------------------------------------
  1 | import torchvision.transforms as T
  2 | #from configs import opt
  3 | from torch.utils.data import Dataset
  4 | import os
  5 | import csv
  6 | import fnmatch
  7 | from PIL import Image
  8 | import numpy as np
  9 | import pandas as pd
 10 | import torch
 11 | 
 12 | #normalize = T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
 13 | normalize = T.Normalize(mean=[0.5, 0.5, 0.5],std=[0.5, 0.5, 0.5])
 14 | default_transform  = T.Compose([
 15 |          T.Resize(256),
 16 |          T.RandomCrop(227),
 17 |          T.RandomHorizontalFlip(),
 18 |          T.ToTensor(),
 19 |          normalize,
 20 | ])
 21 | 
 22 | class EbayDataset(Dataset):
 23 |     """this is an implementation of n-pair mc dataset from paper
 24 |     Improved Deep Metric Learning with Multi-class N-pair Loss """
 25 |     def __init__(self,dir_root=None,
 26 |                  train=True,
 27 |                  batch_size=32,
 28 |                  trans = default_transform):
 29 |         """
 30 | 
 31 |         :param dir_root: online product dir
 32 |         :param train: if train get data with pair else return each image with label
 33 |         :param persist: Ebay_test.txt and Ebay_train.txt to read basic data
 34 |         :param batch_size: pair*2 ,if batch_size =32,the pairsize is 16
 35 |         :param trans: data transformation
 36 |         """
 37 |         self.batch_size = batch_size
 38 |         self.root = dir_root
 39 |         self.train = train
 40 |         self.transform = trans
 41 | 
 42 |         file_name =os.path.join(self.root,'Ebay_train.txt')
 43 |         if not self.train:
 44 |             file_name = os.path.join(self.root, 'Ebay_test.txt')
 45 |         self.data = pd.read_table(file_name,header=0,delim_whitespace=True)
 46 |         min_super_id,max_super_id = min(self.data.super_class_id),max(self.data.super_class_id)
 47 |         self.super_ids =np.arange(min_super_id,max_super_id+1)
 48 |         self.super2class={}
 49 |         for super_id in self.super_ids:
 50 |             self.super2class[super_id]=self.data[self.data.super_class_id==super_id].class_id.tolist()
 51 | 
 52 |         self.all_class = list(set(self.data.class_id.tolist()))
 53 |         self.classid2imageid = {}
 54 |         for class_id in self.all_class:
 55 |             group_image_id = self.data[self.data.class_id==class_id].image_id.tolist()
 56 |             if len(group_image_id)>=2:
 57 |                 self.classid2imageid[class_id]=group_image_id #one group must have more than 2 images
 58 |         self.image_nums = self.data.image_id.count()
 59 | 
 60 |     def __len__(self):
 61 |         """the lengh and data loader recycle size"""
 62 |         if self.train:
 63 |             return len(self.all_class) # 11318
 64 |         else:
 65 |             return self.image_nums
 66 | 
 67 |     def __getitem__(self, index):
 68 |         """get pair size pair data with index
 69 |         when using dataloader ,the batchsize is always 1
 70 |         for train model:
 71 |             the index is class_id ,so this will select a batch of different class type to construct a n-pair
 72 |         for test model:
 73 |             the index is image_id,so this will get one picture with it's image_id and class_id,the extracted feature will send to cluster
 74 |         """
 75 |         if self.train:
 76 |             class_id = self.all_class[index]
 77 |             super_id = self.data[self.data.class_id==1].super_class_id[0]
 78 |             anchor_class=[]
 79 |             anchor_class.append(class_id)
 80 |             innder_count = int(0.9* self.batch_size//2) # image pair of different class in same super class
 81 | 
 82 |             inner_class = np.random.choice(self.super2class[super_id], innder_count, False) # in same super class choose most
 83 |             anchor_class.extend(inner_class)
 84 |             anchor_class = list(set(anchor_class))# duplicate repeate
 85 | 
 86 |             outer_count = self.batch_size//2 - len(anchor_class)
 87 |             outer_class = np.random.choice(self.super_ids,outer_count,True)
 88 |             for outer_id in outer_class:
 89 |                 anchor_class.extend(np.random.choice(self.super2class[outer_id],1))
 90 | 
 91 |             #from each anchor_class,select the anchor image and the postive image
 92 |             image_id =[]
 93 |             for anchor_id in anchor_class:
 94 |                 select = np.random.choice(self.classid2imageid[anchor_id],2,False)
 95 |                 image_id.extend(select)
 96 | 
 97 | 
 98 |             anchor_path = self.data[self.data.image_id.isin(image_id)][['image_id', 'path']]
 99 |             anchor_path.sort_index(0) # sort by the first colum index id
100 |             # to stack image in a to construct to one bulk. first construct 32 image to a numpy ndarray,
101 |             tensor_list=[]
102 |             tensor_p=[]
103 |             jump = False
104 |             for i,image_path in enumerate(anchor_path.path):
105 |                 image = Image.open(os.path.join(self.root,image_path)).convert('RGB')
106 |                 if self.transform:
107 |                     data = self.transform(image)
108 |                 if i%2==0:
109 |                     if data.size(0)<3:   # the anchor image channel not 3
110 |                         jump = True  # jump the next image
111 |                         continue
112 |                     jump = False
113 |                     tensor_list.append(data)
114 |                 else:
115 |                     if jump:
116 |                         continue
117 |                     if data.size(0)<3: # the pair iamge channel not 3
118 |                         tensor_list.pop(-1) # delete the last one in tensor_list
119 |                         continue
120 |                     tensor_p.append(data)
121 | 
122 | 
123 |             tensor_list.extend(tensor_p)
124 |             #print("tensor dataset",len(tensor_list))
125 |             batch_tensor = torch.stack(tensor_list,dim=0)
126 |             return batch_tensor
127 |         else:
128 |             item = self.data.loc[index]
129 |             image_path = item['path']
130 |             image_id = int(item['image_id'])
131 |             image_class= int(item['class_id'])
132 |             default_path = self.data.loc[0]['path']
133 |             image = Image.open(os.path.join(self.root, image_path)).convert('RGB')
134 |             if self.transform:
135 |                 data = self.transform(image)
136 |                 if data.size(0)<3:
137 |                     image = Image.open(os.path.join(self.root, default_path))
138 |                     data = self.transform(image)
139 |                     image_id =0
140 |                     image_class =0
141 |             return data,image_id,image_class
142 | 
143 | 
144 | 
145 | if __name__=='__main__':
146 |     """ to test the dataset"""
147 |     import ipdb
148 |     ipdb.set_trace()
149 |     root = '/data/jh/notebooks/hudengjun/DML/deep_metric_learning/lib/online_products/Stanford_Online_Products/'
150 |     dataset = EbayDataset(dir_root=root)
151 |     data = dataset[0]
152 |     print(type(data))
153 | 
154 |     test_dataset  = EbayDataset(dir_root=root,train=False)
155 |     data = test_dataset[0]
156 |     print(data)
157 | 
158 | 
159 | 
160 | 
161 | 
162 | 


--------------------------------------------------------------------------------
/train_mc_npair.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.optim as optim
  3 | from data import EbayDataset
  4 | import os
  5 | from configs import opt
  6 | from models import ModGoogLeNet,NpairLoss
  7 | from torch.utils.data import DataLoader
  8 | from tqdm import tqdm
  9 | from utils import Visulizer
 10 | import csv
 11 | import numpy as np
 12 | 
 13 | def train(**kwargs):
 14 |     print("run train")
 15 |     opt.parse(kwargs)
 16 |     os.environ['CUDA_VISIBLE_DEVICES'] = str(opt.gpu_id)
 17 | 
 18 |     if opt.debug:
 19 |         import ipdb
 20 |         ipdb.set_trace()
 21 |     model =ModGoogLeNet(embeding_size=opt.embeding_size)
 22 |     if opt.dml_model_path:
 23 |         model.load(opt.dml_model_path)
 24 |     if opt.use_gpu:
 25 |         model = model.cuda()
 26 |     #model.freeze_model(level=opt.freeze_level)
 27 | 
 28 |     if opt.use_viz:
 29 |         viz = Visulizer(host=opt.vis_host,port=opt.vis_port,env='dml'+opt.vis_env)
 30 |         viz.log("start to train dml npair mc model")
 31 | 
 32 |     #loss function
 33 |     criterion = NpairLoss(l2_reg=opt.l2_reg)
 34 |     lr = opt.lr
 35 |     m = opt.momentum
 36 |     optimizer = optim.SGD([{'params':model.level1_2.parameters()},
 37 |                            {'params': model.level_3_4.parameters()},
 38 |                            {'params': model.level_5_6.parameters()},
 39 |                            {'params': model.level_7.parameters()},
 40 |                            {'params':model.fc.parameters(),'lr':10*lr}],lr=lr,momentum=m)
 41 |     #optimizer = optim.SGD(model.parameters(),lr=lr,momentum=m)
 42 |     lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,1,0.2)
 43 | 
 44 |     # data and dataloader
 45 |     train_data = EbayDataset(dir_root=opt.ebay_dir, train=True, batch_size=opt.batch_size)
 46 |     cycle_length = len(train_data)
 47 |     #val_data = EbayDataset(dir_root=opt.ebay_dir, train=False, batch_size=opt.batch_size)
 48 |     train_dataloader = DataLoader(train_data, batch_size=1, shuffle=True, num_workers=opt.num_workers)
 49 |     #val_dataloader = DataLoader(val_data, batch_size=60, shuffle=False, num_workers=opt.num_workers)
 50 | 
 51 |     print("dataloader setted ,begin to train")
 52 | 
 53 |     #f = open('dml_log.out','w')
 54 |     for epoch in range(opt.max_epoch):
 55 |         lr_scheduler.step()
 56 |         train_loss = 0
 57 | 
 58 |         for i,data in enumerate(train_dataloader):
 59 |             # if i in [200, 800, 1500]:
 60 |             #     lr_scheduler.step()
 61 | 
 62 |             data = data[0]
 63 |             if opt.use_gpu:
 64 |                 data = data.cuda()
 65 |             optimizer.zero_grad()
 66 |             feature = model(data)
 67 |             batch_size = data.size(0)
 68 |             target = torch.arange(0, int(batch_size / 2), dtype=torch.int64).cuda()
 69 |             loss  = criterion(feature,target)
 70 |             loss.backward()
 71 |             optimizer.step()
 72 | 
 73 |             train_loss += loss.item()
 74 |             freq = int(opt.print_freq)
 75 |             if i%freq==(freq-1):
 76 |                 average_loss = train_loss /opt.print_freq
 77 |                 #f.write("iteration:{0},dml_loss:{1}\n".format(i+ epoch*cycle_length,average_loss))
 78 |                 #f.flush()
 79 |                 if opt.use_viz:
 80 |                     viz.plot('dml_loss',average_loss)
 81 |                 train_loss =0
 82 |             if opt.debug:
 83 |                 break
 84 |         #f.write("epoch:{0} finished,begin to valid test".format(epoch))
 85 |         model.save()
 86 |         # if epoch>1 and epoch%5==0:
 87 |         #     val(model,val_dataloader,epoch)
 88 |         if opt.debug:
 89 |             #f.write("finish one iter")
 90 |             break
 91 |     #f.write("finish train epoch {0}".format(opt.max_epoch))
 92 |     #f.close()
 93 | 
 94 | 
 95 | def val(model,dataloder,epoch):
 96 |     """
 97 |     this val model will calculate the nmi index.normal mutual information
 98 |     :param model: the emebding model
 99 |     :param dataloder: val dataloder
100 |     :return:
101 |     """
102 |     # prepare file model to extract feature
103 |     file_name = 'checkpoints/online_product_{0}.csv'.format(epoch)
104 |     f = open(file_name,'w')
105 |     writer = csv.writer(f,dialect='excel')
106 |     model.eval()
107 |     # feature extreat,fisrt for all image,image_id,class_id extract the feature vector
108 |     for i,(data,image_id,class_id) in enumerate(dataloder):
109 |         if opt.use_gpu:
110 |             data = data.cuda()
111 |         feature = model(data) # the feature is [batch,512] vector
112 |         vector = feature.cpu().detach().numpy() if opt.use_gpu else feature.numpy()
113 |         image_id = image_id.numpy().reshape(-1,1)
114 |         class_id = class_id.numpy().reshape(-1,1)
115 |         result = np.hstack(image_id,class_id,vector)
116 | 
117 |         #write the data to dataframe file
118 |         writer.writerows(result)
119 |         if opt.debug:
120 |             print("test one batch of val data and save to csv file")
121 |             break
122 |     f.close()
123 |     # clustering to centriod, second, for all image_id,and corresponding feature vector,using kmeans cluster to fixed |class_id|
124 |     #featuredata = pd.read_csv(file_name,header=None)
125 | 
126 |     # for distribution from origin and cluster distribution.compute the nmi by sklearn metric nmi
127 | 
128 | 
129 |     model.train()
130 |     print("finished cluster and evalution")
131 | 
132 | 
133 | def compute(**kwargs):
134 |     print("run compute_vector")
135 |     opt.parse(kwargs)
136 |     os.environ['CUDA_VISIBLE_DEVICES'] = str(opt.gpu_id)
137 | 
138 |     if opt.debug:
139 |         import ipdb
140 |         ipdb.set_trace()
141 |     model =ModGoogLeNet(embeding_size=opt.embeding_size)
142 |     if opt.dml_model_path:
143 |         model.load(opt.dml_model_path)
144 |     if opt.use_gpu:
145 |         model = model.cuda()
146 | 
147 |     val_data = EbayDataset(dir_root=opt.ebay_dir, train=False, batch_size=opt.batch_size)
148 |     val_dataloader = DataLoader(val_data, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers)
149 | 
150 |     file_name = 'checkpoints/online_product_compute.csv'
151 |     f = open(file_name, 'w')
152 |     writer = csv.writer(f, dialect='excel')
153 |     model.eval()
154 |     # feature extreat,fisrt for all image,image_id,class_id extract the feature vector
155 |     for i, (data, image_id, class_id) in enumerate(val_dataloader):
156 |         if opt.use_gpu:
157 |             data = data.cuda()
158 |         feature = model(data)  # the feature is [batch,512] vector
159 |         vector = feature.cpu().detach().numpy() if opt.use_gpu else feature.numpy()
160 |         image_id = image_id.numpy().reshape(-1, 1)
161 |         class_id = class_id.numpy().reshape(-1, 1)
162 |         result = np.hstack([image_id, class_id, vector])
163 | 
164 |         # write the data to dataframe file
165 |         writer.writerows(result)
166 |         if opt.debug:
167 |             print("test one batch of val data and save to csv file")
168 |             break
169 |     f.close()
170 | 
171 | 
172 | def help():
173 |     """print function use information"""
174 |     print("""this file help to train product train:
175 |     exanple --:
176 |           python train_mc_npair.py help
177 |           python train_mc_npair.py train --gpu_id=3 --debug=True
178 |           python train_mc_npair.py train --gpu_id=2 --batch_size=72
179 |           python train_mc_npair.py train --gpu_id=3 --lr=0.0003 --batch_size=72
180 |           python train_mc_npair.py train --gpu_id=0 --debug=True --dml_model_path=checkpoints/DMLGoogle_0710_20\:24\:04.pth
181 |           python train_mc_npair.py train --batch_size=120 --gpu_id=3 --lr=0.0001  --debug=True --dml_model_path=checkpoints/DMLGoogle_0710_20\:24\:04.pth
182 |           python train_mc_npair.py compute --batch_size=300 --gpu_id=2 --dml_model_path=checkpoints/DMLGoogle_0714_07:51:44.pth --num_workers=6
183 | """)
184 | 
185 | if __name__=='__main__':
186 |     import fire
187 |     fire.Fire()


--------------------------------------------------------------------------------
/data/mxdata/online_products.py:
--------------------------------------------------------------------------------
  1 | from mxnet.image import *
  2 | from mxnet.gluon.data import Dataset,DataLoader
  3 | from mxnet.image import *
  4 | import numpy as np
  5 | import mxnet as mx
  6 | from mxnet.gluon import nn
  7 | import mxnet.gluon.data.vision.transforms as T
  8 | import pandas as pd
  9 | 
 10 | normalize=T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
 11 | default_transform = T.Compose([
 12 |     T.Resize(256),
 13 |     T.RandomResizedCrop(size=224,scale=(1.0,1.0),ratio=(1.0,1.0)),# just crop,not scale
 14 |     T.RandomFlipLeftRight(),
 15 |     T.ToTensor(), # last to swap  channel to c,w,h
 16 |     normalize
 17 | ])
 18 | 
 19 | test_transform = T.Compose([
 20 |     T.Resize(256),
 21 |     T.CenterCrop(224),
 22 |     T.ToTensor(),
 23 |     normalize
 24 | ])
 25 | 
 26 | class MxEbayInClass(Dataset):
 27 |     """this is an mxnet edition of Ebay dataset"""
 28 |     def __init__(self,dir_root,batch_k=4,batch_size=40,is_train=True,transform =default_transform):
 29 |         self.batch_size=batch_size
 30 |         self.batch_k = batch_k
 31 |         self.root = dir_root
 32 |         self._trans = transform
 33 |         self.is_train = is_train
 34 | 
 35 |         self.test_image_files =[]
 36 |         self.test_labels =[]
 37 |         self.train_length = 0
 38 | 
 39 |         if self.is_train:
 40 |             table_name = os.path.join(self.root,'Ebay_train.txt')
 41 |             table_data = pd.read_table(table_name, header=0, delim_whitespace=True)
 42 |             min_super_id, max_super_id = min(table_data.super_class_id), max(table_data.super_class_id)
 43 |             self.super_ids = np.arange(min_super_id, max_super_id + 1)
 44 |             self.super2class = {}
 45 |             for super_id in self.super_ids:
 46 |                 self.super2class[super_id] = table_data[table_data.super_class_id == super_id].class_id.tolist()
 47 | 
 48 |             min_class_id,max_class_id = min(table_data.class_id),max(table_data.class_id)
 49 |             self.class_ids = list(np.arange(min_class_id,max_class_id+1))
 50 |             self.train_length = max_class_id+1-min_class_id
 51 |             self.super_id_dist = [len(v) for k,v in self.super2class.items()]
 52 |             total = sum(self.super_id_dist)
 53 |             self.super_id_dist = [v*1.0/total for v in self.super_id_dist]
 54 |             self.class2imagefiless = [[]] # placeholder for class_id = 0
 55 |             for class_id in self.class_ids:
 56 |                 one_class_paths = table_data[table_data.class_id==class_id].path.tolist() # type list
 57 |                 self.class2imagefiless.append(one_class_paths)
 58 |         else:
 59 |             table_name = os.path.join(self.root,'Ebay_test.txt')
 60 |             table_data = pd.read_table(table_name,header=0,delim_whitespace=True)
 61 | 
 62 |             self.test_image_files =  table_data.path.tolist()
 63 |             self.test_labels = table_data.class_id.tolist()
 64 | 
 65 | 
 66 | 
 67 |     def __len__(self):
 68 |         if self.is_train:
 69 |             return 800
 70 |         else:
 71 |             return 4000
 72 | 
 73 |     def sample_train_batch(self):
 74 |         batch =[]
 75 |         labels =[]
 76 |         num_groups = self.batch_size // self.batch_k  # for every sample count k
 77 |         super_id = np.random.choice(list(self.super2class.keys()), size=1,p=self.super_id_dist)[0]  # the super class id
 78 |         sampled_class = np.random.choice(self.super2class[super_id], num_groups*2, replace=False)
 79 |         for i in sampled_class:
 80 |             try:
 81 |                 img_fnames = np.random.choice(self.class2imagefiless[i],
 82 |                                               self.batch_k,
 83 |                                               replace=False)
 84 |             except Exception as e: # just has not enough data to choose
 85 |                 continue
 86 |             batch += img_fnames.tolist()
 87 |             labels += [i]*self.batch_k
 88 |             if len(batch)>=self.batch_size:
 89 |                 break
 90 |         return batch,labels
 91 | 
 92 | 
 93 |     def __getitem__(self, index):
 94 |         """get data batch like pytorch,
 95 |         only smaple same super class_id,not cross sample"""
 96 |         if self.is_train:
 97 |             imagelist =[]
 98 |             batch,labels = self.sample_train_batch()
 99 |             for file in batch:
100 |                 file_path = os.path.join(self.root,file)
101 |                 img = image.imread(file_path,to_rgb=1,flag=1)
102 |                 img = self._trans(img)
103 |                 imagelist.append(img)
104 |             return nd.stack(*imagelist,axis=0),nd.array(labels)
105 |         else:
106 |             file = self.test_image_files[index]
107 |             label = self.test_labels[index]
108 |             img = image.imread(os.path.join(self.root,file),flag=1,to_rgb=1)
109 |             img = self._trans(img)
110 |             return img,label
111 | 
112 | 
113 | 
114 | def getEbayInClassData(root,batch_k,batch_size):
115 |     train_dataset = MxEbayInClass(root,batch_k=batch_k,batch_size=batch_size,is_train=True,transform=default_transform)
116 |     test_dataset = MxEbayInClass(root,batch_k=batch_k,batch_size=batch_size,is_train=False,transform=test_transform)
117 |     train_loader = DataLoader(train_dataset,batch_size=1,shuffle=False,num_workers=6)
118 |     test_loader = DataLoader(test_dataset,batch_size=test_dataset.batch_size,shuffle=False,num_workers=6)
119 |     return train_loader,test_loader
120 | 
121 | 
122 | class MxEbayCrossClass(MxEbayInClass):
123 |     """the cross class edition of StanfordOnlineProducts"""
124 |     def __init__(self,dir_root,batch_k=4,batch_size=40,is_train=True,transform =default_transform):
125 |         super(MxEbayCrossClass,self).__init__(dir_root=dir_root,batch_k=batch_k,batch_size=batch_size,is_train=is_train,transform=transform)
126 |         self.datatype="CrossClass"
127 | 
128 |     def sample_train_batch(self):
129 |         """rewrite the sample strategy"""
130 |         batch = []
131 |         labels = []
132 |         num_groups = self.batch_size // self.batch_k  # for every sample count k
133 | 
134 |         #directly choose the class_id
135 |         sampled_class = np.random.choice(self.class_ids, num_groups * 2, replace=False)
136 |         for i in sampled_class:
137 |             try:
138 |                 img_fnames = np.random.choice(self.class2imagefiless[i],
139 |                                               self.batch_k,
140 |                                               replace=False)
141 |             except:
142 |                 print("class id:{0},instance count small than {1}".format(i, self.batch_k))
143 |                 continue
144 |             batch += img_fnames.tolist()
145 |             labels += [i] * self.batch_k
146 |             if len(batch) >= self.batch_size:
147 |                 break
148 |         return batch, labels
149 | 
150 | def getEbayCrossClassData(root,batch_k,batch_size):
151 |     train_dataset = MxEbayCrossClass(root, batch_k=batch_k, batch_size=batch_size, is_train=True, transform=default_transform)
152 |     test_dataset = MxEbayCrossClass(root, batch_k=batch_k, batch_size=batch_size, is_train=False, transform=test_transform)
153 |     train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False, num_workers=6)
154 |     test_loader = DataLoader(test_dataset, batch_size=test_dataset.batch_size, shuffle=False, num_workers=6)
155 |     return train_loader, test_loader
156 | 
157 | if __name__=='__main__':
158 |     # construct the dataset and get data in train and test mode
159 | 
160 |     train_data = MxEbayInClass(dir_root='data/Stanford_Online_Products',\
161 |                         batch_k=4,batch_size=40,is_train=True,\
162 |                         transform=default_transform)
163 | 
164 |     data = train_data[0]
165 | 
166 |     train_crossdata = MxEbayCrossClass(dir_root='data/Stanford_Online_Products',\
167 |                         batch_k=4,batch_size=40,is_train=True,\
168 |                         transform=default_transform)
169 |     data2 = train_crossdata[0]
170 |     import ipdb
171 |     ipdb.set_trace()
172 |     test_data = MxEbayInClass(dir_root='data/Stanford_Online_Products',\
173 |                         batch_k=4,batch_size=40,is_train=False,\
174 |                         transform=test_transform)
175 |     data = test_data[0]
176 | 
177 | 
178 | 
179 | 
180 | 
181 | 
182 | 


--------------------------------------------------------------------------------
/train_margin_cub.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import time
  3 | import logging
  4 | import numpy as np
  5 | import torch
  6 | 
  7 | from torch.utils.data import DataLoader
  8 | import os
  9 | from data import CUB200DataSet
 10 | from models import Margin_Loss,SampleModel
 11 | 
 12 | 
 13 | parser = argparse.ArgumentParser(description="train a margin based loss model")
 14 | parser.add_argument('--data_path',type=str,default="data/cub200_2011",
 15 |                     help='path of the cub_data')
 16 | parser.add_argument('--embed_dim',type=int,default=128,
 17 |                     help='dimensionality of image embeding,times of 8')
 18 | parser.add_argument('--batch_size',type=int,default=70,
 19 |                     help='training batch size per device')
 20 | parser.add_argument('--batch_k',type=int,default=5,
 21 |                     help='number of images per class in a batch,can be divided by batch_size')
 22 | parser.add_argument('--gpu_id',type=str,default='0',
 23 |                     help='the gpu_id of the runing batch')
 24 | parser.add_argument('--epochs',type=int,default=100,
 25 |                     help='number of training epochs,default is 100')
 26 | parser.add_argument('--optimizer',type=str,default='adam',
 27 |                     help='optimizer,default is adam')
 28 | parser.add_argument('--lr',type=float,default=0.0001,
 29 |                     help='learning rate of the resnet and dense layer')
 30 | parser.add_argument('--lr_beta',type=float,default=0.1,
 31 |                     help='learning rate for the beta in margin based loss')
 32 | parser.add_argument('--margin',type=float,default=0.2,
 33 |                     help='margin for the margin based loss,default is 0.2')
 34 | parser.add_argument('--beta',type=float,default=1.2,
 35 |                     help='the class specific beta parameter')
 36 | parser.add_argument('--nu',type=float,default=0.0,
 37 |                     help='regularization parameter for beta,default is 0')
 38 | parser.add_argument('--steps',type=str,default='30,50,100,300',
 39 |                     help='epochs to updata learning rate')
 40 | parser.add_argument('--wd',type=float,default=0.0001,
 41 |                     help='weight decay rate,default is 0.0001')
 42 | parser.add_argument('--seed',type=int,default=123,
 43 |                     help='random seed to use,default=123')
 44 | parser.add_argument('--factor',type=float,default=0.5,
 45 |                     help='learning rate schedule factor,default is 0.5')
 46 | parser.add_argument('--print_freq',type=int,default=20,
 47 |                     help='print the accumulate loss for training process')
 48 | parser.add_argument('--debug',action='store_true',default=False)
 49 | 
 50 | 
 51 | opt = parser.parse_args()
 52 | logging.info(opt)
 53 | torch.random.manual_seed(opt.seed)
 54 | np.random.seed(opt.seed)
 55 | batch_size = opt.batch_size
 56 | os.environ['CUDA_VISIBLE_DEVICES']=opt.gpu_id
 57 | steps = [int(step) for step in opt.steps.split(',')]
 58 | 
 59 | 
 60 | def train():
 61 |     """
 62 |     train the margin based loss model
 63 |     :return:
 64 |     """
 65 |     # prepare for data for loader
 66 |     train_data = CUB200DataSet(data_path='data/cub200_2011/CUB_200_2011',batch_k=opt.batch_k,batch_size = opt.batch_size,is_train=True)
 67 |     test_data = CUB200DataSet(data_path='data/cub200_2011/CUB_200_2011',is_train=False)
 68 | 
 69 |     train_loader = DataLoader(train_data,batch_size=1,shuffle=False,num_workers=6)
 70 |     test_loader = DataLoader(test_data,batch_size=60,shuffle=False,num_workers=6)
 71 | 
 72 |     #begin to set model loss,optimizer,lr_rate, lr_schedule
 73 |     model = SampleModel(embeding_dim=opt.embed_dim)
 74 |     beta = torch.tensor(np.ones(100)*opt.beta, requires_grad=True,dtype=torch.float32)
 75 | 
 76 |     loss_criterion = Margin_Loss(batch_k=opt.batch_k,\
 77 |                                  margin=opt.margin,nu=opt.nu) # set loss function for this model
 78 | 
 79 |     conv_params = []
 80 |     non_conv_param =[]
 81 |     for name,param in model.base_model.named_parameters():
 82 |         if 'conv' in name:
 83 |             conv_params.append({'params':param,'lr':opt.lr*0.01})
 84 |         else:
 85 |             non_conv_param.append({'params':param,'lr':opt.lr})
 86 |     total_param =[]
 87 |     total_param.append({'params':model.dense.parameters(),'lr':opt.lr})
 88 |     total_param.extend(conv_params)
 89 |     total_param.extend(non_conv_param)
 90 |     #optimizer = torch.optim.Adam(total_param,lr=opt.lr,weight_decay=opt.wd)
 91 |     optimizer = torch.optim.SGD(total_param,lr=opt.lr,momentum=0.89)
 92 |     optimizer_beta = torch.optim.SGD([{'params':beta}],lr=opt.lr_beta,momentum= 0.9)
 93 | 
 94 | 
 95 |     lr_schedule = torch.optim.lr_scheduler.MultiStepLR(optimizer,
 96 |                                                        milestones=steps,gamma=opt.factor)
 97 | 
 98 |     if int(opt.gpu_id)>=0:
 99 |         model = model.cuda()  # the loss function has paramter to convey to cuda
100 |         beta = beta.cuda() # the beta parameter has parameter to stored in cuda
101 |         loss_criterion = loss_criterion.cuda()  # the loss criterion has compute in cuda
102 |         loss_criterion.convert_param(to_cuda=True)
103 | 
104 |     # begin to fetch data and train model
105 |     for epoch in range(opt.epochs):
106 |         print("begin to train epochs:{0}",epoch)
107 |         cumulative_loss =0
108 |         prev_loss = 0
109 |         lr_schedule.step()
110 |         for i,data in enumerate(train_loader):
111 |             images,label = data[0][0],data[1][0]
112 |             if int(opt.gpu_id)>=0:
113 |                 images = images.cuda()
114 |                 label = label.cuda()
115 |             features = model(images)
116 |             loss = loss_criterion(features,label,beta)
117 |             loss.backward()
118 |             optimizer.step()
119 |             optimizer_beta.step()
120 |             cumulative_loss += loss.item()
121 |             if (i+1)%(opt.print_freq)==0:
122 |                 print("[Epoch %d,Iter %d] training loss=%f"%(epoch,i+1,cumulative_loss-prev_loss))
123 |                 prev_loss = cumulative_loss
124 |             if opt.debug:
125 |                 break
126 | 
127 |         print("[Epoch %d] trainin loss =%f"%(epoch,cumulative_loss))
128 |         # print test val recall index
129 |         names,val_accs = val_model(model,test_loader)
130 |         for name,val_acc in zip(names,val_accs):
131 |             print("Epoch %d,validation:%s=%f"%(epoch,name,val_acc))
132 |     print("job finished")
133 | 
134 | 
135 | def val_model(model,test_loader):
136 |     """
137 |     val the model,return the recall@K k=1 index
138 |     :param model: Margin based model to extract feature of 128 dimension
139 |     :param test_loader: Test dataloader to load images data
140 |     :return: the recall@K k=1 index
141 |     """
142 |     model.eval()
143 |     outputs = []
144 |     labels =[]
145 |     with torch.no_grad():
146 |         for data,label in test_loader:
147 |             if int(opt.gpu_id)>=0:
148 |                 data = data.cuda()
149 |             feature = model(data)
150 |             outputs += feature.detach().cpu().numpy().tolist()
151 |             labels  += label.numpy().tolist()
152 |         model.train()
153 | 
154 |     #eval recall@k
155 |     features = np.array(outputs)
156 |     labels = np.array(labels)
157 | 
158 |     return evaluate_emb(features,labels)
159 | 
160 | def evaluate_emb(features,labels):
161 |     """
162 |     evaluate embedding in recall
163 |     :param features:
164 |     :param labels:
165 |     :return:
166 |     """
167 |     d_mat = get_distance_matrix(features)
168 |     names =[]
169 |     accs =[]
170 |     for k in [1,2,4,8,16]:
171 |         names.append('Recall@%d'%k)
172 |         correct,cnt = 0.0,0.0
173 |         for i in range(features.shape[0]):
174 |             d_mat[i,i]=1e10
175 |             nns = d_mat[i].argpartition(k)[:k]
176 |             if any(labels[i] ==labels[nn] for nn in nns):
177 |                 correct +=1
178 |             cnt +=1
179 |         accs.append(correct/cnt)
180 |     return names,accs # names is a list of ["Recall@K",,,,] accs is a list of [float_value]
181 | 
182 | 
183 | 
184 | def get_distance_matrix(x):
185 |     """
186 |     compute the distance matirx of features,
187 |     :param x: np.ndarray in shape (n,d) d is 128
188 |     :return: distance matrix of [n,n] for distance in each vector
189 |     """
190 |     squrare = np.sum(x*x,axis=1,keepdims=True)
191 |     distance_squrare = squrare + squrare.transpose() -2*np.dot(x,x.transpose())
192 |     return distance_squrare
193 | 
194 | 
195 | 
196 | 
197 | if __name__=='__main__':
198 |     print("begin to train the model of margin based loss")
199 |     train()
200 | 


--------------------------------------------------------------------------------
/server/copy_nn.py:
--------------------------------------------------------------------------------
  1 | # import pymongo
  2 | # import mxnet
  3 | # from mxnet import nd
  4 | #
  5 | # #every time yield 20 items and read iobytes extract feature then insert to new nnindex
  6 | #
  7 | # import asyncio
  8 | # import aiohttp
  9 | # from io import BytesIO
 10 | # import time
 11 | # import requests
 12 | #
 13 | #
 14 | # @asyncio.coroutine
 15 | # def get_image(img_url):
 16 | #     resp = yield from requests.get(img_url)
 17 | #     return resp.content
 18 | #
 19 | # def save_image(img,fobj):
 20 | #     fobj.write(img)
 21 | #
 22 | # @asyncio.coroutine
 23 | # def download_one(img_url,fobj):
 24 | #     image = yield from get_image(img_url)
 25 | #     save_image(image,fobj)
 26 | 
 27 | # !/usr/bin/env python
 28 | # import asyncio
 29 | # import aiohttp
 30 | #
 31 | # async def fetch_img(session, url):
 32 | #     with aiohttp.Timeout(10):
 33 | #         async with session.get(url) as response:
 34 | #             assert response.status == 200
 35 | #             return await response.read()
 36 | #
 37 | # loop = asyncio.get_event_loop()
 38 | # with aiohttp.ClientSession(loop=loop) as session:
 39 | #     img = loop.run_until_complete(
 40 | #         fetch_img(session, 'https://cdn.aidigger.com/images/instagram/f95f00da22a2e143e6e457b10544a120.jpeg'))
 41 | #     with open("img.png", "wb") as f:
 42 | #         f.write(img)
 43 | 
 44 | # if __name__ == '__main__':
 45 | #     url_list = ['https://cdn.aidigger.com/images/instagram/e2452f9daaad3ef7070adb22ee70958a.jpeg',
 46 | #                 'https://cdn.aidigger.com/images/instagram/bd717eaa4c351b842a497e8907b69855.jpeg',
 47 | #                 'https://cdn.aidigger.com/images/instagram/189a2af5d9661500b32271ca9b1865be.jpeg',
 48 | #                 'https://cdn.aidigger.com/images/instagram/6e70c94dd3fac214c5d7e6c061df2b2f.jpeg',
 49 | #                 'https://cdn.aidigger.com/images/instagram/f95f00da22a2e143e6e457b10544a120.jpeg']
 50 | #     fobj_list =[BytesIO() for _ in range(len(url_list))]
 51 | #     start = time.time()
 52 | #     loop = asyncio.get_event_loop()
 53 | #     to_do_tasks = [download_one(url,f) for url,f in zip(url_list,fobj_list)]
 54 | #     res,= loop.run_until_complete(asyncio.wait(to_do_tasks))
 55 | #     print(len(res))
 56 | #     print(time.time()-start)
 57 | 
 58 | 
 59 | import asyncio
 60 | import logging
 61 | from contextlib import closing
 62 | import aiohttp # $ pip install aiohttp
 63 | from io import BytesIO
 64 | from PIL import Image
 65 | import numpy as np
 66 | from pymongo import MongoClient
 67 | from mxnet import nd
 68 | import mxnet as mx
 69 | import mxnet.gluon.data.vision.transforms as T
 70 | import mxnet.gluon.model_zoo.vision as vision_model
 71 | from models import MarginNet
 72 | import mxnet
 73 | from mxnet.image import imread
 74 | 
 75 | logging.basicConfig(level=logging.WARNING, format='%(asctime)s %(message)s')
 76 | import requests
 77 | import json
 78 | import binascii
 79 | import numpy as np
 80 | from pymongo import MongoClient
 81 | from requests import ReadTimeout
 82 | from pprint import pprint
 83 | 
 84 | 
 85 | 
 86 | 
 87 | #image transform
 88 | normalize=T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
 89 | test_transform = T.Compose([
 90 |     T.Resize(256),
 91 |     T.CenterCrop(224),
 92 |     T.ToTensor(),
 93 |     normalize
 94 | ])
 95 | 
 96 | # define mongodb connect
 97 | def get_db():
 98 |     mongdb={}
 99 |     mongdb['host']='cc.com'
100 |     mongdb['port']=3717
101 |     client=MongoClient(host=mongdb['host'],port=mongdb['port'])
102 |     dev=client.get_database('dev')
103 |     dev.authenticate(name='cc',password='cc')
104 |     return dev
105 | 
106 | 
107 | @asyncio.coroutine
108 | def download(url, session, semaphore, chunk_size=1<<15):
109 |     with (yield from semaphore): # limit number of concurrent downloads
110 |         file = BytesIO()
111 |         logging.info('downloading %s', file)
112 |         response = yield from session.get(url)
113 |         with closing(response):
114 |             while True: # save file
115 |                 chunk = yield from response.content.read(chunk_size)
116 |                 if not chunk:
117 |                     break
118 |                 file.write(chunk)
119 |         logging.info('done %s', file)
120 |     return file, (response.status, tuple(response.headers.items()))
121 | 
122 | def get_net(gpu_id):
123 |     param_path = 'checkpoints/Fashion_In.params'
124 |     base_net = vision_model.get_model('resnet50_v2')
125 |     net = MarginNet(base_net.features, 128, batch_k=5)
126 |     context = [mxnet.gpu(gpu_id)]
127 |     net.initialize()
128 |     net.collect_params().reset_ctx(context)
129 |     net.load_parameters(filename=param_path, ctx=context[0])
130 |     return net,context
131 | 
132 | def get_cursor(db,collection_name,batch_size):
133 |     #define source nn_prod data fetch
134 |     nn_prod = db.get_collection(collection_name)
135 |     cursor = nn_prod.find({},{'vector':0,},batch_size=batch_size)
136 |     return cursor
137 | 
138 | def get_target_colection(db):
139 |     colletion_name = 'image_metric_taobao128'
140 |     target_collection = db.get_collection(colletion_name)
141 |     return target_collection
142 | 
143 | 
144 | def convert_vector_to_ascii(vector):
145 |     """convert a numpy array or a list to bytes, and to make it can be dumped by json, we convert the bytes to string
146 |     """
147 |     if isinstance(vector, (list, np.ndarray, np.generic)):
148 |         vector = np.asarray(vector, dtype=np.float32)
149 |     else:
150 |         raise ValueError("vector must be list or numpy array")
151 |     # add decode to convert base64 bytes to string
152 |     return binascii.b2a_base64(vector.tobytes()).decode()
153 | 
154 | def get_nn_config(model_name ='image_metric_taobao128'):
155 | 
156 |     host = 'https://alpha-nnsearch.aidigger.com/api/v1/'
157 |     path = 'model/'+model_name+'/'
158 |     return host,path
159 | 
160 | # begin to set basic paramter
161 | batch_size=20
162 | urls= []
163 | records = []
164 | db = get_db()
165 | cursor = get_cursor(db,'image_nn_prod',batch_size)
166 | net,context = get_net(0)
167 | host,path = get_nn_config('image_metric_taobao128')
168 | # set basic parameter finished
169 | 
170 | targe_collection = get_target_colection(db)
171 | 
172 | loop = asyncio.get_event_loop()
173 | session = aiohttp.ClientSession()
174 | semaphore = asyncio.Semaphore(20)
175 | 
176 | for item in cursor:
177 |     if len(urls)==batch_size:
178 |         #process
179 |         #with closing(asyncio.get_event_loop()) as loop, closing(aiohttp.ClientSession()) as session:
180 |         try:
181 |             download_tasks = (download(url, session, semaphore) for url in urls)
182 |             result = loop.run_until_complete(asyncio.gather(*download_tasks))
183 |         except Exception as e:
184 |             print(e)
185 |             urls = []
186 |             records = []
187 |             continue
188 | 
189 |         nd_img_list = []
190 |         succeed_ids = []
191 |         docs = []
192 |         for i,(f_ret,rec) in enumerate(zip(result,records)):
193 |             try:
194 |                 pil_img = Image.open(f_ret[0])
195 |                 nd_img_list.append(test_transform(nd.array(np.asarray(pil_img))))
196 |                 new_rec = {}
197 |                 new_rec['_id'] = rec['_id']
198 |                 new_rec['_int_id'] = rec['int_id']
199 |                 new_rec.update(rec['_source'])
200 |                 docs.append(new_rec)
201 |             except Exception as e:
202 |                 print(urls[i])
203 |                 print(e)
204 | 
205 | 
206 |         #nd_img_list = [test_transform(nd.array(np.asarray(Image.open(f_ret[0])))) for f_ret in result ]
207 |         if len(nd_img_list)!=len(records) or len(nd_img_list)< 2:
208 |             if len(nd_img_list)<2:
209 |                 print(urls[0])
210 |                 print("caution,failed to download all pictures")
211 |                 print(result[0][1][0],result[0][1][1])
212 | 
213 |             records.clear()
214 |             urls.clear()
215 |             docs.clear()
216 |             for f_ret in result:
217 |                 try:
218 |                     if not f_ret[0].closed:
219 |                         f_ret[0].close()
220 |                 except Exception as e:
221 |                     print(e)
222 |             continue
223 | 
224 |         nd_tensor_img = nd.stack(*nd_img_list,axis=0)
225 |         nd_tensor_img = nd_tensor_img.as_in_context(context[0])
226 |         data = net.extract(nd_tensor_img)
227 |         data = data.asnumpy()
228 | 
229 | 
230 | 
231 |         doc_types =['image']*len(records)
232 |         vectors = [convert_vector_to_ascii(v) for v in data ]
233 | 
234 |         ret = requests.post(host + path + "add/batch", json={"docs": docs, "doc_types": doc_types, "vectors": vectors})
235 |         print(ret.json())
236 | 
237 |         #for annother loop
238 |         doc_types=[]
239 |         vectors =[]
240 |         doc_types=[]
241 |         records = []
242 |         urls=[]
243 |         for f_ret in result:
244 |             try:
245 |                 if not f_ret[0].closed:
246 |                     f_ret[0].close()
247 |             except Exception as e:
248 |                 print(e)
249 |     else:
250 |         records.append(item)
251 |         urls.append(item['_source']['cdn_url'])
252 | 
253 | 
254 | 
255 | 


--------------------------------------------------------------------------------
/models/mx_margin_model.py:
--------------------------------------------------------------------------------
  1 | # Licensed to the Apache Software Foundation (ASF) under one
  2 | # or more contributor license agreements.  See the NOTICE file
  3 | # distributed with this work for additional information
  4 | # regarding copyright ownership.  The ASF licenses this file
  5 | # to you under the Apache License, Version 2.0 (the
  6 | # "License"); you may not use this file except in compliance
  7 | # with the License.  You may obtain a copy of the License at
  8 | #
  9 | #   http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing,
 12 | # software distributed under the License is distributed on an
 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 14 | # KIND, either express or implied.  See the License for the
 15 | # specific language governing permissions and limitations
 16 | # under the License.
 17 | 
 18 | 
 19 | from mxnet import gluon
 20 | from mxnet.gluon import nn, Block, HybridBlock
 21 | import numpy as np
 22 | 
 23 | class L2Normalization(HybridBlock):
 24 |     r"""Applies L2 Normalization to input.
 25 | 
 26 |     Parameters
 27 |     ----------
 28 |     mode : str
 29 |         Mode of normalization.
 30 |         See :func:`~mxnet.ndarray.L2Normalization` for available choices.
 31 | 
 32 |     Inputs:
 33 |         - **data**: input tensor with arbitrary shape.
 34 | 
 35 |     Outputs:
 36 |         - **out**: output tensor with the same shape as `data`.
 37 |     """
 38 |     def __init__(self, mode, **kwargs):
 39 |         self._mode = mode
 40 |         super(L2Normalization, self).__init__(**kwargs)
 41 | 
 42 |     def hybrid_forward(self, F, x):
 43 |         return F.L2Normalization(x, mode=self._mode, name='l2_norm')
 44 | 
 45 |     def __repr__(self):
 46 |         s = '{name}({_mode})'
 47 |         return s.format(name=self.__class__.__name__,
 48 |                         **self.__dict__)
 49 | 
 50 | 
 51 | def get_distance(F, x):
 52 |     """Helper function for margin-based loss. Return a distance matrix given a matrix."""
 53 |     n = x.shape[0]
 54 | 
 55 |     square = F.sum(x ** 2.0, axis=1, keepdims=True)
 56 |     distance_square = square + square.transpose() - (2.0 * F.dot(x, x.transpose()))
 57 | 
 58 |     # Adding identity to make sqrt work.
 59 |     return F.sqrt(distance_square + F.array(np.identity(n)))
 60 | 
 61 | class DistanceWeightedSampling(HybridBlock):
 62 |     r"""Distance weighted sampling. See "sampling matters in deep embedding learning"
 63 |     paper for details.
 64 | 
 65 |     Parameters
 66 |     ----------
 67 |     batch_k : int
 68 |         Number of images per class.
 69 | 
 70 |     Inputs:
 71 |         - **data**: input tensor with shape (batch_size, embed_dim).
 72 |         Here we assume the consecutive batch_k examples are of the same class.
 73 |         For example, if batch_k = 5, the first 5 examples belong to the same class,
 74 |         6th-10th examples belong to another class, etc.
 75 | 
 76 |     Outputs:
 77 |         - a_indices: indices of anchors.
 78 |         - x[a_indices]: sampled anchor embeddings.
 79 |         - x[p_indices]: sampled positive embeddings.
 80 |         - x[n_indices]: sampled negative embeddings.
 81 |         - x: embeddings of the input batch.
 82 |     """
 83 |     def __init__(self, batch_k, cutoff=0.5, nonzero_loss_cutoff=1.4, **kwargs):
 84 |         self.batch_k = batch_k
 85 |         self.cutoff = cutoff
 86 | 
 87 |         # We sample only from negatives that induce a non-zero loss.
 88 |         # These are negatives with a distance < nonzero_loss_cutoff.
 89 |         # With a margin-based loss, nonzero_loss_cutoff == margin + beta.
 90 |         self.nonzero_loss_cutoff = nonzero_loss_cutoff
 91 |         super(DistanceWeightedSampling, self).__init__(**kwargs)
 92 | 
 93 |     def hybrid_forward(self, F, x):
 94 |         k = self.batch_k
 95 |         n, d = x.shape
 96 | 
 97 |         distance = get_distance(F, x)
 98 |         # Cut off to avoid high variance.
 99 |         distance = F.maximum(distance, self.cutoff)
100 | 
101 |         # Subtract max(log(distance)) for stability.
102 |         log_weights = ((2.0 - float(d)) * F.log(distance)
103 |                        - (float(d - 3) / 2) * F.log(1.0 - 0.25 * (distance ** 2.0)))
104 |         weights = F.exp(log_weights - F.max(log_weights))
105 | 
106 |         # Sample only negative examples by setting weights of
107 |         # the same-class examples to 0.
108 |         mask = np.ones(weights.shape)
109 |         for i in range(0, n, k):
110 |             mask[i:i+k, i:i+k] = 0
111 |         mask_uniform_probs = mask * (1.0/(n-k))
112 | 
113 |         weights = weights * F.array(mask) * (distance < self.nonzero_loss_cutoff)
114 |         weights_sum = F.sum(weights, axis=1, keepdims=True)
115 |         weights = weights / weights_sum
116 | 
117 |         a_indices = []
118 |         p_indices = []
119 |         n_indices = []
120 | 
121 |         np_weights = weights.asnumpy()
122 |         for i in range(n):
123 |             block_idx = i // k
124 | 
125 |             if weights_sum[i] != 0:
126 |                 n_indices += np.random.choice(n, k-1, p=np_weights[i]).tolist()
127 |             else:
128 |                 # all samples are above the cutoff so we sample uniformly
129 |                 n_indices += np.random.choice(n, k-1, p=mask_uniform_probs[i]).tolist()
130 |             for j in range(block_idx * k, (block_idx + 1) * k):
131 |                 if j != i:
132 |                     a_indices.append(i)
133 |                     p_indices.append(j)
134 | 
135 |         return a_indices, x[a_indices], x[p_indices], x[n_indices], x
136 | 
137 |     def __repr__(self):
138 |         s = '{name}({batch_k})'
139 |         return s.format(name=self.__class__.__name__,
140 |                         **self.__dict__)
141 | 
142 | 
143 | class MarginNet(Block):
144 |     r"""Embedding network with distance weighted sampling.
145 |     It takes a base CNN and adds an embedding layer and a
146 |     sampling layer at the end.
147 | 
148 |     Parameters
149 |     ----------
150 |     base_net : Block
151 |         Base network.
152 |     emb_dim : int
153 |         Dimensionality of the embedding.
154 |     batch_k : int
155 |         Number of images per class in a batch. Used in sampling.
156 | 
157 |     Inputs:
158 |         - **data**: input tensor with shape (batch_size, channels, width, height).
159 |         Here we assume the consecutive batch_k images are of the same class.
160 |         For example, if batch_k = 5, the first 5 images belong to the same class,
161 |         6th-10th images belong to another class, etc.
162 | 
163 |     Outputs:
164 |         - The output of DistanceWeightedSampling.
165 |     """
166 |     def __init__(self, base_net, emb_dim, batch_k=5, **kwargs):
167 |         super(MarginNet, self).__init__(**kwargs)
168 |         with self.name_scope():
169 |             self.base_net = base_net
170 |             self.dense = nn.Dense(emb_dim)
171 |             self.normalize = L2Normalization(mode='instance')
172 |             self.sampled = DistanceWeightedSampling(batch_k=batch_k)
173 | 
174 |     def forward(self, x):
175 |         z = self.base_net(x)
176 |         z = self.dense(z)
177 |         z = self.normalize(z)
178 |         z = self.sampled(z)
179 |         return z
180 | 
181 |     def extract(self,x):
182 |         z = self.base_net(x)
183 |         z = self.dense(z)
184 |         z = self.normalize(z)
185 |         return z # just return feature vector
186 | 
187 | 
188 | class MarginLoss(gluon.loss.Loss):
189 |     r"""Margin based loss.
190 | 
191 |     Parameters
192 |     ----------
193 |     margin : float
194 |         Margin between positive and negative pairs.
195 |     nu : float
196 |         Regularization parameter for beta.
197 | 
198 |     Inputs:
199 |         - anchors: sampled anchor embeddings.
200 |         - positives: sampled positive embeddings.
201 |         - negatives: sampled negative embeddings.
202 |         - beta_in: class-specific betas.
203 |         - a_indices: indices of anchors. Used to get class-specific beta.
204 | 
205 |     Outputs:
206 |         - Loss.
207 |     """
208 |     def __init__(self, margin=0.2, nu=0.0, weight=None, batch_axis=0, **kwargs):
209 |         super(MarginLoss, self).__init__(weight, batch_axis, **kwargs)
210 |         self._margin = margin
211 |         self._nu = nu
212 | 
213 |     def hybrid_forward(self, F, anchors, positives, negatives, beta_in, a_indices=None):
214 |         if a_indices is not None:
215 |             # Jointly train class-specific beta.
216 |             beta = beta_in.data()[a_indices]
217 |             beta_reg_loss = F.sum(beta) * self._nu
218 |         else:
219 |             # Use a constant beta.
220 |             beta = beta_in
221 |             beta_reg_loss = 0.0
222 | 
223 |         d_ap = F.sqrt(F.sum(F.square(positives - anchors), axis=1) + 1e-8)
224 |         d_an = F.sqrt(F.sum(F.square(negatives - anchors), axis=1) + 1e-8)
225 | 
226 |         pos_loss = F.maximum(d_ap - beta + self._margin, 0.0)
227 |         neg_loss = F.maximum(beta - d_an + self._margin, 0.0)
228 | 
229 |         pair_cnt = F.sum((pos_loss > 0.0) + (neg_loss > 0.0))
230 |         if pair_cnt == 0.0:
231 |             # When poss_loss and neg_loss is zero then total loss is zero as well
232 |             loss = F.sum(pos_loss + neg_loss)
233 |         else:
234 |             # Normalize based on the number of pairs.
235 |             loss = (F.sum(pos_loss + neg_loss) + beta_reg_loss) / pair_cnt
236 |         return gluon.loss._apply_weighting(F, loss, self._weight, None)
237 | 


--------------------------------------------------------------------------------
/train_mx_margin.py:
--------------------------------------------------------------------------------
  1 | # copy from mxnet/examples/gluon/emebding_learning example code
  2 | 
  3 | from __future__ import division
  4 | 
  5 | import argparse
  6 | import logging
  7 | import time
  8 | 
  9 | import numpy as np
 10 | from bottleneck import argpartition
 11 | 
 12 | import mxnet as mx
 13 | from data import cub200_iterator
 14 | from mxnet import gluon
 15 | from mxnet.gluon.model_zoo import vision as models
 16 | from mxnet import autograd as ag, nd
 17 | from models.mx_margin_model import MarginNet, MarginLoss
 18 | 
 19 | logging.basicConfig(level=logging.INFO)
 20 | 
 21 | # CLI
 22 | parser = argparse.ArgumentParser(description='train a model for image classification.')
 23 | parser.add_argument('--data-path', type=str, default='data/CUB_200_2011',
 24 |                     help='path of data.')
 25 | parser.add_argument('--embed-dim', type=int, default=128,
 26 |                     help='dimensionality of image embedding. default is 128.')
 27 | parser.add_argument('--batch-size', type=int, default=70,
 28 |                     help='training batch size per device (CPU/GPU). default is 70.')
 29 | parser.add_argument('--batch-k', type=int, default=5,
 30 |                     help='number of images per class in a batch. default is 5.')
 31 | parser.add_argument('--gpus', type=str, default='',
 32 |                     help='list of gpus to use, e.g. 0 or 0,2,5. empty means using cpu.')
 33 | parser.add_argument('--epochs', type=int, default=20,
 34 |                     help='number of training epochs. default is 20.')
 35 | parser.add_argument('--optimizer', type=str, default='adam',
 36 |                     help='optimizer. default is adam.')
 37 | parser.add_argument('--lr', type=float, default=0.0001,
 38 |                     help='learning rate. default is 0.0001.')
 39 | parser.add_argument('--lr-beta', type=float, default=0.1,
 40 |                     help='learning rate for the beta in margin based loss. default is 0.1.')
 41 | parser.add_argument('--margin', type=float, default=0.2,
 42 |                     help='margin for the margin based loss. default is 0.2.')
 43 | parser.add_argument('--beta', type=float, default=1.2,
 44 |                     help='initial value for beta. default is 1.2.')
 45 | parser.add_argument('--nu', type=float, default=0.0,
 46 |                     help='regularization parameter for beta. default is 0.0.')
 47 | parser.add_argument('--factor', type=float, default=0.5,
 48 |                     help='learning rate schedule factor. default is 0.5.')
 49 | parser.add_argument('--steps', type=str, default='12,14,16,18',
 50 |                     help='epochs to update learning rate. default is 12,14,16,18.')
 51 | parser.add_argument('--wd', type=float, default=0.0001,
 52 |                     help='weight decay rate. default is 0.0001.')
 53 | parser.add_argument('--seed', type=int, default=123,
 54 |                     help='random seed to use. default=123.')
 55 | parser.add_argument('--model', type=str, default='resnet50_v2',
 56 |                     help='type of model to use. see vision_model for options.')
 57 | parser.add_argument('--save-model-prefix', type=str, default='margin_loss_model',
 58 |                     help='prefix of models to be saved.')
 59 | parser.add_argument('--use_pretrained', action='store_true',
 60 |                     help='enable using pretrained model from gluon.')
 61 | parser.add_argument('--kvstore', type=str, default='device',
 62 |                     help='kvstore to use for trainer.')
 63 | parser.add_argument('--log-interval', type=int, default=20,
 64 |                     help='number of batches to wait before logging.')
 65 | opt = parser.parse_args()
 66 | 
 67 | logging.info(opt)
 68 | 
 69 | # Settings.
 70 | mx.random.seed(opt.seed)
 71 | np.random.seed(opt.seed)
 72 | 
 73 | batch_size = opt.batch_size
 74 | 
 75 | gpus = [] if opt.gpus is None or opt.gpus is '' else [
 76 |     int(gpu) for gpu in opt.gpus.split(',')]
 77 | num_gpus = len(gpus)
 78 | 
 79 | batch_size *= max(1, num_gpus)
 80 | context = [mx.gpu(i) for i in gpus] if num_gpus > 0 else [mx.cpu()]
 81 | steps = [int(step) for step in opt.steps.split(',')]
 82 | 
 83 | # Construct model.
 84 | kwargs = {'ctx': context, 'pretrained': opt.use_pretrained}
 85 | net = models.get_model(opt.model, **kwargs)
 86 | 
 87 | if opt.use_pretrained:
 88 |     # Use a smaller learning rate for pre-trained convolutional layers.
 89 |     for v in net.collect_params().values():
 90 |         if 'conv' in v.name:
 91 |             setattr(v, 'lr_mult', 0.01)
 92 | 
 93 | net.hybridize()
 94 | net = MarginNet(net.features, opt.embed_dim, opt.batch_k)
 95 | beta = mx.gluon.Parameter('beta', shape=(100,))
 96 | 
 97 | # Get iterators.
 98 | train_data, val_data = cub200_iterator(opt.data_path, opt.batch_k, batch_size, (3, 224, 224))
 99 | 
100 | 
101 | def get_distance_matrix(x):
102 |     """Get distance matrix given a matrix. Used in testing."""
103 |     square = nd.sum(x ** 2.0, axis=1, keepdims=True)
104 |     distance_square = square + square.transpose() - (2.0 * nd.dot(x, x.transpose()))
105 |     return nd.sqrt(distance_square)
106 | 
107 | 
108 | def evaluate_emb(emb, labels):
109 |     """Evaluate embeddings based on Recall@k."""
110 |     d_mat = get_distance_matrix(emb)
111 |     d_mat = d_mat.asnumpy()
112 |     labels = labels.asnumpy()
113 | 
114 |     names = []
115 |     accs = []
116 |     for k in [1, 2, 4, 8, 16]:
117 |         names.append('Recall@%d' % k)
118 |         correct, cnt = 0.0, 0.0
119 |         for i in range(emb.shape[0]):
120 |             d_mat[i, i] = 1e10
121 |             nns = argpartition(d_mat[i], k)[:k]
122 |             if any(labels[i] == labels[nn] for nn in nns):
123 |                 correct += 1
124 |             cnt += 1
125 |         accs.append(correct/cnt)
126 |     return names, accs
127 | 
128 | 
129 | def test(ctx):
130 |     """Test a model."""
131 |     val_data.reset()
132 |     outputs = []
133 |     labels = []
134 |     for batch in val_data:
135 |         data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
136 |         label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
137 |         for x in data:
138 |             outputs.append(net(x)[-1])
139 |         labels += label
140 | 
141 |     outputs = nd.concatenate(outputs, axis=0)[:val_data.n_test]
142 |     labels = nd.concatenate(labels, axis=0)[:val_data.n_test]
143 |     return evaluate_emb(outputs, labels)
144 | 
145 | 
146 | def get_lr(lr, epoch, steps, factor):
147 |     """Get learning rate based on schedule."""
148 |     for s in steps:
149 |         if epoch >= s:
150 |             lr *= factor
151 |     return lr
152 | 
153 | 
154 | def train(epochs, ctx):
155 |     """Training function."""
156 |     if isinstance(ctx, mx.Context):
157 |         ctx = [ctx]
158 |     net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
159 | 
160 |     opt_options = {'learning_rate': opt.lr, 'wd': opt.wd}
161 |     if opt.optimizer == 'sgd':
162 |         opt_options['momentum'] = 0.9
163 |     if opt.optimizer == 'adam':
164 |         opt_options['epsilon'] = 1e-7
165 |     trainer = gluon.Trainer(net.collect_params(), opt.optimizer,
166 |                             opt_options,
167 |                             kvstore=opt.kvstore)
168 |     if opt.lr_beta > 0.0:
169 |         # Jointly train class-specific beta.
170 |         # See "sampling matters in deep embedding learning" paper for details.
171 |         beta.initialize(mx.init.Constant(opt.beta), ctx=ctx)
172 |         trainer_beta = gluon.Trainer([beta], 'sgd',
173 |                                      {'learning_rate': opt.lr_beta, 'momentum': 0.9},
174 |                                      kvstore=opt.kvstore)
175 | 
176 |     loss = MarginLoss(margin=opt.margin, nu=opt.nu)
177 | 
178 |     best_val = 0.0
179 |     for epoch in range(epochs):
180 |         tic = time.time()
181 |         prev_loss, cumulative_loss = 0.0, 0.0
182 | 
183 |         # Learning rate schedule.
184 |         trainer.set_learning_rate(get_lr(opt.lr, epoch, steps, opt.factor))
185 |         logging.info('Epoch %d learning rate=%f', epoch, trainer.learning_rate)
186 |         if opt.lr_beta > 0.0:
187 |             trainer_beta.set_learning_rate(get_lr(opt.lr_beta, epoch, steps, opt.factor))
188 |             logging.info('Epoch %d beta learning rate=%f', epoch, trainer_beta.learning_rate)
189 | 
190 |         # Inner training loop.
191 |         for i in range(200):
192 |             batch = train_data.next()
193 |             data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
194 |             label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
195 | 
196 |             Ls = []
197 |             with ag.record():
198 |                 for x, y in zip(data, label):
199 |                     a_indices, anchors, positives, negatives, _ = net(x)
200 | 
201 |                     if opt.lr_beta > 0.0:
202 |                         L = loss(anchors, positives, negatives, beta, y[a_indices])
203 |                     else:
204 |                         L = loss(anchors, positives, negatives, opt.beta, None)
205 | 
206 |                     # Store the loss and do backward after we have done forward
207 |                     # on all GPUs for better speed on multiple GPUs.
208 |                     Ls.append(L)
209 |                     cumulative_loss += nd.mean(L).asscalar()
210 | 
211 |                 for L in Ls:
212 |                     L.backward()
213 | 
214 |             # Update.
215 |             trainer.step(batch.data[0].shape[0])
216 |             if opt.lr_beta > 0.0:
217 |                 trainer_beta.step(batch.data[0].shape[0])
218 | 
219 |             if (i+1) % opt.log_interval == 0:
220 |                 logging.info('[Epoch %d, Iter %d] training loss=%f' % (
221 |                     epoch, i+1, cumulative_loss - prev_loss))
222 |                 prev_loss = cumulative_loss
223 | 
224 |         logging.info('[Epoch %d] training loss=%f'%(epoch, cumulative_loss))
225 |         logging.info('[Epoch %d] time cost: %f'%(epoch, time.time()-tic))
226 | 
227 |         names, val_accs = test(ctx)
228 |         for name, val_acc in zip(names, val_accs):
229 |             logging.info('[Epoch %d] validation: %s=%f'%(epoch, name, val_acc))
230 | 
231 |         if val_accs[0] > best_val:
232 |             best_val = val_accs[0]
233 |             logging.info('Saving %s.' % opt.save_model_prefix)
234 |             net.save_parameters('%s.params' % opt.save_model_prefix)
235 |     return best_val
236 | 
237 | 
238 | if __name__ == '__main__':
239 |     best_val_recall = train(opt.epochs, context)
240 |     print('Best validation Recall@1: %.2f.' % best_val_recall)
241 | 


--------------------------------------------------------------------------------
/data/mxdata/deep_fashion.py:
--------------------------------------------------------------------------------
  1 | print("program begin")
  2 | from mxnet.gluon.data import DataLoader,Dataset
  3 | from mxnet import nd
  4 | from mxnet.image import imread
  5 | import os
  6 | import numpy as np
  7 | import mxnet as mx
  8 | import mxnet.gluon.data.vision.transforms as T
  9 | from collections import Counter
 10 | 
 11 | normalize=T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
 12 | default_transform = T.Compose([
 13 |     T.Resize(256),
 14 |     T.RandomResizedCrop(size=224,scale=(1.0,1.0),ratio=(1.0,1.0)),# just crop,not scale
 15 |     T.RandomFlipLeftRight(),
 16 |     T.ToTensor(), # last to swap  channel to c,w,h
 17 |     normalize
 18 | ])
 19 | 
 20 | test_transform = T.Compose([
 21 |     T.Resize(256),
 22 |     T.CenterCrop(224),
 23 |     T.ToTensor(),
 24 |     normalize
 25 | ])
 26 | 
 27 | 
 28 | # like the
 29 | class DeepInClassFashion(Dataset):
 30 |     """
 31 |     the DeepInClassFashion dataset.read data from list_item_inshop.txt,
 32 | 
 33 |     """
 34 |     def __init__(self,dir_root,batch_k=4,batch_size=80,is_train = True,transform = default_transform):
 35 |         self.root = dir_root
 36 |         self.batch_k = batch_k
 37 |         self.batch_size = batch_size
 38 |         self._transform = transform
 39 |         self.is_train = is_train
 40 |         self.train_ids = []
 41 |         self.boxes = {} # a dictionary store {key:path,value:bbox}
 42 |         self.test_ids = set() # for super_type to subtract
 43 |         self.test_images2id=[]# a list to store[(path,id),(path,id)]
 44 |         with open(os.path.join(self.root,'Anno','list_item_inshop.txt'),'r') as f_instance:
 45 |             self.instance_count = int(f_instance.readline().strip())
 46 |             #self.instance_ids = list(f_instance.readlines())
 47 |             self.images_files = [ [] for _ in range(self.instance_count+1)]
 48 | 
 49 |         with open(os.path.join(self.root,'Anno','list_eval_partition.txt'),'r') as f_parti:
 50 |             f_parti.readline() # read pictures number
 51 |             f_parti.readline() # read information
 52 |             train_ids = []  # will use counter to duplicate checking
 53 |             for line in f_parti.readlines():
 54 |                 path,item_id,status = [ i for i in filter(lambda x:x is not '',line.strip().split(' '))]
 55 |                 int_id = int(item_id.split('_')[-1])
 56 |                 path = str(path)
 57 |                 if status == 'train':
 58 |                     self.images_files[int_id].append(path)
 59 |                     self.train_ids.append(int_id)
 60 |                 else:
 61 |                     self.test_images2id.append((path,int_id))
 62 |                     self.test_ids.add(int_id)
 63 |             # count train_ids and its distribution
 64 |         #post precessing for train_ids
 65 |         self.train_ids_list = list(set(self.train_ids))
 66 |         count = Counter(self.train_ids)
 67 |         self.train_ids_count = np.array([count[int_id] for int_id in self.train_ids_list])
 68 |         self.train_ids_dist = self.train_ids_count/sum(self.train_ids_count)
 69 | 
 70 |         with open(os.path.join(self.root,'Anno','list_bbox_inshop.txt'),'r') as f_bbox:
 71 |             f_bbox.readline() # read count
 72 |             f_bbox.readline() # read description
 73 |             for line in f_bbox.readlines():
 74 |                 list_info = line.strip().split(' ')
 75 |                 path,box = str(list_info[0]),list_info[-4:]
 76 |                 self.boxes[path]=[ i for i in map(lambda x:int(x),box)] # convert to int
 77 |         #read instance ,split set,bbox data
 78 | 
 79 |         # sub_list_test = self.images_files[list(self.test_ids)]
 80 |         # self.test_len = 0
 81 |         # for small_list in sub_list_test:
 82 |         #     self.test_len += len(small_list)
 83 |         self.build_structure()
 84 | 
 85 | 
 86 |     def write_test_files(self):
 87 |         """write the test files and label id"""
 88 |         import csv
 89 |         f = open('checkpoints/fashion_test.txt','w')
 90 |         writer = csv.writer(f,dialect='excel')
 91 |         print(len(self.test_images2id))
 92 |         print("begin to write")
 93 |         writer.writerows(self.test_images2id)
 94 |         f.close()
 95 | 
 96 | 
 97 | 
 98 |     def build_structure(self):
 99 |         """build the folder to id structure dataset,
100 |         construct the super class structure to select"""
101 |         print("the img_root:%s"%(self.root))
102 |         img_root = os.path.join(self.root,'img')
103 |         self.super_types = {} # super_type2 ids{'men_shorts':[1,23,4,5]}
104 |         for sexual in os.listdir(img_root):
105 |             for clothe_type in os.listdir(os.path.join(self.root,'img',sexual)):
106 |                 ids = os.listdir(os.path.join(self.root,'img',sexual,clothe_type))
107 |                 origin_ids = [int(instance_id.split('_')[-1]) for instance_id in ids]
108 |                 split_test = set(origin_ids) - self.test_ids
109 |                 self.super_types[sexual+'_'+clothe_type] = list(split_test) # after split to test
110 |         self.super_type_list = list(self.super_types.keys())
111 | 
112 |         self.super_type_count = np.array([len(self.super_types[k]) for k in self.super_types.keys()])
113 |         #containing classes count in a super type
114 | 
115 |         self.super_type_distri =self.super_type_count/sum(self.super_type_count) # the distribution ,assume every id instance has 4 or five  images
116 | 
117 |     def __len__(self):
118 |         if self.is_train:
119 |             return 1000
120 |         else:
121 |             return len(self.test_images2id) # to many picture to valid
122 | 
123 |     def sampled_batch_data(self):
124 |         """choose an super_types,
125 |         then choose the batch with batch_k iamges with bbox crop"""
126 |         #sample based on the distribution
127 |         batch =[]
128 |         labels =[]
129 |         num_groups = self.batch_size //self.batch_k
130 |         super_id = np.random.choice(self.super_type_list,size=1,replace=False,\
131 |                                     p=self.super_type_distri)[0]
132 |         try:
133 |             sampled_ids = np.random.choice(self.super_types[super_id],\
134 |                                            size=num_groups,replace=False)
135 |         except Exception as e:
136 |             sampled_ids = self.super_types[super_id] # type is list small than 25
137 | 
138 |         #the sampled_ids is like[1,2,5,45,23] in a super_type
139 |         for i in sampled_ids:
140 |             try:
141 |                 img_fname = np.random.choice(
142 |                     self.images_files[i],
143 |                     size=self.batch_k,
144 |                     replace=False
145 |                 )
146 |             except Exception as e:
147 |                 continue
148 |             batch += img_fname.tolist()
149 |             labels += [i]*self.batch_k
150 |         return batch,labels # format like img/man/short/id_xxxx01/01_shorts.jpg
151 | 
152 |     def __getitem__(self, index):
153 |         if self.is_train:
154 |             imagelist = []
155 |             batch,labels = self.sampled_batch_data()
156 |             for file in batch:
157 |                 file_path = os.path.join(self.root,file)
158 |                 image = imread(file_path,to_rgb=True,flag=1)
159 |                 if image.shape[2]==1:
160 |                     print("has gray file",file)
161 |                     image = nd.tile(image,(1,1,3))
162 |                 box = self.boxes.get(file,[0,0,256,256])
163 |                 image = image[box[1]:box[3],box[0]:box[2]] # crop image in width and height
164 |                 image = self._transform(image)
165 |                 imagelist.append(image)
166 |             return nd.stack(*imagelist,axis=0),nd.array(labels)
167 |         else:
168 |             path,class_id = self.test_images2id[index]
169 |             box = self.boxes.get(path, [0, 0, 256, 256]) # fetch path,id and box
170 |             file_path = os.path.join(self.root,path)
171 |             image = imread(file_path,to_rgb=True,flag=1)
172 |             if image.shape[2]==1:
173 |                 image = nd.tile(image,(1,1,3))
174 | 
175 |             image = image[box[1]:box[3], box[0]:box[2]]  # crop test image
176 |             image = self._transform(image)
177 |             return image,class_id
178 | 
179 | 
180 | 
181 | 
182 | 
183 | 
184 | def getDeepInClassFashion(dir_root,batch_k,batch_size):
185 |     """three main paramter dir,batch_k,batch_size"""
186 |     train_data = DeepInClassFashion(dir_root=dir_root,batch_k=batch_k,batch_size=batch_size,is_train=True,\
187 |                               transform=default_transform)
188 |     test_data = DeepInClassFashion(dir_root=dir_root,batch_k=batch_k,batch_size=batch_size,is_train=False,\
189 |                               transform=test_transform)
190 |     train_loader = DataLoader(train_data,batch_size=1,shuffle=False,num_workers=6)
191 |     test_loader = DataLoader(test_data,batch_size=batch_size,shuffle=False,num_workers=6)
192 |     return train_loader,test_loader
193 | 
194 | 
195 | class DeepCrossClassFashion(DeepInClassFashion):
196 |     def __init__(self,dir_root,batch_k=4,batch_size=80,is_train = True,transform = default_transform):
197 |         super(DeepCrossClassFashion,self).__init__(dir_root,batch_k,batch_size,is_train,transform)
198 |         self.datatype='CrossClass'
199 | 
200 |     def sampled_batch_data(self):
201 |         batch = []
202 |         labels = []
203 |         num_groups = self.batch_size//self.batch_k
204 |         sampled_ids = np.random.choice(self.train_ids_list,size=num_groups,replace=False,p=self.train_ids_dist)
205 |         for i in sampled_ids:
206 |             try:
207 |                 img_fnames = np.random.choice(self.images_files[i],\
208 |                                              size=self.batch_k,replace=False)
209 |             except Exception as e:
210 |                 continue
211 |             batch += img_fnames.tolist()
212 |             labels += [i]*self.batch_k
213 |         return batch, labels
214 | 
215 | def getDeepCrossClassFashion(dir_root,batch_k,batch_size):
216 |     train_data = DeepCrossClassFashion(dir_root,batch_k,batch_size=batch_size,\
217 |                                        is_train=True,transform=default_transform)
218 |     test_data  = DeepCrossClassFashion(dir_root,batch_k=batch_k,batch_size=batch_size,\
219 |                                        is_train=True,transform=test_transform)
220 |     train_loader = DataLoader(train_data,batch_size=1,shuffle=False,num_workers=6)
221 |     test_loader = DataLoader(test_data,batch_size=batch_size,shuffle=False,num_workers=6)
222 |     return train_loader,test_loader
223 | 
224 | 
225 | if __name__ == '__main__':
226 |     train_data = DeepInClassFashion(dir_root='data/DeepInShop',batch_k=4,batch_size=80,is_train=True,\
227 |                                transform=default_transform)
228 |     # test_data = DeepCrossClassFashion(dir_root='data/DeepInShop',batch_k=4,batch_size=80,is_train=False,\
229 |     #                                   transform=test_transform)
230 |     #
231 |     # data = train_data[0]
232 |     # print('train data x shape',data[0].shape,'training data y shape ',data[1].shape)
233 |     # data = test_data[0]
234 |     # print('test data x shape',data[0].shape,'training data y shape',data[1])
235 |     train_data.write_test_files()
236 | 
237 | 
238 | 
239 | 


--------------------------------------------------------------------------------
/README.MD:
--------------------------------------------------------------------------------
  1 | # Deep Embedding Learning for Image Retrieval
  2 | ---
  3 | # Deep Embedding Introduction  
  4 | DeepEmbedding 是使用深度学习的方法把多种媒体映射嵌入到相同向量空间，在统一空间中进行搜索的技术。  
  5 | 本项目通过视觉级别搜索，细粒度类别（实例检索）和图像-文本互搜的方式来测试通用多媒体检索。  
  6 | # 图像检索问题的一般解法  
  7 | DeepEmbedding旨在使用深度度量学习（DeepMetric)或者深度哈希（DeepHash）的方法学习关系保持的空间映射函数，能将视觉空间映射到低维嵌入空间,使用向量搜索引擎进行搜索.第一个问题为特征提取问题,即为本实验研究的问题,第二个问题为特征搜索问题.第二个问题解决方案可参见ANNS（近似邻近搜索）[NNSearchService](https://github.com/EigenLab/NNSearchService) 。  
  8 | ## Note
  9 | - 本项目实现了基于Multi-npair-loss的度量学习应用于检索和基于Sampling margin loss的方法进行检索
 10 | - 具体复现参见论文Triplet loss[FaceNet: ](http://arxiv.org/abs/1503.03832)  
 11 | [N-pair loss](http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/papers/nips16_npairmetriclearning.pdf)  
 12 | [Margin lossSampling Matters in Deep Emebedding Learning](https://www.cs.utexas.edu/~cywu/projects/sampling_matters/)   
 13 | [BatchHard](https://arxiv.org/abs/1703.07737)   
 14 | 
 15 | # 实验结果 
 16 | (可点击下载百度网盘链接,查看图片)
 17 | - 在StanfordOnlineProduct训练，计算NMI聚类指标 nmi=0.866，对验证集的向量嵌入进行T-SNE降维后可以看出，降维图像约 43M，百度网盘链接在下：
 18 | - [margin_based loss :DeepFashion](https://pan.baidu.com/s/1zLZX24qBb_Op1vsry4LX6w)https://pan.baidu.com/s/1zLZX24qBb_Op1vsry4LX6w  
 19 | 计算指标：nmi=0.866  
 20 | - [Mc-n-pair loss:StanfordOnlineProduct](https://pan.baidu.com/s/12eNTVsRFu--SYMW8P8HPfQ)https://pan.baidu.com/s/12eNTVsRFu--SYMW8P8HPfQ  
 21 | 计算指标：nmi=0.830  
 22 | 
 23 | 
 24 | # 关于本项目的使用
 25 | 1.下载相应的数据集
 26 | 2.采用不同的loss类型对模型进行训练
 27 |   run train cub200 model
 28 |   ```angular2html
 29 |   nohup python train_mx_ebay_margin.py --gpus=1 --batch-k=5 --use_viz --epochs=30 --use_pretrained --steps=12,16,20,24 --name=CUB_200_2011 --save-model-prefix=cub200 > mycub200.out 2>&1 &
 30 |   ```
 31 |   run train stanford_online_product
 32 |   ```angular2html
 33 |   nohup python train_mx_ebay_margin.py --batch-k=2 --batch-size=80 --use_pretrained --use_viz --gpus=0 --name=Inclass_ebay --data=EbayInClass --save-model-prefix=ebayinclass > mytraininclass_ebay.log 2>&1
 34 | ```
 35 | 
 36 |   
 37 | 3.后续工作：
 38 |   - 测试R-MAC NetVLAD等网络在视觉检索中的效果和评测
 39 |   - 测试使用GAN的方法增强检索效果
 40 |   ```angularjs
 41 | 
 42 |     __Deep Adversarial Metric Learning__  
 43 |     Deep Metric learning cannot get full used the easy negative examples,to in [Deep Adversarial Metirc Learning](http://openaccess.thecvf.com/content_cvpr_2018/papers/Duan_Deep_Adversarial_Metric_CVPR_2018_paper.pdf) proposal a new framework called DAML  
 44 |     __DeepMetric and Deep Hashing Apply__  
 45 |      apply method to Fashion,vehicle and Person-ReID domain  
 46 |     __Construct a datasets__ crawl application-domain data  
 47 | ```
 48 | # Dataset
 49 | 
 50 | [CUB200_2011](http://www.vision.caltech.edu/visipedia/CUB-200.html): A small part of ImageNet  
 51 | [LFW](http://vis-www.cs.umass.edu/lfw/):face dataset  
 52 | [StanfordOnlineProducts](http://cvgl.stanford.edu/projects/lifted_struct/): a lot of types of products(furniture,bicycle,cups)  
 53 | [Street2Shop](http://www.tamaraberg.com/street2shop/):products data set from ebay  
 54 | [DeepFashion](http://mmlab.ie.cuhk.edu.hk/projects/DeepFashion.html):all are colthes    
 55 | 
 56 | # 图像检索的应用
 57 | - Face indentification: deep metric learning for face cluster,from [FaceNet](http://arxiv.org/abs/1503.03832) to [SphereFace](http://ieeexplore.ieee.org/document/8100196/)
 58 | - Person ReIdentification:deep metric learning for Pedestrian Re-ID,from [MARS](https://pdfs.semanticscholar.org/c038/7e788a52f10bf35d4d50659cfa515d89fbec.pdf) to [NPSM&SVDNet](https://blog.csdn.net/u013982164/article/details/79608100)
 59 | - Vehicle Search:deep metric learning for fake-licensed car or Vehicle retrieval.
 60 | - Street2Products:search fashion clothe from street photos or in-shop photos,namely visual-search.from [DeepRanking](https://users.eecs.northwestern.edu/~jwa368/pdfs/deep_ranking.pdf) to [DAML](http://openaccess.thecvf.com/content_cvpr_2018/papers/Duan_Deep_Adversarial_Metric_CVPR_2018_paper.pdf)
 61 | ## Deep Metric Learning mile-stone paper:  
 62 | 1.[DrLIM:Dimensionality Reduction by Learning an Invariant Mapping](http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf)  
 63 | 2.[DeepRanking:Learning fine-graied Image Similarity with DeepRanking](https://users.eecs.northwestern.edu/~jwa368/pdfs/deep_ranking.pdf)  
 64 | 3.[DeepID2:Deep Learning Face Representation by Joint Identification-Verification](http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf)  
 65 | 4.[FaceNet:FaceNet: A Unified Embedding for Face Recognition and Clustering](http://arxiv.org/abs/1503.03832)  
 66 | 5.[Defense:In Defense of the Triplet Loss for Person Re-Identification](http://arxiv.org/abs/1703.07737)  
 67 | 6.[N-pair:Improved Deep Metric Learning with Multi-class N-pair Loss Objective](http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/papers/nips16_npairmetriclearning.pdf)  
 68 | 7.[Sampling:Sampling Matters in Deep Embedding Learning](https://arxiv.org/abs/1706.07567)  
 69 | 8.[DAML:Deep Adversarial Metric Learning](http://openaccess.thecvf.com/content_cvpr_2018/papers/Duan_Deep_Adversarial_Metric_CVPR_2018_paper.pdf)  
 70 | 9.[SphereFace:Deep Hypersphere Embedding for Face Recognition](http://ieeexplore.ieee.org/document/8100196/)
 71 | 
 72 | 
 73 | # 部分DeepHash的工作  
 74 | DeepHash能将图片直接哈希到汉明码，使用faiss的IVF Binary 系列搜索加速，通过存储量大大减少。
 75 | 
 76 | ## ReImplementation of HashNet
 77 | ```angular2html
 78 | python train_hash.py --params
 79 | ```
 80 | 
 81 | 
 82 | ## Deep Hash Learning mile-stone paper:
 83 | 1.[CNNH:Supervised Hashing for Image Retrieval via Image Representation Learning](https://www.aaai.org/ocs/index.php/AAAI/AAAI14/paper/view/8137/8861)  
 84 | 2.[DNNH:Simultaneous feature learning and hash coding with deep neural networks](http://ieeexplore.ieee.org/document/7298947/)  
 85 | 3.[DLBHC:Deep Learning of Binary Hash Codes for Fast Image Retrieval](http://www.iis.sinica.edu.tw/~kevinlin311.tw/cvprw15.pdf)  
 86 | 4.[DSH:Deep Supervised Hashing for Fast Image Retrieval](https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Liu_Deep_Supervised_Hashing_CVPR_2016_paper.pdf)  
 87 | 5.[SUBIC:SuBiC: A Supervised, Structured Binary Code for Image Search](http://ieeexplore.ieee.org/document/8237358/)  
 88 | 6.[HashNet:HashNet:Deep Learning to Hash by Continuous](https://arxiv.org/abs/1702.00758)  
 89 | 7.[DCH:Deep Cauchy Hashing for Hamming Space Retrieval](http://openaccess.thecvf.com/content_cvpr_2018/html/Cao_Deep_Cauchy_Hashing_CVPR_2018_paper.html)  
 90 | 
 91 | 
 92 | ## 视觉和文本共同嵌入 Visual-semantic-align embedding(cross modal retrieval)
 93 | 1.[VSE++: Improving Visual-Semantic Embeddings with Hard Negatives](http://arxiv.org/abs/1707.05612)  
 94 | 2.[Dual-Path Convolutional Image-Text Embedding with Instance Loss](http://arxiv.org/abs/1711.05535)  
 95 | ```bash
 96 | python train_vse.py --params
 97 | ```
 98 | 
 99 | ## 其他类型的搜索方式
100 | 1.Sketch based [Deep Sketch Hashing: Fast Free-hand Sketch-Based Image Retrieval](https://github.com/ymcidence/DeepSketchHashing)  
101 | 2.Text cross modal based [Deep Cross-Modal Hashing](https://github.com/jiangqy/DCMH-CVPR2017)
102 | 
103 | ## 近似近邻搜索加速
104 | ANNS (Approximation Nearest Neighbor Search) to search a query vector in gallery database.
105 | 测试数据集
106 | - [SIFT1M](http://corpus-texmex.irisa.fr/) typical 128-dim sift vector
107 | - [DEEP1B](http://sites.skoltech.ru/compvision/noimi/)，proposed by yandex.inc,this is a deep descriptor
108 | - [GIST1M](http://corpus-texmex.irisa.fr/) typical 512-dim gist vector
109 | 
110 | ### papers
111 | -- PQ based  
112 | 1.将传统的标量量化，转成分段乘积量化[Product Quantization for Nearest Neighbor Search](http://ieeexplore.ieee.org/document/5432202/)  
113 | 2.类似于Cartisian Quantization，将向量整体进行旋转，使得聚类的分段坐标轴和向量对齐，聚类中心点和数据之间的重建误差小，压缩损失就小[Optimized Product Quantization](http://ieeexplore.ieee.org/document/6678503/) 
114 | 
115 | 3.[Revisiting the Inverted Indices for Billion-Scale Approximate Nearest Neighbors](http://arxiv.org/abs/1802.02422)提出使用anchor-point也即line-quantizition点来切分区域group,在搜索是pruning部分区域加速。和RobustiQ这篇论文撞衫。该论文中提出在一层中建立HNSW加速candidates center-point搜索选取。     
116 | 4.粗量化使用双倒排，可以降低聚类维度，增加聚类中心点，使用Multi-Sequence算法提高粗略命中速度[The Inverted MultiIndex](http://cache-ash04.cdn.yandex.net/download.yandex.ru/company/cvpr2012.pdf)      
117 | 5.多义码，将汉明码距离和量化中心点的距离建立映射关系，对entry-point有过滤作用 [Polysemous codes](https://arxiv.org/abs/1609.01882)  
118 | 6.Additive Quantization 两篇论文：Additive Quantization for Extreme Vector Compression.使用稀疏加量化，量化误差更小，但需要额外存储与计算。   
119 | 7.Composite Quantization.和上述Additive Quantizition 撞衫，额外提出NOCQ作为APQ替代，加速计算。     
120 | 8.RobustiQ:A Robust ANN Search Method for Billion-scale Similarity Search on GPUs. 和第3篇论文很像，在传统IVF上添加了Line Quantization，也叫分组剪枝点，提升PQ搜索指标。  
121 | 
122 | -- 索引分层：  
123 | 1.Zoom:Multi-View Vector Search for Optimizing Accuracy, Latency and Memory 使用全量数据k-means聚类，大量百万级中心点建立第一层导航图HNSW来代替以前的IVF，第二层使用量化加速计算排序，不限于PQ或者APQ，第三层全精度计算排序。这种典型的三层索引方式可以使得每层能采用组件替换的方式来优化索引。  
124 | 2.Pyramid:A General Framwork for Distributed Similarity Search.香港中文大学郑尚策实验室的，对比了naive-HNSW分图和metaHNSW-subHNSW两种分图方式对搜索效果影响。使用两层HNSW进行搜索。使用meta-HNSW的上层图解决了跨机器搜索的问题。该方法相对于简易切分naive-HNSW建图增加了meta-HNSW建图，全量数据点查询，分块构建三部分时间，建图比较慢。但搜索过程中meta-HNSW起到了哈希的方法，比naive-HNSW全集群请求再合并的吞吐量大不少。 
125 | 3.索引分层和图优化，量化优化结合在一起，可以进行索引改进设计。  
126 | 
127 | -- 子集索引  
128 | 1.Reconfigurable Inverted Index:旨在解决搜索过程中既有向量近似需求，也有标签过滤需求的搜索场景。文中提出了subset search 问题，并给出了两种解决方案。虽然已经相当巧妙，但未看出来如何解决十亿级别数据如何解决。  
129 | 
130 | 
131 | 
132 | -- Graph Based  
133 | 1.可参见NSW 浏览小世界 论文，结合跳表结构构造的索引[Approximate nearest neighbor algorithm based on navigable small world graphs](https://linkinghub.elsevier.com/retrieve/pii/S0306437913001300)   
134 | 2.[Efficient and robust approximate nearest neighbor search using hierarchical Navigable Small World graphs](http://arxiv.org/abs/1603.09320)   
135 | 7.[EFANNA : An Extremely Fast Approximate Nearest Neighbor Search Algorithm Based on kNN Graph](http://arxiv.org/abs/1609.07228)  
136 | 8.[A Revisit on Deep Hashings for Large-scale Content Based Image Retrieval](http://arxiv.org/abs/1711.06016)   
137 | 9.[RobustiQ A Robust ANN Search Method for Billion-scale Similarity Search on GPUs](http://users.monash.edu/~yli/assets/pdf/icmr19-sigconf.pdf)    
138 | 10.[GGNN: Graph-based GPU Nearest Neighbor Search](https://arxiv.org/pdf/1912.01059.pdf)   
139 | 11.[Zoom: Multi-View Vector Search for Optimizing Accuracy, Latency and Memory](https://www.microsoft.com/en-us/research/uploads/prod/2018/08/zoom-multi-view-tech-report.pdf)    
140 | 12. [Vector and Line Quantization for Billion-scale Similarity Search on GPUs](https://arxiv.org/pdf/1901.00275.pdf)    
141 | 13.[DiskANN:Fast Accurate Billion-point Nearest Neighbor Search on a Single Node](https://suhasjs.github.io/files/diskann_neurips19.pdf) 。提出vamana-Graph 解决了在单机建图解决十亿向量搜素问题。
142 | 
143 | -- Hamming Code  
144 | 1.[Fast Exact Search in Hamming Space with Multi-Index Hashing](http://arxiv.org/abs/1307.2982)  
145 | 2.[Fast Nearest Neighbor Search in the Hamming Space](http://link.springer.com/10.1007/978-3-319-27671-7_27)   
146 | 4.[Web-Scale Responsive Visual Search at Bing](http://arxiv.org/abs/1802.04914)  
147 | 5.[Recurrent Binary Embedding for GPU-Enabled Exhaustive Retrieval from Billion-Scale Semantic Vectors](http://arxiv.org/abs/1802.06466) 
148 | ### library
149 | 1.[faiss] 当前faiss包含IVF,IMI，PQ,OPQ,PCA,二级残差量化ReRank-PQ,HNSW,Link and Code 等各种类型的索引引擎
150 | 2.索引格式选择：高容量，低精度 IMI+OPQ+reRank  
151 |               高精度，选择HNSW,当前NSG索引不支持增量插入，没有采用
152 | 3.sptag,基于空间切分建图的方式   
153 | ### framework    
154 | 1.vearch :by JingDong AI.  和深度学习模型深度贴合     
155 | 2.milvus :by zilliz.  以数据库的思路来做的    
156 | 
157 | 
158 | 


--------------------------------------------------------------------------------
/train_mx_ebay_margin.py:
--------------------------------------------------------------------------------
  1 | # mxnet train ebay dataset,model copy from mxnet example deep learning emebding
  2 | 
  3 | import argparse
  4 | import logging
  5 | import time
  6 | import numpy as np
  7 | 
  8 | from bottleneck import argpartition
  9 | import mxnet as mx
 10 | from data import getCUB200,getEbayCrossClassData,getEbayInClassData
 11 | from data import getDeepInClassFashion,getDeepCrossClassFashion
 12 | import os
 13 | from mxnet import gluon
 14 | import mxnet.gluon.model_zoo.vision as vision
 15 | from mxnet import autograd as ag
 16 | from mxnet import nd
 17 | from models.mx_margin_model import MarginLoss,MarginNet
 18 | from utils import Visulizer
 19 | from configs import opt as opt_conf
 20 | import ipdb
 21 | from tqdm import tqdm
 22 | logging.basicConfig(level=logging.INFO)
 23 | 
 24 | parser = argparse.ArgumentParser(description='train a model for image classification.')
 25 | parser.add_argument('--data', type=str, default='CUB_200_2011',
 26 |                     help='path of data.')
 27 | parser.add_argument('--embed-dim', type=int, default=128,
 28 |                     help='dimensionality of image embedding. default is 128.')
 29 | parser.add_argument('--batch-size', type=int, default=70,
 30 |                     help='training batch size per device (CPU/GPU). default is 70.')
 31 | parser.add_argument('--batch-k', type=int, default=5,
 32 |                     help='number of images per class in a batch. default is 5.')
 33 | parser.add_argument('--gpus', type=str, default='',
 34 |                     help='list of gpus to use, e.g. 0 or 0,2,5. empty means using cpu.')
 35 | parser.add_argument('--epochs', type=int, default=20,
 36 |                     help='number of training epochs. default is 20.')
 37 | parser.add_argument('--optimizer', type=str, default='adam',
 38 |                     help='optimizer. default is adam.')
 39 | parser.add_argument('--lr', type=float, default=0.0001,
 40 |                     help='learning rate. default is 0.0001.')
 41 | parser.add_argument('--lr-beta', type=float, default=0.1,
 42 |                     help='learning rate for the beta in margin based loss. default is 0.1.')
 43 | parser.add_argument('--margin', type=float, default=0.2,
 44 |                     help='margin for the margin based loss. default is 0.2.')
 45 | parser.add_argument('--beta', type=float, default=1.2,
 46 |                     help='initial value for beta. default is 1.2.')
 47 | parser.add_argument('--nu', type=float, default=0.0,
 48 |                     help='regularization parameter for beta. default is 0.0.')
 49 | parser.add_argument('--factor', type=float, default=0.5,
 50 |                     help='learning rate schedule factor. default is 0.5.')
 51 | parser.add_argument('--steps', type=str, default='12,14,16,18',
 52 |                     help='epochs to update learning rate. default is 12,14,16,18.')
 53 | parser.add_argument('--wd', type=float, default=0.0001,
 54 |                     help='weight decay rate. default is 0.0001.')
 55 | parser.add_argument('--seed', type=int, default=123,
 56 |                     help='random seed to use. default=123.')
 57 | parser.add_argument('--model', type=str, default='resnet50_v2',
 58 |                     help='type of model to use. see vision_model for options.')
 59 | parser.add_argument('--save-model-prefix', type=str, default='margin_loss_model',
 60 |                     help='prefix of models to be saved.')
 61 | parser.add_argument('--use_pretrained', action='store_true',
 62 |                     help='enable using pretrained model from gluon.')
 63 | parser.add_argument('--kvstore', type=str, default='device',
 64 |                     help='kvstore to use for trainer.')
 65 | parser.add_argument('--log-interval', type=int, default=20,
 66 |                     help='number of batches to wait before logging.')
 67 | parser.add_argument('--debug',action='store_true',
 68 |                     help='enable debug to run through the model pipline')
 69 | parser.add_argument('--use_viz',action='store_true',
 70 |                     help='enable using visualization to vis the loss curve')
 71 | parser.add_argument('--name',type=str,default='cub200',
 72 |                     help='the train instance name')
 73 | parser.add_argument('--load_model_path',type=str,default='checkpoints/Fashion_In.params',
 74 |                     help='the trained model')
 75 | 
 76 | opt = parser.parse_args()
 77 | opt.save_model_prefix = opt.name # force save model prefix to name
 78 | logging.info(opt)
 79 | # Settings.
 80 | mx.random.seed(opt.seed)
 81 | np.random.seed(opt.seed)
 82 | 
 83 | batch_size = opt.batch_size
 84 | 
 85 | os.environ['CUDA_VISIBLE_DEVICES']='0,1,2,3'
 86 | gpus = [] if opt.gpus is None or opt.gpus is '' else [
 87 |     int(gpu) for gpu in opt.gpus.split(',')]
 88 | num_gpus = len(gpus)
 89 | 
 90 | batch_size *= max(1, num_gpus)
 91 | context = [mx.gpu(i) for i in gpus] if num_gpus > 0 else [mx.cpu()]
 92 | steps = [int(step) for step in opt.steps.split(',')]
 93 | 
 94 | # Construct model.
 95 | kwargs = {'ctx': context, 'pretrained': opt.use_pretrained}
 96 | net = vision.get_model(opt.model, **kwargs)
 97 | 
 98 | if opt.use_pretrained:
 99 |     # Use a smaller learning rate for pre-trained convolutional layers.
100 |     for v in net.collect_params().values():
101 |         if 'conv' in v.name:
102 |             setattr(v, 'lr_mult', 0.01)
103 | 
104 | net.hybridize()
105 | net = MarginNet(net.features, opt.embed_dim, opt.batch_k)
106 | beta = mx.gluon.Parameter('beta', shape=(100000,))
107 | data_dict={'CUB_200_2011':{'data_dir':'CUB_200_2011','func':getCUB200},
108 |            'EbayInClass':{'data_dir':'Stanford_Online_Products','func':getEbayInClassData},
109 |            'EbayCrossClass':{'data_dir':'Stanford_Online_Products','func':getEbayCrossClassData},
110 |            'DeepFashionInClass':{'data_dir':'DeepInShop','func':getDeepInClassFashion},
111 |            'DeepFashionCrossClass':{'data_dir':'DeepInShop','func':getDeepCrossClassFashion}}
112 | if opt.debug:
113 |     ipdb.set_trace()
114 | train_dataloader,val_dataloader = data_dict[opt.data]['func'](os.path.join('data/',data_dict[opt.data]['data_dir']),
115 |                                                               batch_k=opt.batch_k,batch_size=opt.batch_size)
116 | # if opt.data=='Ebay':
117 | #     train_dataloader,val_dataloader = getEbayData(os.path.join('data/',opt.data),batch_k=opt.batch_k,batch_size=batch_size )
118 | # elif opt.data=='CUB_200_2011':
119 | #     train_dataloader,val_dataloader = getCUB200(os.path.join('data/',opt.data),batch_k=opt.batch_k,batch_size=batch_size )
120 | #train_dataloader has datashape [1,batch_size,channel,W,H] for image data,[1,batch_size,1] for label
121 | #test_dataloader has datashape  [batch_size,channel,W,H] for image data,[batch_size,1] for label
122 | # use viz
123 | if opt.use_viz:
124 |     viz = Visulizer(host=opt_conf.vis_host,port=opt_conf.vis_port,env='mx_margin'+opt.name)
125 |     viz.log(str(opt))
126 |     viz.log("start to train mxnet marging model name:%s"%(opt.name))
127 | 
128 | def get_distance_matrix(x):
129 |     """Get distance matrix given a matrix. Used in testing."""
130 |     if opt.use_viz:
131 |         viz.log("begin to compute distance matrix")
132 |     square = nd.sum(x ** 2.0, axis=1, keepdims=True)
133 |     distance_square = square + square.transpose() - (2.0 * nd.dot(x, x.transpose()))
134 |     return nd.sqrt(distance_square)
135 | 
136 | def evaluate_emb(emb, labels):
137 |     """Evaluate embeddings based on Recall@k."""
138 |     d_mat = get_distance_matrix(emb)
139 |     #d_mat = d_mat.asnumpy()
140 |     #labels = labels.asnumpy() #directory operate on mxnet.ndarray if convert to numpy,would cause memeory error
141 | 
142 |     names = []
143 |     accs = []
144 |     for i in range(emb.shape[0]):
145 |         d_mat[i,i]=1e10
146 |     index_mat = nd.argsort(d_mat)
147 |     nd.waitall()
148 |     if opt.use_viz:
149 |         viz.log("nd all dist mat")
150 |     for k in [1, 2, 4, 8, 16]:
151 |         names.append('Recall@%d' % k)
152 |         correct, cnt = 0.0, 0.0
153 |         index_mat_part = index_mat[:,:k]
154 |         for i in range(emb.shape[0]):
155 |             if any(labels[i] == labels[nn] for nn in index_mat_part[i]):
156 |                 correct +=1
157 |             cnt +=1
158 |         # for i in range(emb.shape[0]):
159 |         #     d_mat[i, i] = 1e10
160 |         #     nns = argpartition(d_mat[i], k)[:k]
161 |         #     if any(labels[i] == labels[nn] for nn in nns):
162 |         #         correct += 1
163 |         #     cnt += 1
164 |         accs.append(correct/cnt)
165 |     return names, accs
166 | 
167 | 
168 | def test(ctx):
169 |     """Test a model."""
170 |     if opt.use_viz:
171 |         viz.log("begin to valid")
172 | 
173 |     outputs = []
174 |     labels = []
175 |     for i,batch in enumerate(val_dataloader):
176 |         data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
177 |         label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
178 |         # after split data is list of two data batch
179 |         for x in data:
180 |             outputs.append(net(x)[-1])
181 |         labels +=label
182 |         if (i+1)%(opt.log_interval*2) ==0:
183 |             viz.log("valid iter {0}".format(i))
184 |     outputs = nd.concatenate(outputs, axis=0)
185 |     labels = nd.concatenate(labels, axis=0)
186 |     viz.log("begin to eval embedding search")
187 |     return evaluate_emb(outputs, labels)
188 | 
189 | def get_lr(lr, epoch, steps, factor):
190 |     """Get learning rate based on schedule."""
191 |     for s in steps:
192 |         if epoch >= s:
193 |             lr *= factor
194 |     return lr
195 | 
196 | def train(epochs,ctx):
197 |     """Training function."""
198 |     if isinstance(ctx, mx.Context):
199 |         ctx = [ctx]
200 |     net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
201 | 
202 |     opt_options = {'learning_rate': opt.lr, 'wd': opt.wd}
203 |     if opt.optimizer == 'sgd':
204 |         opt_options['momentum'] = 0.9
205 |     if opt.optimizer == 'adam':
206 |         opt_options['epsilon'] = 1e-7
207 |     trainer = gluon.Trainer(net.collect_params(), opt.optimizer,
208 |                             opt_options,
209 |                             kvstore=opt.kvstore)
210 |     if opt.lr_beta > 0.0:
211 |         # Jointly train class-specific beta.
212 |         # See "sampling matters in deep embedding learning" paper for details.
213 |         beta.initialize(mx.init.Constant(opt.beta), ctx=ctx)
214 |         trainer_beta = gluon.Trainer([beta], 'sgd',
215 |                                      {'learning_rate': opt.lr_beta, 'momentum': 0.9},
216 |                                      kvstore=opt.kvstore)
217 | 
218 |     loss = MarginLoss(margin=opt.margin, nu=opt.nu)
219 | 
220 | 
221 |     best_val =0.0
222 |     for epoch in range(epochs):
223 |         tic = time.time()
224 |         prev_loss,cumulative_loss = 0.0,0.0
225 | 
226 |         # Learning rage schedule
227 |         trainer.set_learning_rate(get_lr(opt.lr,epoch,steps,opt.factor))
228 |         if opt.use_viz:
229 |             viz.log("Epoch {0} learning rate = {1}".format(epoch,trainer.learning_rate))
230 |         if opt.lr_beta>0:
231 |             trainer_beta.set_learning_rate(get_lr(opt.lr_beta,epoch,steps,opt.factor))
232 |             viz.log("Epoch {0} beta learning rate={1}".format(epoch,trainer_beta.learning_rate))
233 | 
234 |         #Inner training loop
235 |         for i,batch_data in enumerate(train_dataloader):
236 |             batch = batch_data[0][0] # batch_data is a tuple(x,y) x shape is [1,70,3,227,227]
237 |             label = batch_data[1][0]
238 |             data = gluon.utils.split_and_load(batch,ctx_list=ctx,batch_axis=0)
239 |             label = gluon.utils.split_and_load(label,ctx_list=ctx,batch_axis=0)
240 | 
241 |             # After split,the data and label datatype is list
242 |             Ls = []
243 |             with ag.record():
244 |                 for x, y in zip(data, label):
245 |                     a_indices, anchors, positives, negatives, _ = net(x)
246 | 
247 |                     if opt.lr_beta > 0.0:
248 |                         L = loss(anchors, positives, negatives, beta, y[a_indices])
249 |                     else:
250 |                         L = loss(anchors, positives, negatives, opt.beta, None)
251 | 
252 |                     # Store the loss and do backward after we have done forward
253 |                     # on all GPUs for better speed on multiple GPUs.
254 |                     Ls.append(L)
255 |                     cumulative_loss += nd.mean(L).asscalar()
256 | 
257 |                 for L in Ls:
258 |                     L.backward()
259 | 
260 |             # Update.
261 |             trainer.step(batch.shape[0])
262 |             if opt.lr_beta > 0.0:
263 |                 trainer_beta.step(batch.shape[0])
264 | 
265 |             if (i + 1) % opt.log_interval == 0:
266 |                 viz.log('[Epoch {0}, Iter {1}] training loss={2}'.format(
267 |                     epoch, i + 1, cumulative_loss - prev_loss))
268 |                 if opt.use_viz:
269 |                     viz.plot('margin_loss',cumulative_loss-prev_loss)
270 |                 prev_loss = cumulative_loss
271 |             if opt.debug:
272 |                 import ipdb
273 |                 ipdb.set_trace()
274 |                 break
275 | 
276 |         viz.log('[Epoch {0}] training loss={1}'.format(epoch, cumulative_loss))
277 |         viz.log('[Epoch {0}] time cost: {1}'.format(epoch, time.time() - tic))
278 | 
279 |         names, val_accs = test(ctx)
280 |         for name, val_acc in zip(names, val_accs):
281 |             viz.log('[Epoch {0}] validation: {1}={2}'.format(epoch, name, val_acc))
282 |         viz.plot('recall@1',val_accs[0])
283 | 
284 |         if val_accs[0] > best_val:
285 |             best_val = val_accs[0]
286 |             viz.log('Saving {0}'.format(opt.save_model_prefix))
287 |             net.save_parameters('checkpoints/%s.params' % opt.save_model_prefix)
288 |     return best_val
289 | 
290 | 
291 | def extract_feature():
292 |     """
293 |     extract data feature vector and save
294 |     :param model:
295 |     :param dataloader:
296 |     :return:
297 |     """
298 |     global net
299 |     deepfashion_csv = 'checkpoints/deepfashion.csv' # write vector to this file
300 |     net.initialize()
301 |     net.collect_params().reset_ctx(context)
302 |     net.load_parameters(opt.load_model_path,ctx=context)
303 |     import csv
304 |     f = open(deepfashion_csv,'w')
305 |     writer = csv.writer(f,dialect='excel')
306 | 
307 |     for i,batch in tqdm(enumerate(val_dataloader)):
308 |         batch_size = batch[0].shape[0]
309 |         data = gluon.utils.split_and_load(batch[0], ctx_list=context, batch_axis=0)
310 |         label = gluon.utils.split_and_load(batch[1], ctx_list=context, batch_axis=0)
311 |         # after split data is list of two data batch
312 |         small_batch_feature = []
313 |         for x in data:
314 |             feature = net.extract(x)
315 |             small_batch_feature.append(feature)
316 |         image_id = np.arange(i*batch_size,(i+1)*batch_size).reshape(-1,1) # prepare the image_id
317 |         vector = nd.concatenate(small_batch_feature,axis=0).asnumpy() # concatenate the feature
318 |         label = np.array([x.asnumpy() for x in label]).reshape(-1,1)
319 |         result = np.hstack((image_id,label,vector))
320 |         writer.writerows(result)
321 |     print("finished extract feature")
322 |     f.close()
323 |     return "True finished"
324 | 
325 | 
326 | 
327 | 
328 | 
329 | if __name__ == '__main__':
330 |     import ipdb
331 |     ipdb.set_trace()
332 |     best_val_recall = train(opt.epochs,context)
333 |     print("Best validation Recall@1:%.2f"%(best_val_recall))
334 | 
335 |     # result = extract_feature()
336 |     # print(result)


--------------------------------------------------------------------------------