├── main.py
├── __init__.py
├── models
│   ├── visnet.py
│   ├── __init__.py
│   ├── loss.py
│   ├── vgg_classify.py
│   ├── dml_model.py
│   ├── hashnet.py
│   ├── sample_dml.py
│   └── mx_margin_model.py
├── checkpoints
│   └── deep_test.txt
├── data
│   ├── classify
│   │   ├── __init__.py
│   │   └── ClassifyData.py
│   ├── hashdata
│   │   ├── __init__.py
│   │   └── hash_tri_files.py
│   ├── n_pair_mc
│   │   ├── __init__.py
│   │   ├── deep_in_fashion.py
│   │   └── npair_dataset.py
│   ├── margin_cub200
│   │   ├── __init__.py
│   │   └── cub200_margin.py
│   ├── mxdata
│   │   ├── basic_module
│   │   │   ├── __init__.py
│   │   │   └── basic_transform.py
│   │   ├── __init__.py
│   │   ├── composedataset.py
│   │   ├── mxcub_simple.py
│   │   ├── mxcub200.py
│   │   ├── online_products.py
│   │   └── deep_fashion.py
│   └── __init__.py
├── utils
│   ├── train_mx_margin.py
│   ├── __init__.py
│   ├── mxnet_server_client.py
│   ├── extract_Ebaytxt_fromDeepFashion.py
│   ├── sku_viewer.py
│   ├── log_config.py
│   ├── parse_deepinshopdata.py
│   ├── visulization.py
│   └── vis_tsne_images.py
├── server
│   ├── __init__.py
│   ├── ab_test.py
│   └── copy_nn.py
├── requiremetns.txt
├── extract_feature.sh
├── train_cub.sh
├── train_fashion_inclass.sh
├── train_In_classEbay.sh
├── train_cross_classEbay.sh
├── train_fashion_inclass2.sh
├── manage_visdom.py
├── .gitignore
├── configs.py
├── docs
│   └── deep-metric-learning.MD
├── train_classify.py
├── train_hash.py
├── train_mc_npair.py
├── train_margin_cub.py
├── train_mx_margin.py
├── README.MD
└── train_mx_ebay_margin.py
/main.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/models/visnet.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/checkpoints/deep_test.txt:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/data/classify/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/data/hashdata/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/data/n_pair_mc/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/utils/train_mx_margin.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/data/margin_cub200/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/data/mxdata/basic_module/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/server/__init__.py:
--------------------------------------------------------------------------------
1 | from models import MarginNet
--------------------------------------------------------------------------------
/requiremetns.txt:
--------------------------------------------------------------------------------
1 | visdom
2 | fire
3 | numpy
4 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .visulization import Visulizer
2 | from .log_config import logger
--------------------------------------------------------------------------------
/data/mxdata/__init__.py:
--------------------------------------------------------------------------------
1 | from data.mxdata.basic_module.basic_transform import default_transform,test_transform
--------------------------------------------------------------------------------
/utils/mxnet_server_client.py:
--------------------------------------------------------------------------------
1 | import requests
2 |
3 | # simple liveness check against the feature server
4 | url = 'http://hpc5.yud.io:8080/ping'
5 | ret = requests.post(url)
6 | print(ret)
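7 |
8 | # hedged sketch only: the real feature-extraction endpoint and payload schema
9 | # are not shown in this repo, so the URL and JSON body below are assumptions
10 | # resp = requests.post('http://hpc5.yud.io:8080/extract', json={'image_url': '...'})
11 | # print(resp.json())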
--------------------------------------------------------------------------------
/extract_feature.sh:
--------------------------------------------------------------------------------
1 | python train_mx_ebay_margin.py --gpus=1 --batch-size=100 --use_viz --name=ExtractDeepFashion --data=DeepFashionInClass --load_model_path=checkpoints/Fashion_In.params
--------------------------------------------------------------------------------
/train_cub.sh:
--------------------------------------------------------------------------------
1 | nohup python train_mx_ebay_margin.py --gpus=0 --batch-k=5 --batch-size=70 --use_pretrained --use_viz --epochs=30 --name=CUB_200_2011 --data=CUB_200_2011 >mytraincub200.log 2>&1 &
2 |
--------------------------------------------------------------------------------
/train_fashion_inclass.sh:
--------------------------------------------------------------------------------
1 | nohup python train_mx_ebay_margin.py --gpus=2 --batch-k=4 --batch-size=100 --use_pretrained --use_viz --name=Fashion_In --data=DeepFashionInClass >mytrainDeep_Inclass.log 2>&1 &
--------------------------------------------------------------------------------
/train_In_classEbay.sh:
--------------------------------------------------------------------------------
1 | nohup python train_mx_ebay_margin.py --gpus=3 --batch-k=2 --batch-size=80 --use_pretrained --use_viz --epochs=55 --name=Ebay_Inclass --data=EbayInClass >mytrainEbay_Inclass.log 2>&1 &
2 |
--------------------------------------------------------------------------------
/train_cross_classEbay.sh:
--------------------------------------------------------------------------------
1 | nohup python train_mx_ebay_margin.py --gpus=0 --batch-k=2 --batch-size=80 --epochs=55 --use_pretrained --use_viz --name=Ebay_Crossclass --data=EbayCrossClass >mytrainEbay_Crossclass.log 2>&1 &
2 |
--------------------------------------------------------------------------------
/train_fashion_inclass2.sh:
--------------------------------------------------------------------------------
1 | nohup python train_mx_ebay_margin.py --gpus=2 --epochs=40 --steps=14,16,20,30 --batch-k=4 --batch-size=100 --use_pretrained --use_viz --name=Fashion_In2 --data=DeepFashionInClass >mytrainDeep_Inclass2.log 2>&1 &
--------------------------------------------------------------------------------
/manage_visdom.py:
--------------------------------------------------------------------------------
1 | from utils import Visulizer
2 | from configs import opt
3 | from visdom import Visdom
4 |
5 | viz = Visulizer(opt.vis_host,opt.vis_port,env='main')
6 | print(viz)
7 | viz.delete_env('dmldml3')
8 | print("finished")
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .vgg_classify import BaseModule
2 | from .vgg_classify import VggClassify
3 | from .dml_model import ModGoogLeNet
4 | from .loss import NpairLoss
5 | from .sample_dml import Margin_Loss,SampleModel
6 | from .mx_margin_model import MarginNet
7 | from .hashnet import HashNetRes50,HashLoss
--------------------------------------------------------------------------------
/utils/extract_Ebaytxt_fromDeepFashion.py:
--------------------------------------------------------------------------------
1 | # Extract an Ebay-style txt file for the DeepFashion In-shop dataset:
2 | # walk the ImageFolder files and build a pandas table stored in a plain text file.
3 |
4 | # A dataset instance in the Stanford Online Products format can then be built from DeepFashion.
5 |
6 | import numpy as np
7 | import pandas as pd
8 | import csv
9 | import os
10 | import shutil
11 | import sys
12 |
13 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.out
2 | *.log
3 | checkpoints/*.pth
4 | checkpoints/*.csv
5 | checkpoints/*00.jpg
6 | data/Stanford_Online_Products
7 | data/DeepInShop
8 | data/cub200_2011
9 | data/CUB_200_2011
10 | data/hashdata/coco
11 | data/hashdata/nus_wide
12 | *.png
13 | *.npy
14 | *.csv
15 | .ipynb_checkpoints/
16 | __pycache__/
17 | *.pyc
18 | *.params
19 | *.jpg
20 | *.jpeg
--------------------------------------------------------------------------------
/server/ab_test.py:
--------------------------------------------------------------------------------
1 | # this is an A/B test between image_nn_prod and image_metric_taobao128
2 |
3 | old_index = 'image_nn_prod'
4 | new_index = 'image_metric_taobao128'
5 |
6 | from .copy_nn import get_net
7 | from .copy_nn import get_target_colection
8 | from .copy_nn import get_nn_config
9 | from .copy_nn import get_db
10 |
11 | if __name__=='__main__':
12 | dev = get_db()
13 | host,path = get_nn_config()
14 | net = get_net(0)
15 |     nn_128 = get_target_colection(dev)
16 |
17 |
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
1 | import torchvision.transforms as T
2 |
3 | from .classify.ClassifyData import my_collate_fn,Street2shop
4 | from .n_pair_mc.npair_dataset import EbayDataset
5 | from .margin_cub200.cub200_margin import CUB200DataSet
6 | from .mxdata.mxcub200 import cub200_iterator
7 |
8 | from .mxdata.online_products import getEbayCrossClassData,getEbayInClassData
9 | from .mxdata.mxcub_simple import getCUB200
10 | from .mxdata.deep_fashion import getDeepCrossClassFashion,getDeepInClassFashion
11 | from .hashdata.hash_tri_files import get_hash_dataloader
--------------------------------------------------------------------------------
/utils/sku_viewer.py:
--------------------------------------------------------------------------------
1 | from pymongo import MongoClient
2 | from pprint import pprint
3 |
4 | mongodb = {}
5 | mongodb['host'] = ''
6 | mongodb['port'] = 3717
7 | client = MongoClient(host=mongodb['host'], port=mongodb['port'])
8 | dev = client.get_database('dev')
9 | dev.authenticate(name='', password='')
10 | print(dev.collection_names())
11 |
12 | tao_bao_collection = dev.get_collection('image_faiss_dual_taobao')
13 | print(tao_bao_collection.count())
14 |
15 | item = tao_bao_collection.find_one()
16 | pprint(item['_source'])
17 |
--------------------------------------------------------------------------------
/utils/log_config.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import time
4 |
5 | logger = logging.getLogger()
6 | logger.setLevel(logging.INFO)
7 | rq = time.strftime('%Y%m%d_%H%M', time.localtime(time.time()))
8 | log_path = os.path.join(os.getcwd(), 'Logs')
9 | os.makedirs(log_path, exist_ok=True)  # make sure the log directory exists
10 | log_name = os.path.join(log_path, rq + '.log')
11 | logfile = log_name
12 | file_handler = logging.FileHandler(logfile, 'w')
13 | file_handler.setLevel(logging.DEBUG)
14 |
15 | formatter = logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s")
16 | file_handler.setFormatter(formatter)
17 |
18 | # attach the file handler to the logger
19 | logger.addHandler(file_handler)
20 |
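21 | if __name__ == '__main__':
22 |     # added usage sketch: emits one INFO record into Logs/<timestamp>.log
23 |     logger.info("log_config loaded")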
--------------------------------------------------------------------------------
/data/mxdata/basic_module/basic_transform.py:
--------------------------------------------------------------------------------
1 | import mxnet as mx
2 | from mxnet.gluon import nn
3 | import mxnet.gluon.data.vision.transforms as T
4 |
5 |
6 | class RandomCrop(nn.Block):
7 |     def __init__(self,size):
8 |         super(RandomCrop, self).__init__()
9 |         self.size = size
10 |     def forward(self,x):
11 |         # mx.image.random_crop returns (cropped_image, (x, y, w, h)); keep only the image
12 |         return mx.image.random_crop(x,(self.size,self.size))[0]
13 |
14 | normalize=T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
15 | default_transform = T.Compose([
16 |     T.Resize(256),
17 |     RandomCrop(224),
18 |     T.RandomFlipLeftRight(),
19 |     T.ToTensor(), # last, to swap channels to (c, h, w)
20 |     normalize
21 | ])
22 |
23 | test_transform = T.Compose([
24 |     T.Resize(256),
25 |     T.CenterCrop(224),
26 |     T.ToTensor(),
27 |     normalize
28 | ])
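29 |
30 | if __name__ == '__main__':
31 |     # added sanity check (illustrative only): the pipeline expects an HWC uint8 NDArray
32 |     img = mx.nd.random.uniform(0, 255, shape=(300, 300, 3)).astype('uint8')
33 |     out = default_transform(img)
34 |     print(out.shape)  # expect (3, 224, 224) after ToTensor's HWC -> CHW swap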
--------------------------------------------------------------------------------
/data/n_pair_mc/deep_in_fashion.py:
--------------------------------------------------------------------------------
1 | # a PyTorch implementation of the DeepFashion In-shop dataset,
2 | # intended to train a multi-class n-pair model as a baseline
3 |
4 | import torchvision.transforms as T
5 | #from configs import opt
6 | from torch.utils.data import Dataset
7 | import os
8 | import csv
9 | import fnmatch
10 | from PIL import Image
11 | import numpy as np
12 | import pandas as pd
13 | import torch
14 |
15 | #normalize = T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
16 | normalize = T.Normalize(mean=[0.5, 0.5, 0.5],std=[0.5, 0.5, 0.5])
17 | default_transform = T.Compose([
18 | T.Resize(256),
19 | T.RandomCrop(227),
20 | T.RandomHorizontalFlip(),
21 | T.ToTensor(),
22 | normalize,
23 | ])
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/models/loss.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from torch.nn.functional import cross_entropy
3 | import torch
4 |
5 | class NpairLoss(nn.Module):
6 | """the multi-class n-pair loss"""
7 | def __init__(self,l2_reg=3e-3):
8 | super(NpairLoss,self).__init__()
9 | self.l2_reg = l2_reg
10 |
11 | def forward(self,feature,target):
12 | """
13 | compute the feature pair loss,the first half is anchor
14 | the last half is pair feature
15 | :param feature:
16 | :return:
17 | """
18 |
19 | batch_size = feature.size(0)
20 | fa = feature[:int(batch_size/2)]
21 | fp = feature[int(batch_size/2):]
22 | logit = torch.matmul(fa,torch.transpose(fp,0,1))
23 | loss_sce = cross_entropy(logit,target)
24 | l2_loss = sum(torch.norm(feature,p=2,dim=1))/batch_size
25 | loss = loss_sce + self.l2_reg*l2_loss
26 | return loss
27 |
28 |
29 |
30 |
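31 | if __name__=='__main__':
32 |     # added smoke test with assumed shapes: a batch of 10 features where the
33 |     # first 5 rows are anchors and the last 5 their positives; target[i] = i
34 |     # because anchor i is matched with positive i
35 |     feature = torch.randn(10, 512)
36 |     target = torch.arange(5)
37 |     print(NpairLoss()(feature, target))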
--------------------------------------------------------------------------------
/utils/parse_deepinshopdata.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | from mxnet.image import imread
4 | import os
5 | bboxfile = r'../Logs/Anno/list_bbox_inshop.txt'
6 | line = None
7 | skip=40006
8 | with open(bboxfile,'r') as f_box:
9 |     f_box.readline() # skip the image-count line
10 |     f_box.readline() # skip the header line
11 | for i in range(skip):
12 | f_box.readline()
13 | line = f_box.readline()
14 |
15 | img_dir = r'C:\download\In-shop-clothes'
16 | line_list = line.strip().split(' ')
17 | path,bbox = line_list[0],line_list[-4:]
18 | print('path:',path,"bbox",bbox)
19 | fig = plt.figure()
20 | plt.subplot(2,1,1)
21 | image = imread(os.path.join(img_dir,path))
22 | bbox=[int(x) for x in bbox]
23 | plt.imshow(image.asnumpy())
24 | plt.subplot(2,1,2)
25 | plt.imshow(image[bbox[1]:bbox[3],bbox[0]:bbox[2]].asnumpy())
26 | plt.show()
27 |
28 | img_path= r'C:\Users\Dengjun\Pictures\a.jpg'
29 | img = imread(img_path)
30 | print(img.shape)
31 |
32 |
--------------------------------------------------------------------------------
/configs.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | class DefaultConfig(object):
3 | num_classes = 13 # street2shop classes
4 | train_classify_dir = "/data/jh/notebooks/hudengjun/VisNet/classify"
5 | valid_classify_dir = "/data/jh/notebooks/hudengjun/VisNet/"
6 | persist = 'data/persist.csv'
7 | lr=0.001
8 | lr_step = 50
9 | use_gpu = True
10 | gpu_id = 2
11 | load_model_path=None
12 | num_workers = 4
13 | momentum=0.89
14 | max_epoch = 800
15 | print_freq = 40
16 | batch_size = 32
17 | vis_host="http://hpc3.yud.io"
18 | vis_port=8088
19 | vis_env='Street2shop'
20 | debug = False
21 |
22 |
23 | #tripplet dataset config
24 | ebay_dir = '/data/jh/notebooks/hudengjun/DML/deep_metric_learning/lib/online_products/Stanford_Online_Products/'
25 | n_pair_train = 'Ebay_train.txt'
26 | n_pair_test = 'Ebay_test.txt'
27 | embeding_size=512
28 | dml_model_path=None
29 | l2_reg=3e-3
30 | use_viz=True
31 | freeze_level=0
32 |
33 |
34 | def parse(self,kwargs):
35 | """update dict with kwargs params"""
36 | for k,v in kwargs.items():
37 | if not hasattr(self,k):
38 |                 warnings.warn("opt has no attribute: %s" % k)
39 | setattr(self,k,v)
40 | print("use config:")
41 | for k,v in self.__class__.__dict__.items():
42 | if not k.startswith('__'):
43 | print(k,':',getattr(self,k))
44 | opt = DefaultConfig()
45 |
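46 | if __name__ == '__main__':
47 |     # added usage sketch: override defaults the same way the train scripts call opt.parse(kwargs)
48 |     opt.parse({'lr': 0.01, 'batch_size': 64})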
--------------------------------------------------------------------------------
/docs/deep-metric-learning.MD:
--------------------------------------------------------------------------------
1 | - Since the advent of metric learning, supervised and unsupervised dimensionality reduction, and manifold learning theory, many learning algorithms have sought a mapping from the original data space to a low-dimensional embedding space that preserves
2 | the similarity and distance relations of the original space; learning such a space-transforming function is called metric learning. In traditional classification the number of classes is fairly fixed (e.g., ImageNet is split into 1000 classes following WordNet),
3 | but many real-world problems are not **closed-set** problems; they are open-set problems: at test time many samples and categories have never been seen before. The typical pattern is many classes with few samples per class, the canonical case being face recognition (hundreds of millions of identities, with at most a dozen or so snapshots per person).
4 | - Contrasting classification with metric learning: classification learns a concept, a coarse overview, akin to treating each case in isolation; metric learning learns a fine-grained scale, emphasizing the overall layout of latent attribute concepts.
5 | Put differently, classification encodes things in a **one-hot** format, while a metric encodes them as a **denseVector**.
6 | A concept cannot describe an unseen category, whereas a scale can still measure unseen categories by the same measurement rules. (For example, the former learns each person's name, while the latter learns how to measure people; when describing a stranger, describing attributes makes it far easier for the listener to form a picture.)
7 |
8 | - In the real world we rarely need to name everything we have seen; we do not care who someone is, only how far they are from other things. This is why face recognition improved when moving from the classification loss of DeepID2 to the triplet loss of FaceNet, and it is also why face recognition, person re-identification, vehicle retrieval (cloned-plate analysis), and product retrieval have reached production.
9 | - Through metric learning, real-world search problems (VisualSearch or Visual Recommendation) reduce to two simple steps: map the raw data into the embedding space with the learned mapping function, then find the nearest points in the embedding space with an approximate nearest neighbor search and look up the original data by their ids.
10 | The core components are a __relation-preserving space mapping function__ and a __fast, high-capacity vector search engine__. The former guarantees that after mapping from the raw data space to the embedding space, similarity relations are preserved finely: distances in the embedding space correlate positively with the observed attributes of the raw data.
11 | The latter guarantees efficient retrieval of nearby points over a huge search space (approximate nearest neighbors can be found in milliseconds in databases of hundreds of millions of items).
12 |
13 | - Traditional methods such as LDA, MDS, and IsoMap are shallow mapping functions or relation-preserving data transforms; moreover, MDS cannot embed unseen data points at all and must fall back to a fitted transform. The SNE algorithm reduces dimensionality by matching the distance distribution once pairwise high-dimensional distances are known, and is not suitable for visual images.
14 | A deep convolutional neural network, extracting features layer by layer, can capture deep semantic features; with suitable data loading and gradients guided by an objective function, it can learn a deep feature extractor. This approach is called Deep Metric Learning.
15 | Take telling cats from dogs as an example. A baby cannot learn the task; it takes a child with a sufficiently developed brain. If the child only ever sees white cats and black dogs, over time it will decide that white animals are cats and black ones are dogs, and will call a black cat a dog. Sometimes animals are presented one pair at a time, sometimes a batch at a time.
16 | And if the child is only ever asked to tell apart easy dogs and cats, without hard discrimination training, it will be lost when hard examples show up later.
17 | From this example, the prerequisites for training a deep metric learning function well are: model structure, the way raw data is sampled, the objective function, and hard examples. Most improvements in deep metric learning revisit one of these basic elements.
18 |
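19 | As a concrete anchor for the FaceNet-style objective mentioned above, here is a minimal triplet-loss sketch in PyTorch (illustrative only; the margin value and embedding size are assumptions, not values taken from this repo):
20 |
21 | ```python
22 | import torch
23 | import torch.nn.functional as F
24 |
25 | def triplet_loss(anchor, positive, negative, margin=0.2):
26 |     """hinge on the distance gap: d(a,p) should be smaller than d(a,n) by `margin`"""
27 |     d_ap = F.pairwise_distance(anchor, positive)
28 |     d_an = F.pairwise_distance(anchor, negative)
29 |     return torch.clamp(d_ap - d_an + margin, min=0).mean()
30 |
31 | # toy batch of 8 L2-normalized 128-d embeddings
32 | a, p, n = (F.normalize(torch.randn(8, 128), dim=1) for _ in range(3))
33 | print(triplet_loss(a, p, n))
34 | ```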
--------------------------------------------------------------------------------
/models/vgg_classify.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import time
4 | from torchvision.models import vgg16_bn
5 |
6 | class BaseModule(nn.Module):
7 | """model save and load"""
8 | def __init__(self):
9 | super(BaseModule,self).__init__()
10 | self.model_name = str(type(self))
11 | self.model_name='basemodel'
12 |
13 | def load(self,path):
14 | """
15 |         load the model weights
16 | :param path: reload model path
17 | :return: None
18 | """
19 | self.load_state_dict(torch.load(path))
20 |
21 | def save(self,name=None):
22 | """default modelname and time"""
23 | if name is None:
24 | prefix = 'checkpoints/'+self.model_name+'_'
25 | name = time.strftime(prefix + '%m%d_%H:%M:%S.pth')
26 | torch.save(self.state_dict(), name)
27 | return name
28 |
29 | class VggClassify(BaseModule):
30 | """a model viaries from vgg_16"""
31 | def __init__(self,num_classes):
32 | super(VggClassify, self).__init__()
33 | vgg16_model = vgg16_bn(pretrained=False)
34 | features,classifier = vgg16_model.features,vgg16_model.classifier
35 | classifier = list(classifier)
36 | del classifier[-1]
37 | classifier.append(nn.Linear(4096,num_classes))
38 | self.features = features
39 | self.classifier = nn.Sequential(*classifier)
40 | self.model_name = 'vgg_bn'
41 | def forward(self,x):
42 | features = self.features(x)
43 | features = features.view(features.size(0), -1)
44 | output = self.classifier(features)
45 | return output
46 |
47 | if __name__=='__main__':
48 | model = BaseModule()
49 | model.save()
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/utils/visulization.py:
--------------------------------------------------------------------------------
1 | from configs import opt
2 | import visdom
3 | import torch as t
4 | import time
5 | import numpy as np
6 | class Visulizer(object):
7 | """the object interface to store train trace to website"""
8 | def __init__(self,host=opt.vis_host,port=opt.vis_port,env=opt.vis_env):
9 | self.vis = visdom.Visdom(server=host,port=port,env=env)
10 |
11 | self.index ={}
12 | self.log_text=""
13 |
14 | def reinit(self,env='default'):
15 |         self.vis = visdom.Visdom(server=opt.vis_host,port=opt.vis_port,env=env)
16 | return self
17 |
18 | def plot(self,name,y):
19 | """plot loss:1.0"""
20 | x = self.index.get(name,0)
21 | self.vis.line(Y=np.array([y]),X=np.array([x]),
22 | win=name,
23 | opts=dict(title=name),
24 | update=None if x==0 else 'append')
25 | self.index[name] = x+1
26 |
27 | def img(self,name,img_,**kwargs):
28 | """
29 | :param name: the window name
30 | :param img_: img shape and data type,t.Tensor(64,64),Tensor(3,64,64),Tensor(100,1,64,64)
31 | :param kwargs:
32 | :return:
33 | """
34 | self.vis.images(t.Tensor(img_).cpu().numpy(),
35 | win=name,
36 | opts=dict(title=name),
37 | **kwargs)
38 |
39 |     def log(self,info,win='log_text'):
40 |         """usage: self.log({'loss':1,'lr':0.0001})"""
41 |         self.log_text += ('[{time}] {info} <br>'.format(
42 |             time=time.strftime('%m%d_%H%M%S'),
43 |             info=info))
44 |         self.vis.text(self.log_text, win)
45 |
46 | def delete_env(self,env):
47 | self.vis.delete_env(env)
48 |
49 | if __name__=='__main__':
50 |     """nohup python -m visdom.server --port=8088 &
51 |     starts the visdom server"""
52 | viz = Visulizer(host='http://192.168.3.13',port=8088,env='street')
53 | viz.log("this is a start")
54 | viz.plot('loss',2.3)
55 | viz.plot('loss',2.2)
56 | viz.plot('loss',2.1)
57 |
58 | viz.img('origin',np.random.random((10,3,224,224)))
--------------------------------------------------------------------------------
/data/mxdata/composedataset.py:
--------------------------------------------------------------------------------
1 | import mxnet.gluon.data.vision.transforms as T
2 | from mxnet.gluon.data import DataLoader,Dataset
3 | from mxnet import nd
4 | import numpy as np
5 | import os
6 | from mxnet.image import imread
7 | import pandas as pd
8 |
9 |
10 | normalize=T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
11 | default_transform = T.Compose([
12 | T.Resize(256),
13 | T.RandomResizedCrop(224),
14 | T.RandomFlipLeftRight(),
15 | T.ToTensor(),
16 | normalize
17 | ])
18 |
19 | class ComposeDataSet(Dataset):
20 | """ an Ebay and DeepFashion Composite Dataset to metric learning"""
21 | def __init__(self,ebay_dir,fashion_dir,batch_k,batch_size,is_train,transform=default_transform):
22 | self.ebay_dir = ebay_dir
23 | self.fashion_dir = fashion_dir
24 | self.batch_k = batch_k
25 | self.batch_size = batch_size
26 | self.is_train = is_train
27 | self._transform = transform
28 |
29 | #begin to resolve ebay data
30 |
31 | if self.is_train:
32 | #start ebay data
33 | table_name = os.path.join(self.ebay_dir,'Ebay_train.txt')
34 | table_data = pd.read_table(table_name,header=0,delim_whitespace=True)
35 | min_super_id, max_super_id = min(table_data.super_class_id), max(table_data.super_class_id)
36 |
37 | #this is the super id for ebaydata
38 | self.super_ids = np.arange(min_super_id, max_super_id + 1)
39 | self.super2class = {} #store a dict for {super_id:[class_id1,class_id2]}
40 | for super_id in self.super_ids:
41 | self.super2class[super_id] = table_data[table_data.super_class_id == super_id].class_id.tolist()
42 |
43 | min_class_id, max_class_id = min(table_data.class_id), max(table_data.class_id)
44 | self.class_ids = list(np.arange(min_class_id, max_class_id + 1))
45 | self.train_length = max_class_id + 1 - min_class_id
46 | self.super_id_dist = [len(v) for k, v in self.super2class.items()]
47 | for class_id in self.class_ids:
48 | one_class_paths = table_data[table_data.class_id == class_id].path.tolist() # type list
49 | self.class2imagefiless.append(one_class_paths)
50 |
51 | #Process deepfashion data
52 | extract_super_ids_to_class_ids
53 | else:
54 |
--------------------------------------------------------------------------------
/models/dml_model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torchvision
3 | import torch.nn as nn
4 |
5 | import torch.nn.functional as F
6 | from torchvision.models.inception import inception_v3
7 | from .vgg_classify import BaseModule
8 | from collections import OrderedDict
9 | class ModGoogLeNet(BaseModule):
10 | """the change the head from googlenet"""
11 | def __init__(self,embeding_size=512,with_drop=False):
12 | super(ModGoogLeNet,self).__init__()
13 | basic_model = inception_v3(pretrained=True, transform_input=False)
14 | basic_model.aux_logits=False
15 | feature = list(basic_model.named_children())
16 | def aux(name_module):
17 | return 'AuxLogits' not in name_module[0]
18 |
19 | del feature[-1]
20 | feature = filter(aux, feature) #generator
21 |         feature = self._named_children = [m for m in feature]  # keep the (name, module) pairs for freeze_model
22 | self.level1_2 = nn.Sequential(OrderedDict(feature[0:3]))
23 | self.level_3_4 = nn.Sequential(OrderedDict(feature[3:5]))
24 | self.level_5_6 = nn.Sequential(OrderedDict(feature[5:13]))
25 | self.level_7 = nn.Sequential(OrderedDict(feature[13:16]))
26 | self.fc = nn.Linear(in_features=2048,out_features=embeding_size)
27 | self.model_name = 'DMLGoogle'
28 | self.with_drop = with_drop
29 |
30 | def freeze_model(self,level=5):
31 | """
32 |
33 | :param level: the freeze level,all the model split in (
34 | Conv2d_1a_3x3
35 | Conv2d_2a_3x3,Conv2d_2b_3x3,
36 | Conv2d_3b_1x1,Conv2d_4a_3x3,
37 | Mixed_5b,Mixed_5c,Mixed_5d,
38 | Mixed_6a,Mixed_6b,Mixed_6c,Mixed_6d,Mixed_6e,AuxLogits
39 | Mixed_7a,Mixed_7b,Mixed_7c
40 |
41 | :return:
42 | """
43 |         for i,(name,module) in enumerate(self._named_children):
44 |             if i<10 and int(name.split('_')[1][0])<=level:
45 |                 for param in module.parameters():
46 |                     param.requires_grad = False
47 |
48 |
49 | def forward(self,x,normalize=False):
50 | """
51 |         forward pass; data shape (32,3,227,227)
52 | :param x: torch.tensor
53 | :return: feature embeding
54 | """
55 | x = self.level1_2(x)
56 | x = F.max_pool2d(x, kernel_size=3, stride=2)
57 | x = self.level_3_4(x)
58 | x = F.max_pool2d(x, kernel_size=3, stride=2)
59 | x = self.level_5_6(x)
60 | x = self.level_7(x)
61 |
62 | x = F.avg_pool2d(x, kernel_size=x.size(-1)) #default 8*8,another 5*5
63 | # 1 x 1 x 2048
64 | if self.with_drop:
65 | x = F.dropout(x, training=self.training)
66 | # 1 x 1 x 2048
67 | x = x.view(x.size(0), -1)
68 | embeding = self.fc(x)
69 | return embeding
70 |
71 |
72 |
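73 | if __name__=='__main__':
74 |     # added shape sanity check (downloads Inception-v3 weights on first run and
75 |     # assumes the older torchvision child layout this module was written against);
76 |     # the 227x227 input matches the shape noted in forward's docstring
77 |     model = ModGoogLeNet(embeding_size=512)
78 |     x = torch.randn(2, 3, 227, 227)
79 |     print(model(x).shape)  # expect torch.Size([2, 512])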
--------------------------------------------------------------------------------
/train_classify.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.optim as optim
3 | from data import Street2shop
4 | import os
5 | from configs import opt
6 | from models import VggClassify
7 | from torch.utils.data import DataLoader
8 | from tqdm import tqdm
9 | from utils import Visulizer
10 |
11 | def val(model,dataloader):
12 | """run model with data,"""
13 | model.eval()
14 | num_total =0
15 | num_correct =0
16 | for i,(data,label) in tqdm(enumerate(dataloader)):
17 | if opt.use_gpu:
18 | data = data.cuda()
19 | label = label.cuda()
20 | score = model(data)
21 | num_total += data.size(0)
22 | pred = torch.argmax(score,dim=1)
23 | acc = torch.eq(pred, label)
24 | num_correct += acc.sum().item()
25 | if opt.debug:
26 | break
27 | print("valid, correct rate",1.0*num_correct/num_total)
28 | model.train()
29 |
30 | def train(**kwargs):
31 | opt.parse(kwargs)
32 | os.environ['CUDA_VISIBLE_DEVICES'] = str(opt.gpu_id)
33 | #data
34 | train_data = Street2shop(opt.train_classify_dir,train=True,persist=opt.persist)
35 | val_data = Street2shop(opt.train_classify_dir,train=False,persist=opt.persist)
36 |
37 | #model
38 | model = VggClassify(num_classes=opt.num_classes)
39 | if opt.load_model_path:
40 | model.load(opt.load_model_path)
41 | if opt.use_gpu:
42 | model = model.cuda()
43 |
44 | #data loader
45 | train_dataloader= DataLoader(train_data,batch_size=opt.batch_size,shuffle=True,num_workers=opt.num_workers)
46 | val_dataloader = DataLoader(val_data,batch_size=opt.batch_size,shuffle=True,num_workers=opt.num_workers)
47 |
48 | #visulizer
49 | viz = Visulizer(host=opt.vis_host,port=opt.vis_port,env=opt.vis_env)
50 | viz.log("start to train")
51 | #loss function
52 | criterion = torch.nn.CrossEntropyLoss()
53 | lr = opt.lr
54 | m = opt.momentum
55 | optimizer = optim.SGD(model.parameters(),lr=lr,momentum=m)
56 | lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,20,0.1)
57 | for epoch in range(opt.max_epoch):
58 | lr_scheduler.step()
59 | train_loss = 0
60 | for i,(data,label) in tqdm(enumerate(train_dataloader)):
61 | if opt.use_gpu:
62 | data = data.cuda()
63 | label = label.cuda()
64 | optimizer.zero_grad()
65 | score = model(data)
66 | loss = criterion(score,label)
67 | loss.backward()
68 | optimizer.step()
69 | train_loss = train_loss + loss.item()
70 | if i%opt.print_freq == opt.print_freq-1:
71 |                 average_loss = train_loss/opt.print_freq  # mean loss over the last print_freq iterations
72 | viz.plot('loss',average_loss)
73 | train_loss =0
74 | if opt.debug:
75 | break
76 | print("epoch :{0} finished,begin to valid test".format(epoch))
77 | model.save()
78 | val(model,val_dataloader)
79 | if opt.debug:
80 | print("finished one iter")
81 | break
82 | def help():
83 |     """print usage information"""
84 |     print("""
85 |     usage: python file.py --args=value
86 |     function := train | help
87 |     example:
88 |             python {0} train
89 |             python {0} help""".format(__file__))
90 |
91 | if __name__=='__main__':
92 | import fire
93 | fire.Fire()
94 |
95 |
--------------------------------------------------------------------------------
/data/classify/ClassifyData.py:
--------------------------------------------------------------------------------
1 | import torchvision.transforms as T
2 | from configs import opt
3 | from torchvision.datasets import ImageFolder
4 | from torch.utils.data import Dataset
5 | import os
6 | import csv
7 | import fnmatch
8 | from PIL import Image
9 | import numpy as np
10 | import torch as t
11 | from torch.utils.data.dataloader import default_collate
12 |
13 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
14 | default_transform = T.Compose([
15 |     T.RandomResizedCrop(224),
16 |     T.RandomHorizontalFlip(),
17 |     T.ToTensor(),
18 |     normalize,
19 | ])
20 |
21 |
22 | #origin_dataset = ImageFolder(opt.train_classify_dir,target_transform=transform)
23 | def my_collate_fn(batch):
24 |     '''
25 |     each element in batch has the form (data, label)
26 |     '''
27 |     # drop samples whose data failed to load (returned without a size)
28 |     batch = list(filter(lambda x:hasattr(x[0],'size'), batch))
29 |     if len(batch) == 0: return t.Tensor()
30 |     return default_collate(batch) # collate the filtered batch the default way
31 | class Street2shop(Dataset):
32 | """ dataset split to train and test
33 | root is a ln -s link like this
34 | --root
35 | --bags
36 | --tops
37 | --skirts
38 | --hats
39 | --;;
40 | have 13 catergory consumer"""
41 | def __init__(self,root,train=True,persist = opt.persist,trans = default_transform):
42 | self.train = train
43 | self.root = root
44 |
45 | self.names_idx = {}
46 | self.transform = trans
47 |
48 |         if not os.path.exists(persist):  # build the class-index csv on first run
49 | folders = os.listdir(root)
50 | folders.sort() # from a to x sort
51 | self.names_idx={fold:i for i,fold in enumerate(folders)}
52 |             with open(persist,'w') as f:
53 | writer = csv.writer(f)
54 | for fold in folders:
55 | index = self.names_idx[fold]
56 | imgs = os.listdir(os.path.join(self.root,fold))
57 | for img in imgs:
58 | writer.writerow([fold+'/{0}'.format(img),index])
59 |
60 | # start to read data
61 | with open(persist, 'r') as f:
62 | reader = csv.reader(f)
63 | self.imgs = [row for row in reader]
64 | print("dataset size",len(self.imgs))
65 |
66 | np.random.shuffle(self.imgs)
67 | if self.train:
68 | self.imgs = self.imgs[:int(0.7 * len(self.imgs))]
69 | else:
70 | self.imgs = self.imgs[int(0.7 * len(self.imgs)):]
71 |
72 | def __getitem__(self, index):
73 | """get data and transform"""
74 | img_path,label = self.imgs[index]
75 | img_path = os.path.join(self.root,img_path)
76 | try:
77 | data = Image.open(img_path)
78 | if not hasattr(data,'size'):
79 | raise Exception("no size or data channel problem")
80 | if self.transform:
81 | data = self.transform(data)
82 |                 if data.size(0) != 3:
83 | print("channel not 3,img_path is :{0}".format(img_path))
84 | raise Exception("channel not 3")
85 | return data,int(label)
86 | except Exception as e:
87 | print(e,img_path)
88 | img_path,label = self.imgs[0]
89 | data = Image.open(os.path.join(self.root,img_path))
90 | label = int(label)
91 | if self.transform:
92 | data = self.transform(data)
93 | return data,label
94 |
95 |
96 |
97 |
98 | def __len__(self):
99 | return len(self.imgs)
100 |
101 |
102 |
103 |
104 |
105 |
--------------------------------------------------------------------------------
/data/hashdata/hash_tri_files.py:
--------------------------------------------------------------------------------
1 | # hash data from three list files, covering coco, nus_wide and imagenet
2 |
3 | from torch.utils.data import Dataset,DataLoader
4 | import torchvision.transforms as T
5 | from PIL import Image
6 | import os
7 | import numpy as np
8 |
9 |
10 | root_path = '/data/jh/notebooks/hudengjun/DeepEmbeding/data/hashdata'
11 |
12 | def image_train(resize_size=256, crop_size=224):
13 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
14 | std=[0.229, 0.224, 0.225])
15 | return T.Compose([
16 | T.Resize(resize_size),
17 | T.RandomResizedCrop(crop_size),
18 | T.RandomHorizontalFlip(),
19 | T.ToTensor(),
20 | normalize])
21 |
22 | def image_test(resize_size = 256,crop_size=224):
23 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
24 | std=[0.229, 0.224, 0.225])
25 | #start_first = 0
26 | start_center = (resize_size - crop_size - 1) / 2
27 | #start_last = resize_size - crop_size - 1
28 |
29 | return T.Compose([
30 | T.Resize(resize_size),
31 | PlaceCrop(crop_size,start_center,start_center),
32 | T.ToTensor(),
33 | normalize])
34 | class PlaceCrop(object):
35 | """Crops the given PIL.Image at the particular index.
36 | Args:
37 | size (sequence or int): Desired output size of the crop. If size is an
38 | int instead of sequence like (w, h), a square crop (size, size) is
39 | made.
40 | """
41 |
42 | def __init__(self, size, start_x, start_y):
43 | if isinstance(size, int):
44 | self.size = (int(size), int(size))
45 | else:
46 | self.size = size
47 | self.start_x = start_x
48 | self.start_y = start_y
49 |
50 | def __call__(self, img):
51 | """
52 | Args:
53 | img (PIL.Image): Image to be cropped.
54 | Returns:
55 | PIL.Image: Cropped image.
56 | """
57 | th, tw = self.size
58 | return img.crop((self.start_x, self.start_y, self.start_x + tw, self.start_y + th))
59 |
60 |
61 | class ImageList(Dataset):
62 | def __init__(self,file,transform=None):
63 | if transform is None:
64 | self._transform = image_train(256,224) if 'train.txt' in file else image_test(256,224)
65 | else:
66 | self._transform = transform
67 | if not os.path.exists(file):
68 | raise Exception("file not exist")
69 | self.file = file
70 |
71 | self.images = []
72 | with open(self.file,'r') as f:
73 | for line in f.readlines():
74 | items = line.strip().split(' ')
75 | self.images.append((items[0],np.array([int(la) for la in items[1:]],dtype=np.float32)))
76 |
77 | def __len__(self):
78 | return len(self.images)
79 |
80 | def __getitem__(self, index):
81 | path,target = self.images[index]
82 | img = Image.open(path).convert('RGB')
83 | if self._transform:
84 | img = self._transform(img)
85 | return img,target
86 |
87 | def get_hash_dataloader(dataset_name,train_batch,test_batch,database_batch):
88 | """
89 | return the double train dataset
90 | :param dataset:
91 | :return:
92 | """
93 | file_names = ['train.txt','test.txt','database.txt']
94 | files = [os.path.join(root_path,dataset_name,file_name) for file_name in file_names]
95 | datasets = [ImageList(file) for file in files]
96 | train1 = DataLoader(datasets[0],batch_size=train_batch,shuffle=True,num_workers=6)
97 | train2 = DataLoader(datasets[0],batch_size=train_batch,shuffle=True,num_workers=6)
98 | test = DataLoader(datasets[1],batch_size=test_batch,shuffle=False,num_workers=4)
99 | database = DataLoader(datasets[2],batch_size=database_batch,shuffle=False,num_workers=4)
100 | return train1,train2,test,database
101 |
102 |
103 |
104 | if __name__== '__main__':
105 | coco_train = ImageList('/data/jh/notebooks/hudengjun/DeepEmbeding/data/hashdata/coco/train.txt')
106 | print("size of cocotrain",len(coco_train))
107 | print("start to get data",coco_train[0][0].shape,coco_train[0][1].shape)
108 |
109 |
110 |
111 |
112 |
--------------------------------------------------------------------------------
/data/margin_cub200/cub200_margin.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.utils.data import Dataset
3 | from PIL import Image
4 |
5 | import torchvision.transforms as T
6 | import numpy as np
7 | import os
8 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
9 | default_transform = T.Compose([
10 | T.Resize(256),
11 | T.RandomCrop(224),
12 | T.RandomHorizontalFlip(),
13 | T.ToTensor(),
14 | normalize,
15 | ])
16 |
17 | class CUB200DataSet(Dataset):
18 | """
19 | the cub200 bird dataset,dataset description:
20 | 200 catagory bird, 100 for train ,100 for test ,each catagory hase 60 images
21 |
22 | """
23 | def __init__(self,data_path,batch_k=5,batch_size=70,is_train=True,transform = default_transform):
24 | self.is_train = is_train
25 |         self.batch_k = batch_k # number of samples per class, for example 5
26 |         self.batch_size = batch_size # total samples fetched per batch, for example 70, so 14 classes are sampled
27 | self.train_image_files = [[]for _ in range(100)]
28 | self.test_image_files =[]
29 | self.test_labels =[]
30 | self.boxes = {}
31 | self.transform = transform
32 |
33 | with open(os.path.join(data_path,'images.txt'),'r') as f_img,\
34 | open(os.path.join(data_path,'image_class_labels.txt'),'r') as f_label,\
35 | open(os.path.join(data_path,'bounding_boxes.txt'),'r') as f_box:
36 | for line_img,line_label,line_box in zip(f_img,f_label,f_box):
37 | fname = os.path.join(data_path,'images',line_img.strip().split()[-1])
38 | label = int(line_label.strip().split()[-1])-1
39 | box = [int(float(v)) for v in line_box.split()[-4:]]
40 | self.boxes[fname]=box
41 |
42 | if label<100:
43 | self.train_image_files[label].append(fname)
44 | else:
45 | self.test_image_files.append(fname)
46 | self.test_labels.append(label)
47 |
48 | self.n_test = len(self.test_image_files)
49 |
50 | def __getitem__(self, index):
51 | """
52 | get data item in train dataset,all test dataset
53 | :param index: the index of training or test of sample
54 | :return: return the origin image data and labels based on sample method,
55 | search batch/batch_k classes ,every class,choose batch_k iamges to compound a batch
56 | """
57 | if self.is_train:
58 | #get train batch
59 | images = []
60 | labels = []
61 | num_groups = self.batch_size//self.batch_k
62 | sampled_classes = np.random.choice(100,num_groups,replace=False)
63 | for class_id in sampled_classes:
64 | img_fnames = np.random.choice(self.train_image_files[class_id],self.batch_k,replace=False)
65 | for file_path in img_fnames:
66 | x,y,w,h = self.boxes[file_path]
67 | img = Image.open(file_path).convert('RGB').crop((x,y,x+w,y+h))
68 | try:
69 | img_tensor = self.transform(img)
70 | images.append(img_tensor)
71 | labels.append(class_id)
72 | except Exception as e:
73 | print(file_path)
74 | break
75 |
76 | batch_data = torch.stack(images,dim=0) # from list of tensor to batch tensor
77 | label_data = torch.tensor(np.array(labels,dtype=np.int32)) # from list to tensor
78 | return batch_data,label_data
79 | else:
80 | #get one sample
81 | image = Image.open(self.test_image_files[index]).convert('RGB')
82 | label = self.test_labels[index]
83 | if self.transform:
84 | image = self.transform(image)
85 | return image,label
86 |
87 | def __len__(self):
88 | if self.is_train:
89 | return 200 #
90 | else:
91 | return self.n_test # will return all test_image_files
92 |
93 |
94 |
95 | if __name__=='__main__':
96 | import ipdb
97 | ipdb.set_trace()
98 | dataset = CUB200DataSet(data_path='data/cub200_2011/CUB_200_2011/')
99 | data = dataset[1]
100 | print(type(data))
101 | print(data[1])
--------------------------------------------------------------------------------
/models/hashnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torchvision.models.resnet import resnet50
4 | import math
5 | class HashNetRes50(nn.Module):
6 | """
7 | this is a hash net based on resnet50
8 | """
9 | def __init__(self,n_bit):
10 | super(HashNetRes50,self).__init__()
11 | model_resnet = resnet50(pretrained=True)
12 | self.conv1 = model_resnet.conv1
13 | self.bn1 = model_resnet.bn1
14 | self.relu = model_resnet.relu
15 | self.maxpool = model_resnet.maxpool
16 | self.layer1 = model_resnet.layer1
17 | self.layer2 = model_resnet.layer2
18 | self.layer3 = model_resnet.layer3
19 | self.layer4 = model_resnet.layer4
20 | self.avgpool = model_resnet.avgpool
21 | self.feature_layers = nn.Sequential(self.conv1,
22 | self.bn1,
23 | self.relu,
24 | self.maxpool,
25 | self.layer1,
26 | self.layer2,
27 | self.layer3,
28 | self.layer4,
29 | self.avgpool)
30 | self.hash_layer = nn.Linear(model_resnet.fc.in_features,n_bit)
31 | self.hash_layer.weight.data.normal_(0,0.01)
32 | self.hash_layer.bias.data.fill_(0.0)
33 | self.activation = torch.nn.Tanh()
34 |
35 | self.iter_num =0
36 | self.gamma = 0.005
37 | self.step_size = 200
38 | self.power =0.5
39 | self.init_scale = 1.0
40 | self.scale = self.init_scale
41 | self.__in_features = n_bit
42 |
43 | def forward(self,x):
44 | """ the image x contains x and x' to generate similairty"""
45 | if self.training:
46 | self.iter_num +=1
47 | x = self.feature_layers(x)
48 | x = x.view(x.size(0),-1)
49 | y = self.hash_layer(x) # just a linear transform
50 | if self.iter_num % self.step_size == 0:
51 | self.scale = self.init_scale*math.pow((1+self.gamma*self.iter_num),self.power)
52 | y = self.activation(self.scale*y)
53 | return y
54 |
55 | def ouput_num(self):
56 | return self.__in_features
57 |
58 | class HashLoss(nn.Module):
59 | def __init__(self,hash_bit):
60 | super(HashLoss,self).__init__()
61 | self.hash_bit = hash_bit
62 |
63 |
64 | def forward(self,x,y,sigmoid_param = 1.0,l_threshold=15.0,class_num =1.0):
65 | """
66 |
67 | :param x:
68 | :param y:
69 | :param sigmoid_param:
70 | :param l_threshold: the big dot_product use the limitation
71 | :param class_num: the imbalance data distribution
72 | :return:
73 | """
74 | total_size = x.shape[0]
75 | x1 = x.narrow(0,0,total_size//2)
76 | x2 = x.narrow(0,total_size//2,total_size//2) # narrow,dimension,start,length
77 | y1 = y.narrow(0,0,total_size//2)
78 | y2 = y.narrow(0,total_size//2,total_size//2)
79 |
80 | similarity = torch.mm(y1,y2.t())
81 | dot_product = sigmoid_param * torch.mm(x1,x2.t())
82 | exp_product = torch.exp(dot_product)
83 |
84 | mask_dot = dot_product.data>l_threshold
85 |         mask_exp = dot_product.data<=l_threshold # when dot_product is small, use log(1+exp(x)) - s_ij*x directly
86 |
87 | mask_positive = similarity.data>0
88 | mask_negative = similarity.data<=0
89 |
90 | mask_dp = mask_dot & mask_positive
91 | mask_dn = mask_dot & mask_negative
92 | mask_ep = mask_exp & mask_positive
93 | mask_en = mask_exp & mask_negative
94 |
95 |         dot_loss = dot_product*(1-similarity) # dot_loss approximates exp_loss when dot_product is large,
96 |                                               # since log(1+exp(x)) ≈ x for large x; it is 0 for similar pairs
97 |         exp_loss = torch.log(1+exp_product) - similarity*dot_product
98 |
99 |         loss = (torch.sum(torch.masked_select(exp_loss,mask_ep))+
100 |                 torch.sum(torch.masked_select(dot_loss,mask_dp)))*class_num + torch.sum(torch.masked_select(exp_loss,mask_en))+torch.sum(torch.masked_select(dot_loss,mask_dn))
101 |
102 | loss = loss /(torch.sum(mask_positive.float())*class_num +torch.sum(mask_negative.float()))
103 | return loss
104 |
105 |
106 |
107 |
108 |
109 |
110 | if __name__=='__main__':
111 |
112 | base_resnet = HashNetRes50(n_bit=48)
113 | x = torch.rand((10,3,224,224))
114 | x = base_resnet(x)
115 | print(x.shape)
116 |
117 | #base_resnet.zero_grad()
118 | torch.save(nn.Sequential(base_resnet),"hashnet.pth.tar")
119 | print("finished")
120 | model = torch.load("hashnet.pth.tar")
121 | print(model)
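122 |
123 |     # added toy check for HashLoss, with an assumed label layout: the first half
124 |     # of the batch pairs with the second half, and y holds multi-hot class vectors
125 |     y = torch.randint(0, 2, (10, 5)).float()
126 |     criterion = HashLoss(hash_bit=48)
127 |     print("toy hash loss:", criterion(x, y).item())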
--------------------------------------------------------------------------------
/utils/vis_tsne_images.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from PIL import Image
4 | from lapjv import lapjv
5 | from sklearn.manifold import TSNE
6 | from scipy.spatial.distance import cdist
7 | import matplotlib as mlp
8 | import matplotlib.pyplot as plt
9 | import os
10 | from tqdm import tqdm
11 | from sklearn.cluster import KMeans
12 | from sklearn.metrics import normalized_mutual_info_score
13 | def load_img(file_list,in_dir):
14 | #pred_img = [f for f in os.listdir(in_dir) if os.path.isfile(os.path.join(in_dir, f))]
15 | pred_img =file_list
16 | img_collection = []
17 | for idx, img in enumerate(pred_img):
18 | img = os.path.join(in_dir, img)
19 | img_collection.append(Image.open(img))
20 | return img_collection
21 |
22 | def save_tsne_grid(img_list, x2d, out_res,crop_size,in_dir):
23 | """
24 | plot all the images in X_2d pictures
25 | :param img_collection: the image
26 | :param X_2d: the point
27 | :param out_res: the output picture resolution
28 | :return:
29 | """
30 | out_img = np.ones((out_res+crop_size,out_res+crop_size,3),dtype='uint8')
31 | out_img = out_img*255
32 |
33 | i=0
34 | for img_path,point in tqdm(zip(img_list,x2d)):
35 | i +=1
36 | point = point*out_res
37 | px = int(point[0])
38 | py = int(point[1])
39 | img = Image.open(os.path.join(in_dir,img_path))
40 | img.thumbnail((crop_size,crop_size))
41 |
42 | a = np.array(img)
43 |
44 | try:
45 | h,w = a.shape[:2]
46 | if len(a.shape)==3:
47 | out_img[py:py + h, px:px + w]= a
48 | except Exception as e:
49 | print(e)
50 | print(a.shape)
51 | print(img_path)
52 | # if i%5000==4999:
53 | # tm = out_img.astype('uint8')
54 | # tm_pl_img = Image.fromarray(tm)
55 | # tm_pl_img.save('checkpoints/tsne_product_{0}.jpg'.format(i+1))
56 |
57 | out_img = out_img.astype('uint8')
58 | pl_img = Image.fromarray(out_img)
59 | pl_img.save('checkpoints/tsne_product.jpg')
60 |
61 |
62 | def generate_tsne(activations):
63 | perplexity=30
64 | tsne = TSNE(perplexity=perplexity, n_components=2, init='random')
65 | X_2d = tsne.fit_transform(activations) # activations dtype is numpy.ndarray
66 | X_2d -= X_2d.min(axis=0)
67 | X_2d /= X_2d.max(axis=0)
68 | return X_2d
69 |
70 | def visualize(im_files_list,features,data_dir,tsne_size=20000,crop_size=100):
71 | """
72 | visualize t-sne data
73 | :param im_files_list: image file list
74 | :param features: image features numpy.ndarray shape (n,512)
75 | :return:
76 | """
77 | print("dimension deduction from features ...")
78 | feature_2d = generate_tsne(features)
79 | np.save('fashion.npy',feature_2d)
80 | #feature_2d = np.load('x2d.npy')
81 | print("build t-sne image ... ...")
82 | save_tsne_grid(im_files_list, feature_2d, tsne_size,crop_size,data_dir)
83 |
84 |
85 | def nmi(gt_class,features):
86 |     """
87 |     normalized mutual information for clustered features
88 |     :param gt_class: np.ndarray, shape [n,1], dtype=np.int32
89 |     :param features: image features to cluster, numpy.ndarray [n,512]
90 |     :return:
91 |     """
92 |
93 | gt_class = gt_class - min(gt_class)
94 | n_cluster = len(set(gt_class)) #gt_class from 0 to n_cluster
95 | #convert
96 | st_class = set(gt_class)
97 | kv={}
98 | for k in st_class:
99 | kv[k]=len(kv)
100 | gt_class = np.array([kv[k] for k in gt_class])
101 |
102 | model = KMeans(n_clusters=n_cluster)
103 | Y=model.fit(features) # this would take 40 minutes
104 | cl_class = Y.labels_
105 | score = normalized_mutual_info_score(gt_class,cl_class)
106 | print("the normal_mutal_info_score",score)
107 |
108 |
109 |
110 |
111 | def vis_ebay_n_pair():
112 | """
113 |     read computed features, visualize the t-SNE picture, then compute the NMI score
114 | """
115 | features_file = 'checkpoints/online_product_compute.csv'
116 | test_info_file = 'data/Stanford_Online_Products/Ebay_test.txt'
117 |
118 | vectors = None
119 | features = pd.read_csv(features_file,header=None)
120 | id_class = features.iloc[:,0:2]
121 | id_class = np.array(id_class)
122 | vectors = np.array(features.iloc[:,2:])
123 |
124 | image_id_path= pd.read_table(test_info_file, header=0, delim_whitespace=True)
125 | file_list = np.array(image_id_path.path)
126 |
127 | visualize(file_list,vectors,'data/Stanford_Online_Products')
128 | file_class = np.array(image_id_path.class_id)
129 | file_class = file_class.astype(np.int32)
130 | nmi(file_class,vectors)
131 |
132 |
133 | def vis_deep_fashion_margin():
134 | feature_file = 'checkpoints/deepfashion.csv'
135 | test_info_file = 'checkpoints/fashion_test.txt'
136 | features = pd.read_csv(feature_file,header=None)
137 | vectors = np.array(features.iloc[:,2:])
138 | gt_class = np.array(features.iloc[:,1],dtype=np.int32)
139 | image_path_id = pd.read_table(test_info_file,header=None,sep=',')
140 | file_list = np.array(image_path_id.iloc[:,0])
141 | #visualize(file_list,vectors,'data/DeepInShop')
142 | nmi(gt_class,vectors)
143 |
144 |
145 | if __name__=='__main__':
146 |     vis_deep_fashion_margin()
147 |
148 |
--------------------------------------------------------------------------------
/data/mxdata/mxcub_simple.py:
--------------------------------------------------------------------------------
1 | # an implementation of mxnet in vision data dataset similar like pytorch.
2 |
3 | from mxnet.gluon.data import DataLoader,Dataset
4 | from mxnet import nd
5 | from mxnet.image import imread
6 |
7 | import os
8 | import numpy as np
9 | import mxnet as mx
10 | from mxnet.gluon import nn
11 | import mxnet.gluon.data.vision.transforms as T
12 |
13 |
14 | class RandomCrop(nn.Block):
15 |     def __init__(self,size):
16 |         super(RandomCrop, self).__init__()
17 |         self.size = size
18 |     def forward(self,x):
19 |         return mx.image.random_crop(x,(self.size,self.size))[0]  # drop the returned crop coordinates
20 | normalize=T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
21 | default_transform = T.Compose([
22 | T.Resize(256),
23 | T.RandomResizedCrop(size=224,scale=(1.0,1.0),ratio=(1.0,1.0)),# just crop,not scale
24 | T.RandomFlipLeftRight(),
25 |     T.ToTensor(), # last, to swap channels to (c, h, w)
26 | normalize
27 | ])
28 |
29 | test_transform = T.Compose([
30 | T.Resize(256),
31 | T.CenterCrop(224),
32 | T.ToTensor(),
33 | normalize
34 | ])
35 |
36 | class CUB200Data(Dataset):
37 | def __init__(self,dir_path,batch_k,batch_size,is_train,transform = default_transform):
38 | self.dir_path = dir_path
39 | self.batch_k = batch_k
40 | self.batch_size = batch_size
41 | self._transform = transform
42 | self.is_train = is_train
43 | self.train_image_files = [ [] for _ in range(100)]
44 | self.test_images_files = [] # to store test image files
45 |         self.test_labels = [] # to store test image labels
46 | self.boxes = {} # to store image bounding box
47 |
48 | with open(os.path.join(dir_path,'images.txt'),'r') as f_img,\
49 | open(os.path.join(dir_path,'image_class_labels.txt'),'r') as f_label,\
50 | open(os.path.join(dir_path,'bounding_boxes.txt'),'r') as f_box:
51 | for line_img,line_label,line_box in zip(f_img,f_label,f_box):
52 | fname = os.path.join(self.dir_path,'images',line_img.strip().split()[-1])
53 | label = int(line_label.strip().split()[-1])-1
54 | box = [int(float(v)) for v in line_box.split()[-4:]]
55 | self.boxes[fname]=box
56 |
57 | if label<100:
58 | self.train_image_files[label].append(fname)
59 | else:
60 | self.test_images_files.append(fname)
61 | self.test_labels.append(label)
62 | self.n_test = len(self.test_images_files)
63 | self.train_class_ids = list(np.arange(0,100)) #list(self.train_image_files.keys()) # get all train class id list
64 |
65 | def __len__(self):
66 | if self.is_train:
67 | return 200
68 | else:
69 | return self.n_test
70 |
71 | def __getitem__(self, index):
72 | """
73 | get the batch //batch_k for train and single for test
74 | """
75 | if self.is_train:
76 | image_names,labels = self.sample_train_batch()
77 | # get sampled order image_file names and corresponding label
78 | image_list,label_list=[],[]
79 | for img,label in zip(image_names,labels):
80 | image = imread(img,flag=1,to_rgb=True)
81 | x,y,w,h = self.boxes[img]
82 | image = image[y:min(y+h,image.shape[0]),x:min(x+w,image.shape[1])]
83 | if image.shape[2]==1:
84 | print("has gray file",img)
85 | image = nd.tile(image,(1,1,3))
86 |                 image = self._transform(image) # gray images were tiled above so all RGB channels share the same value
87 | image_list.append(image)
88 | label_list.append(label)
89 | batch_data = nd.stack(*image_list,axis=0)
90 | batch_label = nd.array(label_list)
91 | return batch_data,batch_label
92 | else:
93 | img = self.test_images_files[index] # get the file name full path
94 | image = imread(img,flag=1,to_rgb=1)
95 | x,y,w,h = self.boxes[img]
96 | image = image[y:min(y+h,image.shape[0]),x:min(x+w,image.shape[1])]
97 | image = self._transform(image)
98 |
99 | return image,self.test_labels[index]
100 |
101 | def sample_train_batch(self):
102 | """sample batch_size//batch_k and sample small batch_k in each instance"""
103 | batch = []
104 | labels =[]
105 | num_groups = self.batch_size // self.batch_k
106 |         sampled_classes = np.random.choice(self.train_class_ids,num_groups,replace=False)
107 |         for class_id in sampled_classes:
108 | img_fname = np.random.choice(self.train_image_files[class_id],self.batch_k,replace=False)
109 | batch += img_fname.tolist()
110 | labels += [class_id]*self.batch_k
111 | return batch,labels
112 |
113 |
114 | def getCUB200(data_path,batch_k,batch_size):
115 | train_dataset = CUB200Data(data_path,batch_k=batch_k,batch_size=batch_size,is_train=True,transform=default_transform)
116 | test_dataset = CUB200Data(data_path,batch_k=batch_k,batch_size=batch_size,is_train=False,transform=test_transform)
117 | train_loader = DataLoader(train_dataset,batch_size=1,shuffle=False,num_workers=6)
118 | test_loader = DataLoader(test_dataset,batch_size=batch_size,num_workers=6)
119 | return train_loader,test_loader
120 |
121 |
122 | if __name__=='__main__':
123 | import ipdb
124 | #ipdb.set_trace()
125 | train_loader, test_loader = getCUB200('data/CUB_200_2011',batch_k=5,batch_size=10)
126 | # for train_batch,test_batch in zip(train_loader,test_loader):
127 | # print("begin to resolve data from train_loader and test_loader")
128 | # ipdb.set_trace()
129 | # print("data",train_batch[0][0].shape,train_batch[1][0].shape)
130 | # print("test_data",test_batch[0].shape,test_batch[1].shape)
131 | # break
132 | train_dataset = CUB200Data('data/CUB_200_2011', batch_k=5, batch_size=10, is_train=True)
133 | ipdb.set_trace()
134 | data = train_dataset[0]
135 | print(data)
136 | test_dataset = CUB200Data('data/CUB_200_2011',batch_k=5,batch_size=10,is_train=False)
137 | data = test_dataset[0]
138 | print(data)
139 | # for test_batch in test_loader:
140 | # ipdb.set_trace()
141 | # print(test_batch[0].shape,test_batch[1].shape)
142 | # break
143 |
144 |
145 |
146 |
147 |
--------------------------------------------------------------------------------
/train_hash.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.optim as optim
3 | import argparse
4 | from data import get_hash_dataloader
5 | from models import HashNetRes50,HashLoss
6 | from utils import Visulizer
7 | import torch.nn as nn
8 | import os
9 | import numpy as np
10 | from pprint import pprint
11 | args = argparse.ArgumentParser()
12 | args.add_argument('--gpus',type=str,default='0',help="gpus ids")
13 | args.add_argument('--dataset',type=str,default='coco',help='the dataset name: coco, nus_wide or imagenet')
14 | args.add_argument('--hash_bit',type=int,default=48,help='the hash bit of deephashing output')
15 | args.add_argument('--iter_nums',type=int,default=10000,help='the max train iter')
16 | args.add_argument('--train_batch',type=int,default=32,help='the train batch_size')
17 | args.add_argument('--lr',type=float,default=0.0001,help='the train learning rate')
18 | args.add_argument('--class_num',type=float,default=1.0,help='the imbalance ratio')
19 | args.add_argument('--viz_env',type=str,default='cocohash',help='the visdom env name')
20 | args.add_argument('--log_interval',type=int,default=20,help='the loss print log interval')
21 | args.add_argument('--snapshot_interval',type=int,default=3000,help='the snapshot archive model interval')
22 | args.add_argument('--test_interval',type=int,default=500,help='the test hash search interval')
23 |
24 |
25 | def test_model(model,test_loader,database_loader,viz,R=500): # R: mAP@R retrieval cutoff (assumed default; was previously undefined)
26 | def code_predict(net,loader):
27 | code = []
28 | label = []
29 | for data in loader:
30 | x,y = data
31 | if torch.cuda.is_available():
32 | x = x.cuda()
33 | x = model(x)
34 | code.append(x.cpu())
35 | label.append(y)
36 | code = torch.cat(code,dim=0)
37 | code = torch.sign(code) # the quantization sign function
38 | label = torch.cat(label,dim=0)
39 | return code.numpy(),label.numpy()
40 | test_code,test_label = code_predict(model,test_loader)
41 | database_code,database_label = code_predict(model,database_loader)
42 |
43 | #compute the mean average precision--namely map
44 | query_num = test_code.shape[0]
45 | sim = np.dot(database_code, test_code.T)
46 | ids = np.argsort(-sim, axis=0)
47 | APx = []
48 |
49 | for i in range(query_num):
50 | label = test_label[i, :]
51 | label[label == 0] = -1
52 | idx = ids[:, i]
53 | imatch = np.sum(database_label[idx[0:R], :] == label, axis=1) > 0
54 | relevant_num = np.sum(imatch)
55 | Lx = np.cumsum(imatch)
56 | Px = Lx.astype(float) / np.arange(1, R + 1, 1)
57 | if relevant_num != 0:
58 | APx.append(np.sum(Px * imatch) / relevant_num)
59 | mAP = np.mean(np.array(APx))
60 |     viz.plot("mAP",float(mAP))
61 |
62 |
63 |
64 |
65 |
66 |
67 | if __name__=='__main__':
68 | config={}
69 | ags = args.parse_args()
70 | config['gpus']=int(ags.gpus)
71 | os.environ['CUDA_VISIBLE_DEVICES']=ags.gpus
72 | config['dataset']=ags.dataset
73 | config['hash_bit'] = ags.hash_bit
74 | config['iter_nums']= ags.iter_nums
75 | config['train_batch'] = ags.train_batch
76 | config['lr']=ags.lr
77 | config['log_interval'] = ags.log_interval
78 | config['snapshot_interval'] = ags.snapshot_interval
79 | config['test_interval'] = ags.test_interval
80 | config['viz_env'] = ags.viz_env
81 |
82 |
83 |
84 | #program setting
85 | config['weight_decay']=0.0005
86 |
87 | config["optimiz_params"] = {"lr": config['lr'], "momentum": 0.9, "weight_decay": 0.0005, "nesterov": True}
88 | config['lr_scheduler']={"gamma":0.5, "step":2000}
89 | config["loss"] = {"l_weight": 1.0, "q_weight": 0,
90 | "l_threshold": 15.0, "sigmoid_param": 10. / config["hash_bit"],
91 | "class_num": ags.class_num}
92 |
93 | pprint(config) # print the config data
94 | #prepare model and dataset
95 | model = HashNetRes50(n_bit=config['hash_bit'])
96 | criteria = HashLoss(hash_bit=config['hash_bit'])
97 |
98 | train1,train2,test_loader,database_loader = get_hash_dataloader(config['dataset'],config['train_batch'],
99 | config['train_batch']//2,config['train_batch']//2)
100 | if torch.cuda.is_available():
101 | model = model.cuda()
102 | params_list = [{"params":model.feature_layers.parameters(),'lr':1},
103 | {"params":model.hash_layer.parameters(),'lr':10}]
104 | optimizer = optim.SGD(params_list,lr=config['lr'],momentum=0.9,weight_decay=config['weight_decay'],nesterov=True)
105 |     lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=2000,
106 |                                             gamma=0.5,last_epoch=-1)
107 |
108 | viz = Visulizer(host='http://hpc3.yud.io',port=8088,env=config['viz_env'])
109 | viz.log("start the hash learning")
110 | viz.log(config)
111 | len_train = len(train1)
112 | train_loss = 0
113 |     for it in range(config['iter_nums']):
114 |         optimizer.zero_grad()  # clear gradients accumulated from the previous iteration
115 |         lr_scheduler.step()
116 | if it % len_train==0:
117 | iter1 = iter(train1)
118 | iter2 = iter(train2)
119 |         train_part1 = next(iter1)
120 |         train_part2 = next(iter2) # same train data under two different shuffles
121 |
122 | x1,y1 = train_part1
123 | x2,y2 = train_part2
124 | if torch.cuda.is_available():
125 | x1 = x1.cuda()
126 | x2 = x2.cuda()
127 | y1 = y1.cuda()
128 | y2 = y2.cuda()
129 | inputs = torch.cat((x1,x2),dim=0)
130 | labels = torch.cat((y1,y2),dim=0)
131 | outputs = model(inputs)
132 | loss = criteria(outputs,labels,sigmoid_param=config["loss"]["sigmoid_param"], \
133 | l_threshold=config["loss"]["l_threshold"], \
134 | class_num=config["loss"]["class_num"])
135 | loss.backward()
136 | train_loss += loss.item()
137 | if (it+1)%config['log_interval']==0:
138 | print("Iter: {:05d}, loss: {:.3f}".format(it,train_loss/config['log_interval']))
139 | train_loss =0
140 | optimizer.step()
141 |
142 | if it%config['snapshot_interval'] ==0:
143 | torch.save(nn.Sequential(model),
144 | './checkpoints/resnet_{0}_{1}_{2}.pth.tar'.format(config['dataset'],config['hash_bit'],it))
145 | if it%config['test_interval']==0:
146 |             test_model(model,test_loader,database_loader,viz) # validate the retrieval quality of the hash codes
147 | viz.log("finish train model")
148 |
149 |
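150 | 
151 | # A toy, self-contained check of the mAP@R computation used by test_model above; the sizes,
152 | # bit width and R below are illustrative assumptions only, not values used in training.
153 | def _demo_map(R=4):
154 |     db_code = np.sign(np.random.randn(8, 16))          # 8 database items, 16-bit codes
155 |     q_code = np.sign(np.random.randn(2, 16))           # 2 queries
156 |     db_label = np.eye(4)[np.random.randint(0, 4, 8)]   # one-hot labels, 4 classes
157 |     q_label = np.eye(4)[np.random.randint(0, 4, 2)]
158 |     sim = np.dot(db_code, q_code.T)
159 |     ids = np.argsort(-sim, axis=0)                     # ranked database indices per query
160 |     APx = []
161 |     for i in range(q_code.shape[0]):
162 |         label = q_label[i, :].copy()
163 |         label[label == 0] = -1                         # so one-hot rows match only the true class
164 |         imatch = np.sum(db_label[ids[:R, i], :] == label, axis=1) > 0
165 |         relevant_num = np.sum(imatch)
166 |         Px = np.cumsum(imatch).astype(float) / np.arange(1, R + 1)
167 |         if relevant_num != 0:
168 |             APx.append(np.sum(Px * imatch) / relevant_num)
169 |     return np.mean(APx) if APx else 0.0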
--------------------------------------------------------------------------------
/models/sample_dml.py:
--------------------------------------------------------------------------------
1 | """
2 | this is a model explemenation for "sampling matters in deep embeding learning" paper
3 | in this paper,we take model resnet-50 as base model to extract 128 dimension discriminative features,and then use distance weighted sampling ,
4 | along with margin-based loss as loss function.
5 | """
6 |
7 | import torch
8 | from torchvision.models import resnet50
9 | import torch.nn as nn
10 | import torch.nn.functional as F
11 | from .vgg_classify import BaseModule
12 | from collections import OrderedDict
13 | import numpy as np
14 |
15 | class SampleModel(BaseModule):
16 | """
17 | this is the resnet-50 based model
18 | """
19 | def __init__(self,embeding_dim = 128):
20 | super(SampleModel,self).__init__()
21 | basic_model = resnet50(pretrained=True)
22 |
23 | feature = list(basic_model.named_children())[:-1]
24 | self.base_model = nn.Sequential(OrderedDict(feature))
25 | self.dense = nn.Linear(in_features=2048,out_features=128)
26 |
27 |
28 | def forward(self,x):
29 | """
30 |         extract the 128-dimensional feature; x is a batch of images in which every consecutive batch_k images share a class
31 | :param x: batch of images in data type of torch Tensor(N,C,W,H) of (70,3,224,224)
32 | :return: feature of images,in data type of torch Tensor(N,D) of [70,128]
33 | """
34 | res_feature = self.base_model(x)
35 | res_feature = res_feature.view(res_feature.size(0),-1)
36 | embeding = self.dense(res_feature)
37 | embeding = F.normalize(embeding,p=2,dim=1)
38 | return embeding
39 |
40 |
41 | class Margin_Loss(nn.Module):
42 |     """ the margin loss combines distance weighted sampling with the margin based loss;
43 |     both are computed as in the paper 'Sampling Matters in Deep Embedding Learning' """
44 | def __init__(self,batch_k=5,margin=0.2,nu=0.0,cutoff=0.5,nonzero_loss_cutoff=1.4):
45 | """
46 | this loss function receive batch of image_feature,then compute the distance weighted sampling loss
47 | :param batch_k: images count for every class
48 | :param margin: margin for alpha in paper
49 | :param nu: regularization parameter for beta
50 | """
51 | super(Margin_Loss,self).__init__()
52 | self.margin = torch.tensor(margin,dtype=torch.float32)
53 | self.nu = torch.tensor(nu,dtype=torch.float32)
54 | self.batch_k = batch_k
55 |         self.cutoff = cutoff # lower bound on pairwise distances, cuts off the high-variance region
56 |         self.nonzero_loss_cutoff = nonzero_loss_cutoff # upper distance bound: only sample negatives that can give non-zero loss
57 | self.relu1 = torch.nn.ReLU()
58 | self.relu2 = torch.nn.ReLU()
59 |
60 | def convert_param(self,to_cuda=True):
61 | """
62 | convert parameter margin and nu coeff to cuda or to cpu
63 | :param to_cuda:
64 | :return:
65 | """
66 | if to_cuda:
67 | self.margin = self.margin.cuda()
68 | self.nu = self.nu.cuda()
69 | else:
70 | self.margin = self.margin.cpu()
71 | self.nu = self.nu.cpu()
72 |
73 |
74 |
75 | def forward(self,x,y,beta_in):
76 | """
77 |
78 |         :param x: the feature extracted from resnet, a torch.tensor of shape (n,d), typically (70,128)
79 |         :param y: the class label of each row, ranging over 0-200; it indexes into beta_in
80 | :param beta_in: beta_in is a torch variable (tensor) with require_grad = True
81 | :return: the loss of beta_reg_loss and margin loss
82 | """
83 |         a_index,p_index,n_index = self.sampling(x) # anchors, positives and negatives drawn from the distance weighted distribution
84 |         beta_work = beta_in[a_index] # gather the class-specific beta coefficients
85 |         beta_reg_loss = torch.sum(beta_work)*self.nu # regularizer on beta; stays in the graph so backward is valid
86 |
87 | # compute margin loss from feature
88 | anchors = x[a_index]
89 |         positives = x[p_index]
90 |         negatives = x[n_index]
91 |         d_ap = torch.sqrt(torch.sum((anchors - positives)*(anchors - positives),1)+1e-8)
92 |         d_an = torch.sqrt(torch.sum((anchors - negatives)*(anchors - negatives),1)+1e-8)
93 |
94 | pos_loss = self.relu1(d_ap - beta_work + self.margin)
95 | neg_loss = self.relu2(beta_work - d_an + self.margin)
96 | pair_cnt = torch.sum((pos_loss>0.0) +(neg_loss>0.0))
97 |
98 | # normalize based on the number of pairs
99 | loss = (torch.sum(pos_loss + neg_loss) + beta_reg_loss)/ pair_cnt.float()#pair_cnt.numpy()[0]
100 | return loss
101 |
102 |
103 | def sampling(self,x):
104 | """
105 | sampling images pairs based on distance of each images
106 | :param x: x is the [N,128] tensor of the extracted features
107 |         :return: indices of anchors, positives and negatives
108 | """
109 | np_feature = x.cpu().detach().numpy()
110 | k = self.batch_k
111 | n,d = np_feature.shape
112 |
113 | # compute distance
114 | dis_matrix = self.get_distance(np_feature)
115 |
116 |         # cut off to avoid high variance
117 | dis_matrix = np.maximum(dis_matrix,self.cutoff)
118 |
119 | log_weights = ((2.0 - float(d)) * np.log(dis_matrix)
120 | - (float(d-3)/2)*np.log(1.0-0.25*(dis_matrix**2)))
121 | #weights = np.exp(log_weights - log_weights.max(1).reshape(-1,1)) #log_weights-log_weights.max(1).reshape(-1,1), every line subtract the max weight number ,not the total number
122 | weights = np.exp(log_weights - log_weights.max())
123 | mask = np.ones(weights.shape)
124 | for i in range(0,n,k):
125 |             mask[i:i+k,i:i+k] = 0 # zero the k-by-k same-class block around the diagonal so same-class pairs are never drawn as negatives
126 |
127 |         weights = weights * mask * (dis_matrix < self.nonzero_loss_cutoff)
128 |         mask_uniform_probs = mask * (1.0 / (n - k))
129 |         weights_sum = np.sum(weights, axis=1, keepdims=True)
130 |         weights = weights / weights_sum
131 | 
132 |         a_indices, p_indices, n_indices = [], [], []
133 |         for i in range(n):
134 |             block_idx = i // k
135 |             if weights_sum[i] != 0:
136 |                 n_indices += np.random.choice(n, k - 1, p=weights[i]).tolist()
137 |             else:
138 |                 # every negative is beyond the cutoff, fall back to uniform sampling over other classes
139 |                 n_indices += np.random.choice(n, k - 1, p=mask_uniform_probs[i]).tolist()
140 |             for j in range(block_idx * k, (block_idx + 1) * k):
141 |                 if j != i:
142 |                     a_indices.append(i)
143 |                     p_indices.append(j)
144 |         return a_indices, p_indices, n_indices
145 | 
146 |     def get_distance(self, x):
147 |         """pairwise euclidean distance matrix for the rows of x, an np.ndarray of shape (n,d)"""
148 |         square = np.sum(x * x, axis=1, keepdims=True)
149 |         distance_square = square + square.transpose() - 2.0 * np.dot(x, x.transpose())
150 |         return np.sqrt(distance_square + np.identity(x.shape[0]))  # identity keeps the diagonal sqrt stable
--------------------------------------------------------------------------------
/data/n_pair_mc/npair_dataset.py:
--------------------------------------------------------------------------------
56 |                 if len(group_image_id)>=2:
57 | self.classid2imageid[class_id]=group_image_id #one group must have more than 2 images
58 | self.image_nums = self.data.image_id.count()
59 |
60 | def __len__(self):
61 |         """the length of the dataset, which sets the dataloader cycle size"""
62 | if self.train:
63 | return len(self.all_class) # 11318
64 | else:
65 | return self.image_nums
66 |
67 | def __getitem__(self, index):
68 |         """get one batch of pair data by index;
69 |         when using a dataloader, the batch size is always 1
70 |         in train mode:
71 |             the index is a class_id, so a batch of different classes is selected to construct an n-pair
72 |         in test mode:
73 |             the index is an image_id, so one picture is returned with its image_id and class_id; the extracted feature is later sent to clustering
74 |         """
75 | if self.train:
76 | class_id = self.all_class[index]
77 |             super_id = self.data[self.data.class_id==class_id].super_class_id.iloc[0] # the super class of the sampled class
78 | anchor_class=[]
79 | anchor_class.append(class_id)
80 |             inner_count = int(0.9* self.batch_size//2) # most pairs come from different classes within the same super class
81 | 
82 |             inner_class = np.random.choice(self.super2class[super_id], inner_count, False) # choose within the same super class
83 |             anchor_class.extend(inner_class)
84 |             anchor_class = list(set(anchor_class)) # remove duplicates
85 |
86 | outer_count = self.batch_size//2 - len(anchor_class)
87 | outer_class = np.random.choice(self.super_ids,outer_count,True)
88 | for outer_id in outer_class:
89 | anchor_class.extend(np.random.choice(self.super2class[outer_id],1))
90 |
91 | #from each anchor_class,select the anchor image and the postive image
92 | image_id =[]
93 | for anchor_id in anchor_class:
94 | select = np.random.choice(self.classid2imageid[anchor_id],2,False)
95 | image_id.extend(select)
96 |
97 |
98 | anchor_path = self.data[self.data.image_id.isin(image_id)][['image_id', 'path']]
99 |             anchor_path = anchor_path.sort_index() # sort_index returns a new frame, sorted by the row index
100 |             # stack the images into one bulk: anchors and positives are collected separately, then concatenated
101 | tensor_list=[]
102 | tensor_p=[]
103 | jump = False
104 | for i,image_path in enumerate(anchor_path.path):
105 | image = Image.open(os.path.join(self.root,image_path)).convert('RGB')
106 | if self.transform:
107 | data = self.transform(image)
108 | if i%2==0:
109 |                     if data.size(0)<3: # the anchor image has fewer than 3 channels
110 | jump = True # jump the next image
111 | continue
112 | jump = False
113 | tensor_list.append(data)
114 | else:
115 | if jump:
116 | continue
117 |                     if data.size(0)<3: # the paired image has fewer than 3 channels
118 | tensor_list.pop(-1) # delete the last one in tensor_list
119 | continue
120 | tensor_p.append(data)
121 |
122 |
123 | tensor_list.extend(tensor_p)
124 | #print("tensor dataset",len(tensor_list))
125 | batch_tensor = torch.stack(tensor_list,dim=0)
126 | return batch_tensor
127 | else:
128 | item = self.data.loc[index]
129 | image_path = item['path']
130 | image_id = int(item['image_id'])
131 | image_class= int(item['class_id'])
132 | default_path = self.data.loc[0]['path']
133 | image = Image.open(os.path.join(self.root, image_path)).convert('RGB')
134 | if self.transform:
135 | data = self.transform(image)
136 | if data.size(0)<3:
137 | image = Image.open(os.path.join(self.root, default_path))
138 | data = self.transform(image)
139 | image_id =0
140 | image_class =0
141 | return data,image_id,image_class
142 |
143 |
144 |
145 | if __name__=='__main__':
146 | """ to test the dataset"""
147 | import ipdb
148 | ipdb.set_trace()
149 | root = '/data/jh/notebooks/hudengjun/DML/deep_metric_learning/lib/online_products/Stanford_Online_Products/'
150 | dataset = EbayDataset(dir_root=root)
151 | data = dataset[0]
152 | print(type(data))
153 |
154 | test_dataset = EbayDataset(dir_root=root,train=False)
155 | data = test_dataset[0]
156 | print(data)
157 |
158 |
159 |
160 |
161 |
162 |
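163 | 
164 | # A minimal sketch (hypothetical helper, not used by the trainer) of how train_mc_npair.py
165 | # consumes a training batch from this dataset: the first half of the stacked tensor holds the
166 | # anchors and the second half the matching positives, so pair i is (batch[i], batch[i + B]).
167 | def _demo_npair_layout(batch_tensor):
168 |     assert batch_tensor.size(0) % 2 == 0
169 |     half = batch_tensor.size(0) // 2
170 |     anchors, positives = batch_tensor[:half], batch_tensor[half:]
171 |     target = torch.arange(0, half, dtype=torch.int64)  # anchor i matches positive i
172 |     return anchors, positives, target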
--------------------------------------------------------------------------------
/train_mc_npair.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.optim as optim
3 | from data import EbayDataset
4 | import os
5 | from configs import opt
6 | from models import ModGoogLeNet,NpairLoss
7 | from torch.utils.data import DataLoader
8 | from tqdm import tqdm
9 | from utils import Visulizer
10 | import csv
11 | import numpy as np
12 |
13 | def train(**kwargs):
14 | print("run train")
15 | opt.parse(kwargs)
16 | os.environ['CUDA_VISIBLE_DEVICES'] = str(opt.gpu_id)
17 |
18 | if opt.debug:
19 | import ipdb
20 | ipdb.set_trace()
21 | model =ModGoogLeNet(embeding_size=opt.embeding_size)
22 | if opt.dml_model_path:
23 | model.load(opt.dml_model_path)
24 | if opt.use_gpu:
25 | model = model.cuda()
26 | #model.freeze_model(level=opt.freeze_level)
27 |
28 | if opt.use_viz:
29 | viz = Visulizer(host=opt.vis_host,port=opt.vis_port,env='dml'+opt.vis_env)
30 | viz.log("start to train dml npair mc model")
31 |
32 | #loss function
33 | criterion = NpairLoss(l2_reg=opt.l2_reg)
34 | lr = opt.lr
35 | m = opt.momentum
36 | optimizer = optim.SGD([{'params':model.level1_2.parameters()},
37 | {'params': model.level_3_4.parameters()},
38 | {'params': model.level_5_6.parameters()},
39 | {'params': model.level_7.parameters()},
40 | {'params':model.fc.parameters(),'lr':10*lr}],lr=lr,momentum=m)
41 | #optimizer = optim.SGD(model.parameters(),lr=lr,momentum=m)
42 | lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,1,0.2)
43 |
44 | # data and dataloader
45 | train_data = EbayDataset(dir_root=opt.ebay_dir, train=True, batch_size=opt.batch_size)
46 | cycle_length = len(train_data)
47 | #val_data = EbayDataset(dir_root=opt.ebay_dir, train=False, batch_size=opt.batch_size)
48 | train_dataloader = DataLoader(train_data, batch_size=1, shuffle=True, num_workers=opt.num_workers)
49 | #val_dataloader = DataLoader(val_data, batch_size=60, shuffle=False, num_workers=opt.num_workers)
50 |
51 |     print("dataloader set, begin to train")
52 |
53 | #f = open('dml_log.out','w')
54 | for epoch in range(opt.max_epoch):
55 | lr_scheduler.step()
56 | train_loss = 0
57 |
58 | for i,data in enumerate(train_dataloader):
59 | # if i in [200, 800, 1500]:
60 | # lr_scheduler.step()
61 |
62 | data = data[0]
63 | if opt.use_gpu:
64 | data = data.cuda()
65 | optimizer.zero_grad()
66 | feature = model(data)
67 | batch_size = data.size(0)
68 | target = torch.arange(0, int(batch_size / 2), dtype=torch.int64).cuda()
69 | loss = criterion(feature,target)
70 | loss.backward()
71 | optimizer.step()
72 |
73 | train_loss += loss.item()
74 | freq = int(opt.print_freq)
75 | if i%freq==(freq-1):
76 | average_loss = train_loss /opt.print_freq
77 | #f.write("iteration:{0},dml_loss:{1}\n".format(i+ epoch*cycle_length,average_loss))
78 | #f.flush()
79 | if opt.use_viz:
80 | viz.plot('dml_loss',average_loss)
81 | train_loss =0
82 | if opt.debug:
83 | break
84 | #f.write("epoch:{0} finished,begin to valid test".format(epoch))
85 | model.save()
86 | # if epoch>1 and epoch%5==0:
87 | # val(model,val_dataloader,epoch)
88 | if opt.debug:
89 | #f.write("finish one iter")
90 | break
91 | #f.write("finish train epoch {0}".format(opt.max_epoch))
92 | #f.close()
93 |
94 |
95 | def val(model,dataloder,epoch):
96 | """
97 |     this validation routine calculates the NMI (normalized mutual information) index
98 | :param model: the emebding model
99 | :param dataloder: val dataloder
100 | :return:
101 | """
102 | # prepare file model to extract feature
103 | file_name = 'checkpoints/online_product_{0}.csv'.format(epoch)
104 | f = open(file_name,'w')
105 | writer = csv.writer(f,dialect='excel')
106 | model.eval()
107 |     # feature extraction: first, extract the feature vector for every image together with its image_id and class_id
108 | for i,(data,image_id,class_id) in enumerate(dataloder):
109 | if opt.use_gpu:
110 | data = data.cuda()
111 | feature = model(data) # the feature is [batch,512] vector
112 | vector = feature.cpu().detach().numpy() if opt.use_gpu else feature.numpy()
113 | image_id = image_id.numpy().reshape(-1,1)
114 | class_id = class_id.numpy().reshape(-1,1)
115 |         result = np.hstack([image_id,class_id,vector])
116 |
117 | #write the data to dataframe file
118 | writer.writerows(result)
119 | if opt.debug:
120 | print("test one batch of val data and save to csv file")
121 | break
122 | f.close()
123 |     # second: cluster all feature vectors with kmeans into |class_id| centroids
124 |     #featuredata = pd.read_csv(file_name,header=None)
125 | 
126 |     # then compute the NMI between the true class distribution and the cluster assignment via sklearn (a sketch, nmi_from_csv, is at the bottom of this file)
127 |
128 |
129 | model.train()
130 |     print("finished clustering and evaluation")
131 |
132 |
133 | def compute(**kwargs):
134 | print("run compute_vector")
135 | opt.parse(kwargs)
136 | os.environ['CUDA_VISIBLE_DEVICES'] = str(opt.gpu_id)
137 |
138 | if opt.debug:
139 | import ipdb
140 | ipdb.set_trace()
141 | model =ModGoogLeNet(embeding_size=opt.embeding_size)
142 | if opt.dml_model_path:
143 | model.load(opt.dml_model_path)
144 | if opt.use_gpu:
145 | model = model.cuda()
146 |
147 | val_data = EbayDataset(dir_root=opt.ebay_dir, train=False, batch_size=opt.batch_size)
148 | val_dataloader = DataLoader(val_data, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers)
149 |
150 | file_name = 'checkpoints/online_product_compute.csv'
151 | f = open(file_name, 'w')
152 | writer = csv.writer(f, dialect='excel')
153 | model.eval()
154 |     # feature extraction: first, extract the feature vector for every image together with its image_id and class_id
155 | for i, (data, image_id, class_id) in enumerate(val_dataloader):
156 | if opt.use_gpu:
157 | data = data.cuda()
158 | feature = model(data) # the feature is [batch,512] vector
159 | vector = feature.cpu().detach().numpy() if opt.use_gpu else feature.numpy()
160 | image_id = image_id.numpy().reshape(-1, 1)
161 | class_id = class_id.numpy().reshape(-1, 1)
162 | result = np.hstack([image_id, class_id, vector])
163 |
164 | # write the data to dataframe file
165 | writer.writerows(result)
166 | if opt.debug:
167 | print("test one batch of val data and save to csv file")
168 | break
169 | f.close()
170 |
171 |
172 | def help():
173 | """print function use information"""
174 |     print("""this file helps to train the online-products model:
175 |     examples:
176 | python train_mc_npair.py help
177 | python train_mc_npair.py train --gpu_id=3 --debug=True
178 | python train_mc_npair.py train --gpu_id=2 --batch_size=72
179 | python train_mc_npair.py train --gpu_id=3 --lr=0.0003 --batch_size=72
180 | python train_mc_npair.py train --gpu_id=0 --debug=True --dml_model_path=checkpoints/DMLGoogle_0710_20\:24\:04.pth
181 | python train_mc_npair.py train --batch_size=120 --gpu_id=3 --lr=0.0001 --debug=True --dml_model_path=checkpoints/DMLGoogle_0710_20\:24\:04.pth
182 | python train_mc_npair.py compute --batch_size=300 --gpu_id=2 --dml_model_path=checkpoints/DMLGoogle_0714_07:51:44.pth --num_workers=6
183 | """)
184 |
185 | if __name__=='__main__':
186 | import fire
187 | fire.Fire()
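188 | 
189 | 
190 | # A minimal sketch of the clustering + NMI step that val() above leaves as comments, assuming
191 | # scikit-learn is available; nmi_from_csv is a hypothetical helper name, not an existing API.
192 | def nmi_from_csv(file_name):
193 |     import pandas as pd
194 |     from sklearn.cluster import KMeans
195 |     from sklearn.metrics import normalized_mutual_info_score
196 |     df = pd.read_csv(file_name, header=None)
197 |     class_ids = df.iloc[:, 1].values               # column 1 holds class_id
198 |     vectors = df.iloc[:, 2:].values                # remaining columns hold the embedding
199 |     n_clusters = len(np.unique(class_ids))         # cluster into |class_id| centroids
200 |     assignment = KMeans(n_clusters=n_clusters).fit_predict(vectors)
201 |     return normalized_mutual_info_score(class_ids, assignment)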
--------------------------------------------------------------------------------
/data/mxdata/online_products.py:
--------------------------------------------------------------------------------
1 | from mxnet.image import *
2 | from mxnet.gluon.data import Dataset,DataLoader
3 | from mxnet.image import *
4 | import numpy as np
5 | import mxnet as mx
6 | from mxnet.gluon import nn
7 | import mxnet.gluon.data.vision.transforms as T
8 | import pandas as pd
9 |
10 | normalize=T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
11 | default_transform = T.Compose([
12 | T.Resize(256),
13 | T.RandomResizedCrop(size=224,scale=(1.0,1.0),ratio=(1.0,1.0)),# just crop,not scale
14 | T.RandomFlipLeftRight(),
15 | T.ToTensor(), # last to swap channel to c,w,h
16 | normalize
17 | ])
18 |
19 | test_transform = T.Compose([
20 | T.Resize(256),
21 | T.CenterCrop(224),
22 | T.ToTensor(),
23 | normalize
24 | ])
25 |
26 | class MxEbayInClass(Dataset):
27 | """this is an mxnet edition of Ebay dataset"""
28 | def __init__(self,dir_root,batch_k=4,batch_size=40,is_train=True,transform =default_transform):
29 | self.batch_size=batch_size
30 | self.batch_k = batch_k
31 | self.root = dir_root
32 | self._trans = transform
33 | self.is_train = is_train
34 |
35 | self.test_image_files =[]
36 | self.test_labels =[]
37 | self.train_length = 0
38 |
39 | if self.is_train:
40 | table_name = os.path.join(self.root,'Ebay_train.txt')
41 | table_data = pd.read_table(table_name, header=0, delim_whitespace=True)
42 | min_super_id, max_super_id = min(table_data.super_class_id), max(table_data.super_class_id)
43 | self.super_ids = np.arange(min_super_id, max_super_id + 1)
44 | self.super2class = {}
45 | for super_id in self.super_ids:
46 | self.super2class[super_id] = table_data[table_data.super_class_id == super_id].class_id.tolist()
47 |
48 | min_class_id,max_class_id = min(table_data.class_id),max(table_data.class_id)
49 | self.class_ids = list(np.arange(min_class_id,max_class_id+1))
50 | self.train_length = max_class_id+1-min_class_id
51 | self.super_id_dist = [len(v) for k,v in self.super2class.items()]
52 | total = sum(self.super_id_dist)
53 | self.super_id_dist = [v*1.0/total for v in self.super_id_dist]
54 | self.class2imagefiless = [[]] # placeholder for class_id = 0
55 | for class_id in self.class_ids:
56 | one_class_paths = table_data[table_data.class_id==class_id].path.tolist() # type list
57 | self.class2imagefiless.append(one_class_paths)
58 | else:
59 | table_name = os.path.join(self.root,'Ebay_test.txt')
60 | table_data = pd.read_table(table_name,header=0,delim_whitespace=True)
61 |
62 | self.test_image_files = table_data.path.tolist()
63 | self.test_labels = table_data.class_id.tolist()
64 |
65 |
66 |
67 | def __len__(self):
68 | if self.is_train:
69 | return 800
70 | else:
71 | return 4000
72 |
73 | def sample_train_batch(self):
74 | batch =[]
75 | labels =[]
76 | num_groups = self.batch_size // self.batch_k # for every sample count k
77 | super_id = np.random.choice(list(self.super2class.keys()), size=1,p=self.super_id_dist)[0] # the super class id
78 | sampled_class = np.random.choice(self.super2class[super_id], num_groups*2, replace=False)
79 | for i in sampled_class:
80 | try:
81 | img_fnames = np.random.choice(self.class2imagefiless[i],
82 | self.batch_k,
83 | replace=False)
84 |             except Exception as e: # the class has too few images to sample batch_k without replacement
85 | continue
86 | batch += img_fnames.tolist()
87 | labels += [i]*self.batch_k
88 | if len(batch)>=self.batch_size:
89 | break
90 | return batch,labels
91 |
92 |
93 | def __getitem__(self, index):
94 |         """get a full batch per index, like the pytorch edition;
95 |         only sample classes within one super class, no cross-super-class sampling"""
96 | if self.is_train:
97 | imagelist =[]
98 | batch,labels = self.sample_train_batch()
99 | for file in batch:
100 | file_path = os.path.join(self.root,file)
101 | img = image.imread(file_path,to_rgb=1,flag=1)
102 | img = self._trans(img)
103 | imagelist.append(img)
104 | return nd.stack(*imagelist,axis=0),nd.array(labels)
105 | else:
106 | file = self.test_image_files[index]
107 | label = self.test_labels[index]
108 | img = image.imread(os.path.join(self.root,file),flag=1,to_rgb=1)
109 | img = self._trans(img)
110 | return img,label
111 |
112 |
113 |
114 | def getEbayInClassData(root,batch_k,batch_size):
115 | train_dataset = MxEbayInClass(root,batch_k=batch_k,batch_size=batch_size,is_train=True,transform=default_transform)
116 | test_dataset = MxEbayInClass(root,batch_k=batch_k,batch_size=batch_size,is_train=False,transform=test_transform)
117 | train_loader = DataLoader(train_dataset,batch_size=1,shuffle=False,num_workers=6)
118 | test_loader = DataLoader(test_dataset,batch_size=test_dataset.batch_size,shuffle=False,num_workers=6)
119 | return train_loader,test_loader
120 |
121 |
122 | class MxEbayCrossClass(MxEbayInClass):
123 | """the cross class edition of StanfordOnlineProducts"""
124 | def __init__(self,dir_root,batch_k=4,batch_size=40,is_train=True,transform =default_transform):
125 | super(MxEbayCrossClass,self).__init__(dir_root=dir_root,batch_k=batch_k,batch_size=batch_size,is_train=is_train,transform=transform)
126 | self.datatype="CrossClass"
127 |
128 | def sample_train_batch(self):
129 | """rewrite the sample strategy"""
130 | batch = []
131 | labels = []
132 | num_groups = self.batch_size // self.batch_k # for every sample count k
133 |
134 | #directly choose the class_id
135 | sampled_class = np.random.choice(self.class_ids, num_groups * 2, replace=False)
136 | for i in sampled_class:
137 | try:
138 | img_fnames = np.random.choice(self.class2imagefiless[i],
139 | self.batch_k,
140 | replace=False)
141 | except:
142 | print("class id:{0},instance count small than {1}".format(i, self.batch_k))
143 | continue
144 | batch += img_fnames.tolist()
145 | labels += [i] * self.batch_k
146 | if len(batch) >= self.batch_size:
147 | break
148 | return batch, labels
149 |
150 | def getEbayCrossClassData(root,batch_k,batch_size):
151 | train_dataset = MxEbayCrossClass(root, batch_k=batch_k, batch_size=batch_size, is_train=True, transform=default_transform)
152 | test_dataset = MxEbayCrossClass(root, batch_k=batch_k, batch_size=batch_size, is_train=False, transform=test_transform)
153 | train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False, num_workers=6)
154 | test_loader = DataLoader(test_dataset, batch_size=test_dataset.batch_size, shuffle=False, num_workers=6)
155 | return train_loader, test_loader
156 |
157 | if __name__=='__main__':
158 | # construct the dataset and get data in train and test mode
159 |
160 | train_data = MxEbayInClass(dir_root='data/Stanford_Online_Products',\
161 | batch_k=4,batch_size=40,is_train=True,\
162 | transform=default_transform)
163 |
164 | data = train_data[0]
165 |
166 | train_crossdata = MxEbayCrossClass(dir_root='data/Stanford_Online_Products',\
167 | batch_k=4,batch_size=40,is_train=True,\
168 | transform=default_transform)
169 | data2 = train_crossdata[0]
170 | import ipdb
171 | ipdb.set_trace()
172 | test_data = MxEbayInClass(dir_root='data/Stanford_Online_Products',\
173 | batch_k=4,batch_size=40,is_train=False,\
174 | transform=test_transform)
175 | data = test_data[0]
176 |
177 |
178 |
179 |
180 |
181 |
182 |
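183 | 
184 | # Illustrative check (toy numbers, not the real dataset) of the super-class distribution built
185 | # in MxEbayInClass.__init__: a super class is drawn with probability proportional to the number
186 | # of classes it contains, which is what sample_train_batch relies on.
187 | def _demo_super_dist():
188 |     super2class = {1: [1, 2, 3], 2: [4], 3: [5, 6]}    # toy super_id -> class_ids mapping
189 |     dist = [len(v) for v in super2class.values()]
190 |     total = sum(dist)
191 |     dist = [c * 1.0 / total for c in dist]             # [0.5, 1/6, 1/3]
192 |     return np.random.choice(list(super2class.keys()), size=1, p=dist)[0]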
--------------------------------------------------------------------------------
/train_margin_cub.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 | import logging
4 | import numpy as np
5 | import torch
6 |
7 | from torch.utils.data import DataLoader
8 | import os
9 | from data import CUB200DataSet
10 | from models import Margin_Loss,SampleModel
11 |
12 |
13 | parser = argparse.ArgumentParser(description="train a margin based loss model")
14 | parser.add_argument('--data_path',type=str,default="data/cub200_2011",
15 | help='path of the cub_data')
16 | parser.add_argument('--embed_dim',type=int,default=128,
17 |                     help='dimensionality of the image embedding, a multiple of 8')
18 | parser.add_argument('--batch_size',type=int,default=70,
19 | help='training batch size per device')
20 | parser.add_argument('--batch_k',type=int,default=5,
21 |                     help='number of images per class in a batch; must divide batch_size')
22 | parser.add_argument('--gpu_id',type=str,default='0',
23 |                     help='the gpu id to run on')
24 | parser.add_argument('--epochs',type=int,default=100,
25 | help='number of training epochs,default is 100')
26 | parser.add_argument('--optimizer',type=str,default='adam',
27 | help='optimizer,default is adam')
28 | parser.add_argument('--lr',type=float,default=0.0001,
29 | help='learning rate of the resnet and dense layer')
30 | parser.add_argument('--lr_beta',type=float,default=0.1,
31 | help='learning rate for the beta in margin based loss')
32 | parser.add_argument('--margin',type=float,default=0.2,
33 | help='margin for the margin based loss,default is 0.2')
34 | parser.add_argument('--beta',type=float,default=1.2,
35 | help='the class specific beta parameter')
36 | parser.add_argument('--nu',type=float,default=0.0,
37 | help='regularization parameter for beta,default is 0')
38 | parser.add_argument('--steps',type=str,default='30,50,100,300',
39 |                     help='epochs at which to update the learning rate')
40 | parser.add_argument('--wd',type=float,default=0.0001,
41 | help='weight decay rate,default is 0.0001')
42 | parser.add_argument('--seed',type=int,default=123,
43 | help='random seed to use,default=123')
44 | parser.add_argument('--factor',type=float,default=0.5,
45 | help='learning rate schedule factor,default is 0.5')
46 | parser.add_argument('--print_freq',type=int,default=20,
47 | help='print the accumulate loss for training process')
48 | parser.add_argument('--debug',action='store_true',default=False)
49 |
50 |
51 | opt = parser.parse_args()
52 | logging.info(opt)
53 | torch.random.manual_seed(opt.seed)
54 | np.random.seed(opt.seed)
55 | batch_size = opt.batch_size
56 | os.environ['CUDA_VISIBLE_DEVICES']=opt.gpu_id
57 | steps = [int(step) for step in opt.steps.split(',')]
58 |
59 |
60 | def train():
61 | """
62 | train the margin based loss model
63 | :return:
64 | """
65 | # prepare for data for loader
66 | train_data = CUB200DataSet(data_path='data/cub200_2011/CUB_200_2011',batch_k=opt.batch_k,batch_size = opt.batch_size,is_train=True)
67 | test_data = CUB200DataSet(data_path='data/cub200_2011/CUB_200_2011',is_train=False)
68 |
69 | train_loader = DataLoader(train_data,batch_size=1,shuffle=False,num_workers=6)
70 | test_loader = DataLoader(test_data,batch_size=60,shuffle=False,num_workers=6)
71 |
72 | #begin to set model loss,optimizer,lr_rate, lr_schedule
73 | model = SampleModel(embeding_dim=opt.embed_dim)
74 | beta = torch.tensor(np.ones(100)*opt.beta, requires_grad=True,dtype=torch.float32)
75 |
76 | loss_criterion = Margin_Loss(batch_k=opt.batch_k,\
77 | margin=opt.margin,nu=opt.nu) # set loss function for this model
78 |
79 | conv_params = []
80 | non_conv_param =[]
81 | for name,param in model.base_model.named_parameters():
82 | if 'conv' in name:
83 | conv_params.append({'params':param,'lr':opt.lr*0.01})
84 | else:
85 | non_conv_param.append({'params':param,'lr':opt.lr})
86 | total_param =[]
87 | total_param.append({'params':model.dense.parameters(),'lr':opt.lr})
88 | total_param.extend(conv_params)
89 | total_param.extend(non_conv_param)
90 | #optimizer = torch.optim.Adam(total_param,lr=opt.lr,weight_decay=opt.wd)
91 | optimizer = torch.optim.SGD(total_param,lr=opt.lr,momentum=0.89)
92 | optimizer_beta = torch.optim.SGD([{'params':beta}],lr=opt.lr_beta,momentum= 0.9)
93 |
94 |
95 | lr_schedule = torch.optim.lr_scheduler.MultiStepLR(optimizer,
96 | milestones=steps,gamma=opt.factor)
97 |
98 | if int(opt.gpu_id)>=0:
99 | model = model.cuda() # the loss function has paramter to convey to cuda
100 | beta = beta.cuda() # the beta parameter has parameter to stored in cuda
101 | loss_criterion = loss_criterion.cuda() # the loss criterion has compute in cuda
102 | loss_criterion.convert_param(to_cuda=True)
103 |
104 | # begin to fetch data and train model
105 | for epoch in range(opt.epochs):
106 |         print("begin to train epoch:{0}".format(epoch))
107 | cumulative_loss =0
108 | prev_loss = 0
109 | lr_schedule.step()
110 | for i,data in enumerate(train_loader):
111 | images,label = data[0][0],data[1][0]
112 | if int(opt.gpu_id)>=0:
113 | images = images.cuda()
114 | label = label.cuda()
115 | features = model(images)
116 |             loss = loss_criterion(features,label,beta)
117 |             optimizer.zero_grad(); optimizer_beta.zero_grad() # clear stale gradients before backward
118 |             loss.backward()
119 |             optimizer.step(); optimizer_beta.step()
120 | cumulative_loss += loss.item()
121 | if (i+1)%(opt.print_freq)==0:
122 | print("[Epoch %d,Iter %d] training loss=%f"%(epoch,i+1,cumulative_loss-prev_loss))
123 | prev_loss = cumulative_loss
124 | if opt.debug:
125 | break
126 |
127 |         print("[Epoch %d] training loss =%f"%(epoch,cumulative_loss))
128 | # print test val recall index
129 | names,val_accs = val_model(model,test_loader)
130 | for name,val_acc in zip(names,val_accs):
131 | print("Epoch %d,validation:%s=%f"%(epoch,name,val_acc))
132 | print("job finished")
133 |
134 |
135 | def val_model(model,test_loader):
136 | """
137 | val the model,return the recall@K k=1 index
138 | :param model: Margin based model to extract feature of 128 dimension
139 | :param test_loader: Test dataloader to load images data
140 | :return: the recall@K k=1 index
141 | """
142 | model.eval()
143 | outputs = []
144 | labels =[]
145 | with torch.no_grad():
146 | for data,label in test_loader:
147 | if int(opt.gpu_id)>=0:
148 | data = data.cuda()
149 | feature = model(data)
150 | outputs += feature.detach().cpu().numpy().tolist()
151 | labels += label.numpy().tolist()
152 | model.train()
153 |
154 | #eval recall@k
155 | features = np.array(outputs)
156 | labels = np.array(labels)
157 |
158 | return evaluate_emb(features,labels)
159 |
160 | def evaluate_emb(features,labels):
161 | """
162 | evaluate embedding in recall
163 | :param features:
164 | :param labels:
165 | :return:
166 | """
167 | d_mat = get_distance_matrix(features)
168 | names =[]
169 | accs =[]
170 | for k in [1,2,4,8,16]:
171 | names.append('Recall@%d'%k)
172 | correct,cnt = 0.0,0.0
173 | for i in range(features.shape[0]):
174 | d_mat[i,i]=1e10
175 | nns = d_mat[i].argpartition(k)[:k]
176 | if any(labels[i] ==labels[nn] for nn in nns):
177 | correct +=1
178 | cnt +=1
179 | accs.append(correct/cnt)
180 | return names,accs # names is a list of ["Recall@K",,,,] accs is a list of [float_value]
181 |
182 |
183 |
184 | def get_distance_matrix(x):
185 | """
186 |     compute the distance matrix of the features
187 |     :param x: np.ndarray in shape (n,d), d is 128
188 |     :return: [n,n] matrix of squared pairwise distances between the row vectors
189 | """
190 |     square = np.sum(x*x,axis=1,keepdims=True)
191 |     distance_square = square + square.transpose() -2*np.dot(x,x.transpose())
192 |     return distance_square
193 |
194 |
195 |
196 |
197 | if __name__=='__main__':
198 | print("begin to train the model of margin based loss")
199 | train()
200 |
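201 | 
202 | # Quick self-contained sanity check (toy data) for get_distance_matrix above: it relies on the
203 | # identity ||a-b||^2 = ||a||^2 + ||b||^2 - 2<a,b>, so it should agree with a brute-force loop.
204 | def _check_distance_matrix():
205 |     x = np.random.randn(4, 3)
206 |     fast = get_distance_matrix(x)
207 |     brute = np.array([[np.sum((a - b) ** 2) for b in x] for a in x])
208 |     assert np.allclose(fast, brute, atol=1e-6)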
--------------------------------------------------------------------------------
/server/copy_nn.py:
--------------------------------------------------------------------------------
1 | # import pymongo
2 | # import mxnet
3 | # from mxnet import nd
4 | #
5 | # #every time yield 20 items and read iobytes extract feature then insert to new nnindex
6 | #
7 | # import asyncio
8 | # import aiohttp
9 | # from io import BytesIO
10 | # import time
11 | # import requests
12 | #
13 | #
14 | # @asyncio.coroutine
15 | # def get_image(img_url):
16 | # resp = yield from requests.get(img_url)
17 | # return resp.content
18 | #
19 | # def save_image(img,fobj):
20 | # fobj.write(img)
21 | #
22 | # @asyncio.coroutine
23 | # def download_one(img_url,fobj):
24 | # image = yield from get_image(img_url)
25 | # save_image(image,fobj)
26 |
27 | # !/usr/bin/env python
28 | # import asyncio
29 | # import aiohttp
30 | #
31 | # async def fetch_img(session, url):
32 | # with aiohttp.Timeout(10):
33 | # async with session.get(url) as response:
34 | # assert response.status == 200
35 | # return await response.read()
36 | #
37 | # loop = asyncio.get_event_loop()
38 | # with aiohttp.ClientSession(loop=loop) as session:
39 | # img = loop.run_until_complete(
40 | # fetch_img(session, 'https://cdn.aidigger.com/images/instagram/f95f00da22a2e143e6e457b10544a120.jpeg'))
41 | # with open("img.png", "wb") as f:
42 | # f.write(img)
43 |
44 | # if __name__ == '__main__':
45 | # url_list = ['https://cdn.aidigger.com/images/instagram/e2452f9daaad3ef7070adb22ee70958a.jpeg',
46 | # 'https://cdn.aidigger.com/images/instagram/bd717eaa4c351b842a497e8907b69855.jpeg',
47 | # 'https://cdn.aidigger.com/images/instagram/189a2af5d9661500b32271ca9b1865be.jpeg',
48 | # 'https://cdn.aidigger.com/images/instagram/6e70c94dd3fac214c5d7e6c061df2b2f.jpeg',
49 | # 'https://cdn.aidigger.com/images/instagram/f95f00da22a2e143e6e457b10544a120.jpeg']
50 | # fobj_list =[BytesIO() for _ in range(len(url_list))]
51 | # start = time.time()
52 | # loop = asyncio.get_event_loop()
53 | # to_do_tasks = [download_one(url,f) for url,f in zip(url_list,fobj_list)]
54 | # res,= loop.run_until_complete(asyncio.wait(to_do_tasks))
55 | # print(len(res))
56 | # print(time.time()-start)
57 |
58 |
59 | import asyncio
60 | import logging
61 | from contextlib import closing
62 | import aiohttp # $ pip install aiohttp
63 | from io import BytesIO
64 | from PIL import Image
65 | import numpy as np
66 | from pymongo import MongoClient
67 | from mxnet import nd
68 | import mxnet as mx
69 | import mxnet.gluon.data.vision.transforms as T
70 | import mxnet.gluon.model_zoo.vision as vision_model
71 | from models import MarginNet
72 | import mxnet
73 | from mxnet.image import imread
74 |
75 | logging.basicConfig(level=logging.WARNING, format='%(asctime)s %(message)s')
76 | import requests
77 | import json
78 | import binascii
79 | import numpy as np
80 | from pymongo import MongoClient
81 | from requests import ReadTimeout
82 | from pprint import pprint
83 |
84 |
85 |
86 |
87 | #image transform
88 | normalize=T.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
89 | test_transform = T.Compose([
90 | T.Resize(256),
91 | T.CenterCrop(224),
92 | T.ToTensor(),
93 | normalize
94 | ])
95 |
96 | # define mongodb connect
97 | def get_db():
98 | mongdb={}
99 | mongdb['host']='cc.com'
100 | mongdb['port']=3717
101 | client=MongoClient(host=mongdb['host'],port=mongdb['port'])
102 | dev=client.get_database('dev')
103 | dev.authenticate(name='cc',password='cc')
104 | return dev
105 |
106 |
107 | @asyncio.coroutine
108 | def download(url, session, semaphore, chunk_size=1<<15):
109 | with (yield from semaphore): # limit number of concurrent downloads
110 | file = BytesIO()
111 | logging.info('downloading %s', file)
112 | response = yield from session.get(url)
113 | with closing(response):
114 | while True: # save file
115 | chunk = yield from response.content.read(chunk_size)
116 | if not chunk:
117 | break
118 | file.write(chunk)
119 | logging.info('done %s', file)
120 | return file, (response.status, tuple(response.headers.items()))
121 |
122 | def get_net(gpu_id):
123 | param_path = 'checkpoints/Fashion_In.params'
124 | base_net = vision_model.get_model('resnet50_v2')
125 | net = MarginNet(base_net.features, 128, batch_k=5)
126 | context = [mxnet.gpu(gpu_id)]
127 | net.initialize()
128 | net.collect_params().reset_ctx(context)
129 | net.load_parameters(filename=param_path, ctx=context[0])
130 | return net,context
131 |
132 | def get_cursor(db,collection_name,batch_size):
133 | #define source nn_prod data fetch
134 | nn_prod = db.get_collection(collection_name)
135 | cursor = nn_prod.find({},{'vector':0,},batch_size=batch_size)
136 | return cursor
137 |
138 | def get_target_colection(db):
139 | colletion_name = 'image_metric_taobao128'
140 | target_collection = db.get_collection(colletion_name)
141 | return target_collection
142 |
143 |
144 | def convert_vector_to_ascii(vector):
145 |     """convert a numpy array or a list to bytes; to make the result json-serializable, the bytes are base64-encoded into a string
146 | """
147 | if isinstance(vector, (list, np.ndarray, np.generic)):
148 | vector = np.asarray(vector, dtype=np.float32)
149 | else:
150 | raise ValueError("vector must be list or numpy array")
151 | # add decode to convert base64 bytes to string
152 | return binascii.b2a_base64(vector.tobytes()).decode()
153 |
154 | def get_nn_config(model_name ='image_metric_taobao128'):
155 |
156 | host = 'https://alpha-nnsearch.aidigger.com/api/v1/'
157 | path = 'model/'+model_name+'/'
158 | return host,path
159 |
160 | # begin to set basic paramter
161 | batch_size=20
162 | urls= []
163 | records = []
164 | db = get_db()
165 | cursor = get_cursor(db,'image_nn_prod',batch_size)
166 | net,context = get_net(0)
167 | host,path = get_nn_config('image_metric_taobao128')
168 | # set basic parameter finished
169 |
170 | targe_collection = get_target_colection(db)
171 |
172 | loop = asyncio.get_event_loop()
173 | session = aiohttp.ClientSession()
174 | semaphore = asyncio.Semaphore(20)
175 |
176 | for item in cursor:
177 | if len(urls)==batch_size:
178 | #process
179 | #with closing(asyncio.get_event_loop()) as loop, closing(aiohttp.ClientSession()) as session:
180 | try:
181 | download_tasks = (download(url, session, semaphore) for url in urls)
182 | result = loop.run_until_complete(asyncio.gather(*download_tasks))
183 | except Exception as e:
184 | print(e)
185 | urls = []
186 | records = []
187 | continue
188 |
189 | nd_img_list = []
190 | succeed_ids = []
191 | docs = []
192 | for i,(f_ret,rec) in enumerate(zip(result,records)):
193 | try:
194 | pil_img = Image.open(f_ret[0])
195 | nd_img_list.append(test_transform(nd.array(np.asarray(pil_img))))
196 | new_rec = {}
197 | new_rec['_id'] = rec['_id']
198 | new_rec['_int_id'] = rec['int_id']
199 | new_rec.update(rec['_source'])
200 | docs.append(new_rec)
201 | except Exception as e:
202 | print(urls[i])
203 | print(e)
204 |
205 |
206 | #nd_img_list = [test_transform(nd.array(np.asarray(Image.open(f_ret[0])))) for f_ret in result ]
207 | if len(nd_img_list)!=len(records) or len(nd_img_list)< 2:
208 | if len(nd_img_list)<2:
209 | print(urls[0])
210 | print("caution,failed to download all pictures")
211 | print(result[0][1][0],result[0][1][1])
212 |
213 | records.clear()
214 | urls.clear()
215 | docs.clear()
216 | for f_ret in result:
217 | try:
218 | if not f_ret[0].closed:
219 | f_ret[0].close()
220 | except Exception as e:
221 | print(e)
222 | continue
223 |
224 | nd_tensor_img = nd.stack(*nd_img_list,axis=0)
225 | nd_tensor_img = nd_tensor_img.as_in_context(context[0])
226 | data = net.extract(nd_tensor_img)
227 | data = data.asnumpy()
228 |
229 |
230 |
231 | doc_types =['image']*len(records)
232 | vectors = [convert_vector_to_ascii(v) for v in data ]
233 |
234 | ret = requests.post(host + path + "add/batch", json={"docs": docs, "doc_types": doc_types, "vectors": vectors})
235 | print(ret.json())
236 |
237 |         #for another loop, reset the accumulators
238 |         doc_types=[]
239 |         vectors =[]
240 | 
241 |         records = []
242 | urls=[]
243 | for f_ret in result:
244 | try:
245 | if not f_ret[0].closed:
246 | f_ret[0].close()
247 | except Exception as e:
248 | print(e)
249 | else:
250 | records.append(item)
251 | urls.append(item['_source']['cdn_url'])
252 |
253 |
254 |
255 |
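256 | # Round-trip sketch for convert_vector_to_ascii above: the base64 string decodes back to the
257 | # original float32 vector (ascii_to_vector is a hypothetical helper, not used by this job).
258 | def ascii_to_vector(s):
259 |     return np.frombuffer(binascii.a2b_base64(s.encode()), dtype=np.float32)
260 | 
261 | # v = np.arange(4, dtype=np.float32)
262 | # assert np.allclose(ascii_to_vector(convert_vector_to_ascii(v)), v)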
--------------------------------------------------------------------------------
/models/mx_margin_model.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 |
19 | from mxnet import gluon
20 | from mxnet.gluon import nn, Block, HybridBlock
21 | import numpy as np
22 |
23 | class L2Normalization(HybridBlock):
24 | r"""Applies L2 Normalization to input.
25 |
26 | Parameters
27 | ----------
28 | mode : str
29 | Mode of normalization.
30 | See :func:`~mxnet.ndarray.L2Normalization` for available choices.
31 |
32 | Inputs:
33 | - **data**: input tensor with arbitrary shape.
34 |
35 | Outputs:
36 | - **out**: output tensor with the same shape as `data`.
37 | """
38 | def __init__(self, mode, **kwargs):
39 | self._mode = mode
40 | super(L2Normalization, self).__init__(**kwargs)
41 |
42 | def hybrid_forward(self, F, x):
43 | return F.L2Normalization(x, mode=self._mode, name='l2_norm')
44 |
45 | def __repr__(self):
46 | s = '{name}({_mode})'
47 | return s.format(name=self.__class__.__name__,
48 | **self.__dict__)
49 |
50 |
51 | def get_distance(F, x):
52 | """Helper function for margin-based loss. Return a distance matrix given a matrix."""
53 | n = x.shape[0]
54 |
55 | square = F.sum(x ** 2.0, axis=1, keepdims=True)
56 | distance_square = square + square.transpose() - (2.0 * F.dot(x, x.transpose()))
57 |
58 | # Adding identity to make sqrt work.
59 | return F.sqrt(distance_square + F.array(np.identity(n)))
60 |
61 | class DistanceWeightedSampling(HybridBlock):
62 | r"""Distance weighted sampling. See "sampling matters in deep embedding learning"
63 | paper for details.
64 |
65 | Parameters
66 | ----------
67 | batch_k : int
68 | Number of images per class.
69 |
70 | Inputs:
71 | - **data**: input tensor with shape (batch_size, embed_dim).
72 | Here we assume the consecutive batch_k examples are of the same class.
73 | For example, if batch_k = 5, the first 5 examples belong to the same class,
74 | 6th-10th examples belong to another class, etc.
75 |
76 | Outputs:
77 | - a_indices: indices of anchors.
78 | - x[a_indices]: sampled anchor embeddings.
79 | - x[p_indices]: sampled positive embeddings.
80 | - x[n_indices]: sampled negative embeddings.
81 | - x: embeddings of the input batch.
82 | """
83 | def __init__(self, batch_k, cutoff=0.5, nonzero_loss_cutoff=1.4, **kwargs):
84 | self.batch_k = batch_k
85 | self.cutoff = cutoff
86 |
87 | # We sample only from negatives that induce a non-zero loss.
88 | # These are negatives with a distance < nonzero_loss_cutoff.
89 | # With a margin-based loss, nonzero_loss_cutoff == margin + beta.
90 | self.nonzero_loss_cutoff = nonzero_loss_cutoff
91 | super(DistanceWeightedSampling, self).__init__(**kwargs)
92 |
93 | def hybrid_forward(self, F, x):
94 | k = self.batch_k
95 | n, d = x.shape
96 |
97 | distance = get_distance(F, x)
98 | # Cut off to avoid high variance.
99 | distance = F.maximum(distance, self.cutoff)
100 |
101 | # Subtract max(log(distance)) for stability.
102 | log_weights = ((2.0 - float(d)) * F.log(distance)
103 | - (float(d - 3) / 2) * F.log(1.0 - 0.25 * (distance ** 2.0)))
104 | weights = F.exp(log_weights - F.max(log_weights))
105 |
106 | # Sample only negative examples by setting weights of
107 | # the same-class examples to 0.
108 | mask = np.ones(weights.shape)
109 | for i in range(0, n, k):
110 | mask[i:i+k, i:i+k] = 0
111 | mask_uniform_probs = mask * (1.0/(n-k))
112 |
113 | weights = weights * F.array(mask) * (distance < self.nonzero_loss_cutoff)
114 | weights_sum = F.sum(weights, axis=1, keepdims=True)
115 | weights = weights / weights_sum
116 |
117 | a_indices = []
118 | p_indices = []
119 | n_indices = []
120 |
121 | np_weights = weights.asnumpy()
122 | for i in range(n):
123 | block_idx = i // k
124 |
125 | if weights_sum[i] != 0:
126 | n_indices += np.random.choice(n, k-1, p=np_weights[i]).tolist()
127 | else:
128 | # all samples are above the cutoff so we sample uniformly
129 | n_indices += np.random.choice(n, k-1, p=mask_uniform_probs[i]).tolist()
130 | for j in range(block_idx * k, (block_idx + 1) * k):
131 | if j != i:
132 | a_indices.append(i)
133 | p_indices.append(j)
134 |
135 | return a_indices, x[a_indices], x[p_indices], x[n_indices], x
136 |
137 | def __repr__(self):
138 | s = '{name}({batch_k})'
139 | return s.format(name=self.__class__.__name__,
140 | **self.__dict__)
141 |
142 |
143 | class MarginNet(Block):
144 | r"""Embedding network with distance weighted sampling.
145 | It takes a base CNN and adds an embedding layer and a
146 | sampling layer at the end.
147 |
148 | Parameters
149 | ----------
150 | base_net : Block
151 | Base network.
152 | emb_dim : int
153 | Dimensionality of the embedding.
154 | batch_k : int
155 | Number of images per class in a batch. Used in sampling.
156 |
157 | Inputs:
158 | - **data**: input tensor with shape (batch_size, channels, width, height).
159 | Here we assume the consecutive batch_k images are of the same class.
160 | For example, if batch_k = 5, the first 5 images belong to the same class,
161 | 6th-10th images belong to another class, etc.
162 |
163 | Outputs:
164 | - The output of DistanceWeightedSampling.
165 | """
166 | def __init__(self, base_net, emb_dim, batch_k=5, **kwargs):
167 | super(MarginNet, self).__init__(**kwargs)
168 | with self.name_scope():
169 | self.base_net = base_net
170 | self.dense = nn.Dense(emb_dim)
171 | self.normalize = L2Normalization(mode='instance')
172 | self.sampled = DistanceWeightedSampling(batch_k=batch_k)
173 |
174 | def forward(self, x):
175 | z = self.base_net(x)
176 | z = self.dense(z)
177 | z = self.normalize(z)
178 | z = self.sampled(z)
179 | return z
180 |
181 | def extract(self,x):
182 | z = self.base_net(x)
183 | z = self.dense(z)
184 | z = self.normalize(z)
185 | return z # just return feature vector
186 |
187 |
188 | class MarginLoss(gluon.loss.Loss):
189 | r"""Margin based loss.
190 |
191 | Parameters
192 | ----------
193 | margin : float
194 | Margin between positive and negative pairs.
195 | nu : float
196 | Regularization parameter for beta.
197 |
198 | Inputs:
199 | - anchors: sampled anchor embeddings.
200 | - positives: sampled positive embeddings.
201 | - negatives: sampled negative embeddings.
202 | - beta_in: class-specific betas.
203 | - a_indices: indices of anchors. Used to get class-specific beta.
204 |
205 | Outputs:
206 | - Loss.
207 | """
208 | def __init__(self, margin=0.2, nu=0.0, weight=None, batch_axis=0, **kwargs):
209 | super(MarginLoss, self).__init__(weight, batch_axis, **kwargs)
210 | self._margin = margin
211 | self._nu = nu
212 |
213 | def hybrid_forward(self, F, anchors, positives, negatives, beta_in, a_indices=None):
214 | if a_indices is not None:
215 | # Jointly train class-specific beta.
216 | beta = beta_in.data()[a_indices]
217 | beta_reg_loss = F.sum(beta) * self._nu
218 | else:
219 | # Use a constant beta.
220 | beta = beta_in
221 | beta_reg_loss = 0.0
222 |
223 | d_ap = F.sqrt(F.sum(F.square(positives - anchors), axis=1) + 1e-8)
224 | d_an = F.sqrt(F.sum(F.square(negatives - anchors), axis=1) + 1e-8)
225 |
226 | pos_loss = F.maximum(d_ap - beta + self._margin, 0.0)
227 | neg_loss = F.maximum(beta - d_an + self._margin, 0.0)
228 |
229 | pair_cnt = F.sum((pos_loss > 0.0) + (neg_loss > 0.0))
230 | if pair_cnt == 0.0:
231 |             # when pos_loss and neg_loss are zero, the total loss is zero as well
232 | loss = F.sum(pos_loss + neg_loss)
233 | else:
234 | # Normalize based on the number of pairs.
235 | loss = (F.sum(pos_loss + neg_loss) + beta_reg_loss) / pair_cnt
236 | return gluon.loss._apply_weighting(F, loss, self._weight, None)
237 |
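238 | 
239 | # Standalone numpy illustration of the inverse-density weights computed in
240 | # DistanceWeightedSampling.hybrid_forward above: on the unit sphere the pairwise-distance
241 | # density is q(d) ~ d^(n-2) * (1 - d^2/4)^((n-3)/2), and samples are weighted by 1/q(d).
242 | # The distances and dimensionality below are example values only.
243 | def _demo_sampling_weights(d=128):
244 |     dist = np.array([0.6, 1.0, 1.3])
245 |     log_w = (2.0 - d) * np.log(dist) - ((d - 3) / 2.0) * np.log(1.0 - 0.25 * dist ** 2)
246 |     w = np.exp(log_w - log_w.max())     # subtract the max for numerical stability
247 |     return w / w.sum()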
--------------------------------------------------------------------------------
/train_mx_margin.py:
--------------------------------------------------------------------------------
1 | # copied from the mxnet example/gluon/embedding_learning example code
2 |
3 | from __future__ import division
4 |
5 | import argparse
6 | import logging
7 | import time
8 |
9 | import numpy as np
10 | from bottleneck import argpartition
11 |
12 | import mxnet as mx
13 | from data import cub200_iterator
14 | from mxnet import gluon
15 | from mxnet.gluon.model_zoo import vision as models
16 | from mxnet import autograd as ag, nd
17 | from models.mx_margin_model import MarginNet, MarginLoss
18 |
19 | logging.basicConfig(level=logging.INFO)
20 |
21 | # CLI
22 | parser = argparse.ArgumentParser(description='train a model for image classification.')
23 | parser.add_argument('--data-path', type=str, default='data/CUB_200_2011',
24 | help='path of data.')
25 | parser.add_argument('--embed-dim', type=int, default=128,
26 | help='dimensionality of image embedding. default is 128.')
27 | parser.add_argument('--batch-size', type=int, default=70,
28 | help='training batch size per device (CPU/GPU). default is 70.')
29 | parser.add_argument('--batch-k', type=int, default=5,
30 | help='number of images per class in a batch. default is 5.')
31 | parser.add_argument('--gpus', type=str, default='',
32 | help='list of gpus to use, e.g. 0 or 0,2,5. empty means using cpu.')
33 | parser.add_argument('--epochs', type=int, default=20,
34 | help='number of training epochs. default is 20.')
35 | parser.add_argument('--optimizer', type=str, default='adam',
36 | help='optimizer. default is adam.')
37 | parser.add_argument('--lr', type=float, default=0.0001,
38 | help='learning rate. default is 0.0001.')
39 | parser.add_argument('--lr-beta', type=float, default=0.1,
40 | help='learning rate for the beta in margin based loss. default is 0.1.')
41 | parser.add_argument('--margin', type=float, default=0.2,
42 | help='margin for the margin based loss. default is 0.2.')
43 | parser.add_argument('--beta', type=float, default=1.2,
44 | help='initial value for beta. default is 1.2.')
45 | parser.add_argument('--nu', type=float, default=0.0,
46 | help='regularization parameter for beta. default is 0.0.')
47 | parser.add_argument('--factor', type=float, default=0.5,
48 | help='learning rate schedule factor. default is 0.5.')
49 | parser.add_argument('--steps', type=str, default='12,14,16,18',
50 | help='epochs to update learning rate. default is 12,14,16,18.')
51 | parser.add_argument('--wd', type=float, default=0.0001,
52 | help='weight decay rate. default is 0.0001.')
53 | parser.add_argument('--seed', type=int, default=123,
54 | help='random seed to use. default=123.')
55 | parser.add_argument('--model', type=str, default='resnet50_v2',
56 | help='type of model to use. see vision_model for options.')
57 | parser.add_argument('--save-model-prefix', type=str, default='margin_loss_model',
58 | help='prefix of models to be saved.')
59 | parser.add_argument('--use_pretrained', action='store_true',
60 | help='enable using pretrained model from gluon.')
61 | parser.add_argument('--kvstore', type=str, default='device',
62 | help='kvstore to use for trainer.')
63 | parser.add_argument('--log-interval', type=int, default=20,
64 | help='number of batches to wait before logging.')
65 | opt = parser.parse_args()
66 |
67 | logging.info(opt)
68 |
69 | # Settings.
70 | mx.random.seed(opt.seed)
71 | np.random.seed(opt.seed)
72 |
73 | batch_size = opt.batch_size
74 |
75 | gpus = [] if opt.gpus is None or opt.gpus == '' else [
76 |     int(gpu) for gpu in opt.gpus.split(',')]
77 | num_gpus = len(gpus)
78 |
79 | batch_size *= max(1, num_gpus)
80 | context = [mx.gpu(i) for i in gpus] if num_gpus > 0 else [mx.cpu()]
81 | steps = [int(step) for step in opt.steps.split(',')]
82 |
83 | # Construct model.
84 | kwargs = {'ctx': context, 'pretrained': opt.use_pretrained}
85 | net = models.get_model(opt.model, **kwargs)
86 |
87 | if opt.use_pretrained:
88 | # Use a smaller learning rate for pre-trained convolutional layers.
89 | for v in net.collect_params().values():
90 | if 'conv' in v.name:
91 | setattr(v, 'lr_mult', 0.01)
92 |
93 | net.hybridize()
94 | net = MarginNet(net.features, opt.embed_dim, opt.batch_k)
95 | beta = mx.gluon.Parameter('beta', shape=(100,))
96 |
97 | # Get iterators.
98 | train_data, val_data = cub200_iterator(opt.data_path, opt.batch_k, batch_size, (3, 224, 224))
99 |
100 |
101 | def get_distance_matrix(x):
102 | """Get distance matrix given a matrix. Used in testing."""
103 | square = nd.sum(x ** 2.0, axis=1, keepdims=True)
104 | distance_square = square + square.transpose() - (2.0 * nd.dot(x, x.transpose()))
105 | return nd.sqrt(distance_square)
106 |
107 |
108 | def evaluate_emb(emb, labels):
109 | """Evaluate embeddings based on Recall@k."""
110 | d_mat = get_distance_matrix(emb)
111 | d_mat = d_mat.asnumpy()
112 | labels = labels.asnumpy()
113 |
114 | names = []
115 | accs = []
116 | for k in [1, 2, 4, 8, 16]:
117 | names.append('Recall@%d' % k)
118 | correct, cnt = 0.0, 0.0
119 | for i in range(emb.shape[0]):
120 | d_mat[i, i] = 1e10
121 | nns = argpartition(d_mat[i], k)[:k]
122 | if any(labels[i] == labels[nn] for nn in nns):
123 | correct += 1
124 | cnt += 1
125 | accs.append(correct/cnt)
126 | return names, accs
127 |
128 |
129 | def test(ctx):
130 | """Test a model."""
131 | val_data.reset()
132 | outputs = []
133 | labels = []
134 | for batch in val_data:
135 | data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
136 | label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
137 | for x in data:
138 | outputs.append(net(x)[-1])
139 | labels += label
140 |
141 | outputs = nd.concatenate(outputs, axis=0)[:val_data.n_test]
142 | labels = nd.concatenate(labels, axis=0)[:val_data.n_test]
143 | return evaluate_emb(outputs, labels)
144 |
145 |
146 | def get_lr(lr, epoch, steps, factor):
147 | """Get learning rate based on schedule."""
148 | for s in steps:
149 | if epoch >= s:
150 | lr *= factor
151 | return lr
152 |
153 |
154 | def train(epochs, ctx):
155 | """Training function."""
156 | if isinstance(ctx, mx.Context):
157 | ctx = [ctx]
158 | net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
159 |
160 | opt_options = {'learning_rate': opt.lr, 'wd': opt.wd}
161 | if opt.optimizer == 'sgd':
162 | opt_options['momentum'] = 0.9
163 | if opt.optimizer == 'adam':
164 | opt_options['epsilon'] = 1e-7
165 | trainer = gluon.Trainer(net.collect_params(), opt.optimizer,
166 | opt_options,
167 | kvstore=opt.kvstore)
168 | if opt.lr_beta > 0.0:
169 | # Jointly train class-specific beta.
170 | # See "sampling matters in deep embedding learning" paper for details.
171 | beta.initialize(mx.init.Constant(opt.beta), ctx=ctx)
172 | trainer_beta = gluon.Trainer([beta], 'sgd',
173 | {'learning_rate': opt.lr_beta, 'momentum': 0.9},
174 | kvstore=opt.kvstore)
175 |
176 | loss = MarginLoss(margin=opt.margin, nu=opt.nu)
177 |
178 | best_val = 0.0
179 | for epoch in range(epochs):
180 | tic = time.time()
181 | prev_loss, cumulative_loss = 0.0, 0.0
182 |
183 | # Learning rate schedule.
184 | trainer.set_learning_rate(get_lr(opt.lr, epoch, steps, opt.factor))
185 | logging.info('Epoch %d learning rate=%f', epoch, trainer.learning_rate)
186 | if opt.lr_beta > 0.0:
187 | trainer_beta.set_learning_rate(get_lr(opt.lr_beta, epoch, steps, opt.factor))
188 | logging.info('Epoch %d beta learning rate=%f', epoch, trainer_beta.learning_rate)
189 |
190 | # Inner training loop.
191 |         for i in range(200):  # 200 sampled batches per epoch
192 | batch = train_data.next()
193 | data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
194 | label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
195 |
196 | Ls = []
197 | with ag.record():
198 | for x, y in zip(data, label):
199 | a_indices, anchors, positives, negatives, _ = net(x)
200 |
201 | if opt.lr_beta > 0.0:
202 | L = loss(anchors, positives, negatives, beta, y[a_indices])
203 | else:
204 | L = loss(anchors, positives, negatives, opt.beta, None)
205 |
206 | # Store the loss and do backward after we have done forward
207 | # on all GPUs for better speed on multiple GPUs.
208 | Ls.append(L)
209 | cumulative_loss += nd.mean(L).asscalar()
210 |
211 | for L in Ls:
212 | L.backward()
213 |
214 | # Update.
215 | trainer.step(batch.data[0].shape[0])
216 | if opt.lr_beta > 0.0:
217 | trainer_beta.step(batch.data[0].shape[0])
218 |
219 | if (i+1) % opt.log_interval == 0:
220 | logging.info('[Epoch %d, Iter %d] training loss=%f' % (
221 | epoch, i+1, cumulative_loss - prev_loss))
222 | prev_loss = cumulative_loss
223 |
224 | logging.info('[Epoch %d] training loss=%f'%(epoch, cumulative_loss))
225 | logging.info('[Epoch %d] time cost: %f'%(epoch, time.time()-tic))
226 |
227 | names, val_accs = test(ctx)
228 | for name, val_acc in zip(names, val_accs):
229 | logging.info('[Epoch %d] validation: %s=%f'%(epoch, name, val_acc))
230 |
231 | if val_accs[0] > best_val:
232 | best_val = val_accs[0]
233 | logging.info('Saving %s.' % opt.save_model_prefix)
234 | net.save_parameters('%s.params' % opt.save_model_prefix)
235 | return best_val
236 |
237 |
238 | if __name__ == '__main__':
239 | best_val_recall = train(opt.epochs, context)
240 | print('Best validation Recall@1: %.2f.' % best_val_recall)
241 |
--------------------------------------------------------------------------------
/data/mxdata/deep_fashion.py:
--------------------------------------------------------------------------------
1 | # DeepFashion In-shop dataset loaders for margin-based training
2 | from mxnet.gluon.data import DataLoader,Dataset
3 | from mxnet import nd
4 | from mxnet.image import imread
5 | import os
6 | import numpy as np
7 | import mxnet as mx
8 | import mxnet.gluon.data.vision.transforms as T
9 | from collections import Counter
10 |
11 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
12 | default_transform = T.Compose([
13 |     T.Resize(256),
14 |     T.RandomResizedCrop(size=224, scale=(1.0, 1.0), ratio=(1.0, 1.0)),  # crop only, no scale/ratio jitter
15 |     T.RandomFlipLeftRight(),
16 |     T.ToTensor(),  # last: converts HWC uint8 to CHW float
17 |     normalize
18 | ])
19 |
20 | test_transform = T.Compose([
21 | T.Resize(256),
22 | T.CenterCrop(224),
23 | T.ToTensor(),
24 | normalize
25 | ])
26 |
27 |
28 | # In-shop dataset that samples batches of batch_k images per instance id
29 | class DeepInClassFashion(Dataset):
30 |     """
31 |     The DeepFashion In-shop dataset. Reads instance ids from list_item_inshop.txt,
32 |     the train/test split from list_eval_partition.txt and boxes from list_bbox_inshop.txt.
33 |     """
34 | def __init__(self,dir_root,batch_k=4,batch_size=80,is_train = True,transform = default_transform):
35 | self.root = dir_root
36 | self.batch_k = batch_k
37 | self.batch_size = batch_size
38 | self._transform = transform
39 | self.is_train = is_train
40 | self.train_ids = []
41 |         self.boxes = {}  # maps image path -> bbox [x1, y1, x2, y2]
42 |         self.test_ids = set()  # subtracted from each super type's ids
43 |         self.test_images2id = []  # list of (path, id) tuples
44 | with open(os.path.join(self.root,'Anno','list_item_inshop.txt'),'r') as f_instance:
45 | self.instance_count = int(f_instance.readline().strip())
46 | #self.instance_ids = list(f_instance.readlines())
47 | self.images_files = [ [] for _ in range(self.instance_count+1)]
48 |
49 | with open(os.path.join(self.root,'Anno','list_eval_partition.txt'),'r') as f_parti:
50 | f_parti.readline() # read pictures number
51 | f_parti.readline() # read information
52 |             # train id occurrences are counted below to build the sampling distribution
53 |             for line in f_parti.readlines():
54 |                 path, item_id, status = line.strip().split()
55 | int_id = int(item_id.split('_')[-1])
56 | path = str(path)
57 | if status == 'train':
58 | self.images_files[int_id].append(path)
59 | self.train_ids.append(int_id)
60 | else:
61 | self.test_images2id.append((path,int_id))
62 | self.test_ids.add(int_id)
63 | # count train_ids and its distribution
64 |         # post-processing for train_ids
65 | self.train_ids_list = list(set(self.train_ids))
66 | count = Counter(self.train_ids)
67 | self.train_ids_count = np.array([count[int_id] for int_id in self.train_ids_list])
68 | self.train_ids_dist = self.train_ids_count/sum(self.train_ids_count)
69 |
70 | with open(os.path.join(self.root,'Anno','list_bbox_inshop.txt'),'r') as f_bbox:
71 | f_bbox.readline() # read count
72 | f_bbox.readline() # read description
73 | for line in f_bbox.readlines():
74 | list_info = line.strip().split(' ')
75 |                 path, box = str(list_info[0]), list_info[-4:]
76 |                 self.boxes[path] = [int(x) for x in box]  # convert bbox coords to int
77 |         # done reading instance ids, split set and bbox data
78 |
79 | # sub_list_test = self.images_files[list(self.test_ids)]
80 | # self.test_len = 0
81 | # for small_list in sub_list_test:
82 | # self.test_len += len(small_list)
83 | self.build_structure()
84 |
85 |
86 | def write_test_files(self):
87 | """write the test files and label id"""
88 | import csv
89 | f = open('checkpoints/fashion_test.txt','w')
90 | writer = csv.writer(f,dialect='excel')
91 | print(len(self.test_images2id))
92 | print("begin to write")
93 | writer.writerows(self.test_images2id)
94 | f.close()
95 |
96 |
97 |
98 | def build_structure(self):
99 | """build the folder to id structure dataset,
100 | construct the super class structure to select"""
101 | print("the img_root:%s"%(self.root))
102 | img_root = os.path.join(self.root,'img')
103 | self.super_types = {} # super_type2 ids{'men_shorts':[1,23,4,5]}
104 |         for gender in os.listdir(img_root):
105 |             for clothe_type in os.listdir(os.path.join(self.root, 'img', gender)):
106 |                 ids = os.listdir(os.path.join(self.root, 'img', gender, clothe_type))
107 |                 origin_ids = [int(instance_id.split('_')[-1]) for instance_id in ids]
108 |                 split_test = set(origin_ids) - self.test_ids
109 |                 self.super_types[gender + '_' + clothe_type] = list(split_test)  # test ids already split out
110 | self.super_type_list = list(self.super_types.keys())
111 |
112 | self.super_type_count = np.array([len(self.super_types[k]) for k in self.super_types.keys()])
113 |         # number of classes contained in each super type
114 | 
115 |         self.super_type_distri = self.super_type_count / sum(self.super_type_count)  # sampling distribution; assumes each instance id has four or five images
116 |
117 | def __len__(self):
118 | if self.is_train:
119 |             return 1000  # fixed number of sampled batches per epoch
120 |         else:
121 |             return len(self.test_images2id)  # evaluate on the full test image list
122 |
123 | def sampled_batch_data(self):
124 | """choose an super_types,
125 | then choose the batch with batch_k iamges with bbox crop"""
126 | #sample based on the distribution
127 | batch =[]
128 | labels =[]
129 | num_groups = self.batch_size //self.batch_k
130 | super_id = np.random.choice(self.super_type_list,size=1,replace=False,\
131 | p=self.super_type_distri)[0]
132 | try:
133 | sampled_ids = np.random.choice(self.super_types[super_id],\
134 | size=num_groups,replace=False)
135 |         except ValueError:
136 |             sampled_ids = self.super_types[super_id]  # fewer ids than num_groups in this super type: take them all
137 |
138 | #the sampled_ids is like[1,2,5,45,23] in a super_type
139 | for i in sampled_ids:
140 | try:
141 | img_fname = np.random.choice(
142 | self.images_files[i],
143 | size=self.batch_k,
144 | replace=False
145 | )
146 |             except ValueError:
147 |                 continue  # instance has fewer than batch_k images: skip it
148 | batch += img_fname.tolist()
149 | labels += [i]*self.batch_k
150 | return batch,labels # format like img/man/short/id_xxxx01/01_shorts.jpg
151 |
152 | def __getitem__(self, index):
153 | if self.is_train:
154 | imagelist = []
155 | batch,labels = self.sampled_batch_data()
156 | for file in batch:
157 | file_path = os.path.join(self.root,file)
158 | image = imread(file_path,to_rgb=True,flag=1)
159 | if image.shape[2]==1:
160 | print("has gray file",file)
161 | image = nd.tile(image,(1,1,3))
162 | box = self.boxes.get(file,[0,0,256,256])
163 | image = image[box[1]:box[3],box[0]:box[2]] # crop image in width and height
164 | image = self._transform(image)
165 | imagelist.append(image)
166 | return nd.stack(*imagelist,axis=0),nd.array(labels)
167 | else:
168 | path,class_id = self.test_images2id[index]
169 | box = self.boxes.get(path, [0, 0, 256, 256]) # fetch path,id and box
170 | file_path = os.path.join(self.root,path)
171 | image = imread(file_path,to_rgb=True,flag=1)
172 | if image.shape[2]==1:
173 | image = nd.tile(image,(1,1,3))
174 |
175 | image = image[box[1]:box[3], box[0]:box[2]] # crop test image
176 | image = self._transform(image)
177 | return image,class_id
178 |
179 |
180 |
181 |
182 |
183 |
184 | def getDeepInClassFashion(dir_root, batch_k, batch_size):
185 |     """Build train/test loaders; main parameters: dir_root, batch_k, batch_size."""
186 | train_data = DeepInClassFashion(dir_root=dir_root,batch_k=batch_k,batch_size=batch_size,is_train=True,\
187 | transform=default_transform)
188 | test_data = DeepInClassFashion(dir_root=dir_root,batch_k=batch_k,batch_size=batch_size,is_train=False,\
189 | transform=test_transform)
190 |     train_loader = DataLoader(train_data, batch_size=1, shuffle=False, num_workers=6)  # dataset already returns a full batch per item
191 | test_loader = DataLoader(test_data,batch_size=batch_size,shuffle=False,num_workers=6)
192 | return train_loader,test_loader
193 |
194 |
195 | class DeepCrossClassFashion(DeepInClassFashion):
196 | def __init__(self,dir_root,batch_k=4,batch_size=80,is_train = True,transform = default_transform):
197 | super(DeepCrossClassFashion,self).__init__(dir_root,batch_k,batch_size,is_train,transform)
198 | self.datatype='CrossClass'
199 |
200 | def sampled_batch_data(self):
201 | batch = []
202 | labels = []
203 | num_groups = self.batch_size//self.batch_k
204 | sampled_ids = np.random.choice(self.train_ids_list,size=num_groups,replace=False,p=self.train_ids_dist)
205 | for i in sampled_ids:
206 | try:
207 | img_fnames = np.random.choice(self.images_files[i],\
208 | size=self.batch_k,replace=False)
209 | except Exception as e:
210 | continue
211 | batch += img_fnames.tolist()
212 | labels += [i]*self.batch_k
213 | return batch, labels
214 |
215 | def getDeepCrossClassFashion(dir_root,batch_k,batch_size):
216 | train_data = DeepCrossClassFashion(dir_root,batch_k,batch_size=batch_size,\
217 | is_train=True,transform=default_transform)
218 |     test_data = DeepCrossClassFashion(dir_root,batch_k=batch_k,batch_size=batch_size,\
219 |                                       is_train=False,transform=test_transform)  # fixed: the test split must use is_train=False
220 | train_loader = DataLoader(train_data,batch_size=1,shuffle=False,num_workers=6)
221 | test_loader = DataLoader(test_data,batch_size=batch_size,shuffle=False,num_workers=6)
222 | return train_loader,test_loader
223 |
224 |
225 | if __name__ == '__main__':
226 | train_data = DeepInClassFashion(dir_root='data/DeepInShop',batch_k=4,batch_size=80,is_train=True,\
227 | transform=default_transform)
228 | # test_data = DeepCrossClassFashion(dir_root='data/DeepInShop',batch_k=4,batch_size=80,is_train=False,\
229 | # transform=test_transform)
230 | #
231 | # data = train_data[0]
232 | # print('train data x shape',data[0].shape,'training data y shape ',data[1].shape)
233 | # data = test_data[0]
234 | # print('test data x shape',data[0].shape,'training data y shape',data[1])
235 | train_data.write_test_files()
236 |
237 |
238 |
239 |
--------------------------------------------------------------------------------
/README.MD:
--------------------------------------------------------------------------------
1 | # Deep Embedding Learning for Image Retrieval
2 | ---
3 | # Deep Embedding Introduction
4 | DeepEmbedding embeds multiple media types into a shared vector space with deep learning, so that search can run in one unified space.
5 | This project evaluates general multimedia retrieval through visual-level search, fine-grained category (instance) retrieval, and image-text cross-search.
6 | # The General Approach to Image Retrieval
7 | DeepEmbedding aims to learn a relation-preserving mapping via deep metric learning (DeepMetric) or deep hashing (DeepHash), projecting the visual space into a low-dimensional embedding space that a vector search engine can then query. Image retrieval thus splits into two problems: feature extraction, which is what this project studies, and feature search. For the latter, see ANNS (approximate nearest neighbor search) in [NNSearchService](https://github.com/EigenLab/NNSearchService); a minimal sketch of the two-stage pipeline follows.
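A minimal sketch of the embed-then-search pipeline, assuming embeddings have already been extracted (e.g. with `net.extract` as in `train_mx_ebay_margin.py`) and using faiss as the vector engine; all names and sizes below are illustrative:

```python
# Illustrative two-stage retrieval: extract embeddings, then nearest-neighbor search.
import faiss
import numpy as np

def build_gallery_index(embeddings):
    """Index L2-normalized gallery embeddings for exact k-NN search."""
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index

gallery = np.random.rand(10000, 128).astype('float32')     # stand-in for extracted features
gallery /= np.linalg.norm(gallery, axis=1, keepdims=True)  # normalize embeddings
index = build_gallery_index(gallery)
distances, ids = index.search(gallery[:3], k=5)            # top-5 gallery neighbors per query
```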
8 | ## Note
9 | - This project implements retrieval via metric learning with the multi-class N-pair loss and with the sampling-based margin loss (both objectives are written out after this list).
10 | - The reproduced papers: triplet loss [FaceNet](http://arxiv.org/abs/1503.03832)
11 | [N-pair loss](http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/papers/nips16_npairmetriclearning.pdf)
12 | [Margin loss: Sampling Matters in Deep Embedding Learning](https://www.cs.utexas.edu/~cywu/projects/sampling_matters/)
13 | [BatchHard](https://arxiv.org/abs/1703.07737)
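For reference, the two reproduced objectives, transcribed from the papers (α is the margin, β the learned boundary, D_ij the pairwise distance, f an embedding):

```latex
% Margin loss (Sampling Matters): y_ij = 1 for positive pairs, -1 for negative pairs
\ell_{\mathrm{margin}}(i,j) = \bigl[\alpha + y_{ij}\,(D_{ij} - \beta)\bigr]_{+}

% Multi-class N-pair loss for anchor f, positive f^{+} and negatives \{f_i\}
\ell_{\mathrm{npair}} = \log\Bigl(1 + \sum_{i}\exp\bigl(f^{\top} f_i - f^{\top} f^{+}\bigr)\Bigr)
```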
14 |
15 | # Experimental Results
16 | (Click the Baidu Pan links below to download and view the images.)
17 | - Trained on StanfordOnlineProducts, the NMI clustering score is nmi=0.866; a t-SNE projection of the validation-set embeddings shows the cluster structure (the rendered image is about 43 MB). Baidu Pan links:
18 | - [margin-based loss: DeepFashion](https://pan.baidu.com/s/1zLZX24qBb_Op1vsry4LX6w)
19 | metric: nmi=0.866
20 | - [Mc-n-pair loss: StanfordOnlineProduct](https://pan.baidu.com/s/12eNTVsRFu--SYMW8P8HPfQ)
21 | metric: nmi=0.830
22 |
23 |
24 | # Using This Project
25 | 1. Download the corresponding dataset.
26 | 2. Train the model with one of the loss types.
27 | Train the CUB200 model:
28 | ```bash
29 | nohup python train_mx_ebay_margin.py --gpus=1 --batch-k=5 --use_viz --epochs=30 --use_pretrained --steps=12,16,20,24 --name=CUB_200_2011 --save-model-prefix=cub200 > mycub200.out 2>&1 &
30 | ```
31 | Train on Stanford Online Products:
32 | ```bash
33 | nohup python train_mx_ebay_margin.py --batch-k=2 --batch-size=80 --use_pretrained --use_viz --gpus=0 --name=Inclass_ebay --data=EbayInClass --save-model-prefix=ebayinclass > mytraininclass_ebay.log 2>&1
34 | ```
35 |
36 |
37 | 3. Future work:
38 | - Evaluate networks such as R-MAC and NetVLAD for visual retrieval.
39 | - Explore GAN-based methods to strengthen retrieval.
40 | 
41 | __Deep Adversarial Metric Learning__
42 | Deep metric learning cannot make full use of easy negative examples, so [Deep Adversarial Metric Learning](http://openaccess.thecvf.com/content_cvpr_2018/papers/Duan_Deep_Adversarial_Metric_CVPR_2018_paper.pdf)
43 | proposes a new framework called DAML.
44 | __DeepMetric and Deep Hashing applications__
45 | Apply these methods to the fashion, vehicle and person re-ID domains.
46 | __Construct a dataset__: crawl application-domain data.
47 | 
48 | # Dataset
49 |
50 | [CUB200_2011](http://www.vision.caltech.edu/visipedia/CUB-200.html): a fine-grained birds dataset (Caltech-UCSD Birds)
51 | [LFW](http://vis-www.cs.umass.edu/lfw/): face dataset
52 | [StanfordOnlineProducts](http://cvgl.stanford.edu/projects/lifted_struct/): many product categories (furniture, bicycles, cups)
53 | [Street2Shop](http://www.tamaraberg.com/street2shop/): street-to-shop product matching dataset
54 | [DeepFashion](http://mmlab.ie.cuhk.edu.hk/projects/DeepFashion.html): all clothing images
55 |
56 | # Applications of Image Retrieval
57 | - Face identification: deep metric learning for face clustering, from [FaceNet](http://arxiv.org/abs/1503.03832) to [SphereFace](http://ieeexplore.ieee.org/document/8100196/)
58 | - Person re-identification: deep metric learning for pedestrian re-ID, from [MARS](https://pdfs.semanticscholar.org/c038/7e788a52f10bf35d4d50659cfa515d89fbec.pdf) to [NPSM&SVDNet](https://blog.csdn.net/u013982164/article/details/79608100)
59 | - Vehicle search: deep metric learning for fake-plate vehicle detection and vehicle retrieval.
60 | - Street2Products: searching fashion items from street or in-shop photos, i.e. visual search, from [DeepRanking](https://users.eecs.northwestern.edu/~jwa368/pdfs/deep_ranking.pdf) to [DAML](http://openaccess.thecvf.com/content_cvpr_2018/papers/Duan_Deep_Adversarial_Metric_CVPR_2018_paper.pdf)
61 | ## Deep Metric Learning mile-stone paper:
62 | 1.[DrLIM:Dimensionality Reduction by Learning an Invariant Mapping](http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf)
63 | 2.[DeepRanking:Learning fine-grained Image Similarity with DeepRanking](https://users.eecs.northwestern.edu/~jwa368/pdfs/deep_ranking.pdf)
64 | 3.[DeepID2:Deep Learning Face Representation by Joint Identification-Verification](https://arxiv.org/abs/1406.4773)
65 | 4.[FaceNet: A Unified Embedding for Face Recognition and Clustering](http://arxiv.org/abs/1503.03832)
66 | 5.[Defense:In Defense of the Triplet Loss for Person Re-Identification](http://arxiv.org/abs/1703.07737)
67 | 6.[N-pair:Improved Deep Metric Learning with Multi-class N-pair Loss Objective](http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/papers/nips16_npairmetriclearning.pdf)
68 | 7.[Sampling:Sampling Matters in Deep Embedding Learning](https://arxiv.org/abs/1706.07567)
69 | 8.[DAML:Deep Adversarial Metric Learning](http://openaccess.thecvf.com/content_cvpr_2018/papers/Duan_Deep_Adversarial_Metric_CVPR_2018_paper.pdf)
70 | 9.[SphereFace:Deep Hypersphere Embedding for Face Recognition](http://ieeexplore.ieee.org/document/8100196/)
71 |
72 |
73 | # Some DeepHash Work
74 | DeepHash maps images directly to Hamming codes; search can then be accelerated with the faiss binary IVF index family, and storage drops dramatically. A minimal sketch of such an index follows.
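The sketch below shows Hamming-space search over learned hash codes with a faiss binary IVF index; the bit width, list count and data are illustrative, not from this repo:

```python
# Hamming-space search over packed binary codes with an inverted file index.
import faiss
import numpy as np

d = 64                                                # code length in bits (multiple of 8)
codes = np.random.randint(0, 256, size=(100000, d // 8), dtype=np.uint8)  # packed hash codes

quantizer = faiss.IndexBinaryFlat(d)                  # coarse quantizer in Hamming space
index = faiss.IndexBinaryIVF(quantizer, d, 256)       # 256 inverted lists
index.train(codes)
index.add(codes)

index.nprobe = 8                                      # inverted lists visited per query
distances, ids = index.search(codes[:5], 10)          # Hamming distances and neighbor ids
```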
75 |
76 | ## ReImplementation of HashNet
77 | ```bash
78 | python train_hash.py --params
79 | ```
80 |
81 |
82 | ## Deep Hash Learning mile-stone paper:
83 | 1.[CNNH:Supervised Hashing for Image Retrieval via Image Representation Learning](https://www.aaai.org/ocs/index.php/AAAI/AAAI14/paper/view/8137/8861)
84 | 2.[DNNH:Simultaneous feature learning and hash coding with deep neural networks](http://ieeexplore.ieee.org/document/7298947/)
85 | 3.[DLBHC:Deep Learning of Binary Hash Codes for Fast Image Retrieval](http://www.iis.sinica.edu.tw/~kevinlin311.tw/cvprw15.pdf)
86 | 4.[DSH:Deep Supervised Hashing for Fast Image Retrieval](https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Liu_Deep_Supervised_Hashing_CVPR_2016_paper.pdf)
87 | 5.[SUBIC:SuBiC: A Supervised, Structured Binary Code for Image Search](http://ieeexplore.ieee.org/document/8237358/)
88 | 6.[HashNet: Deep Learning to Hash by Continuation](https://arxiv.org/abs/1702.00758)
89 | 7.[DCH:Deep Cauchy Hashing for Hamming Space Retrieval](http://openaccess.thecvf.com/content_cvpr_2018/html/Cao_Deep_Cauchy_Hashing_CVPR_2018_paper.html)
90 |
91 |
92 | ## Joint Visual-Textual Embedding: visual-semantic embedding (cross-modal retrieval)
93 | 1.[VSE++: Improving Visual-Semantic Embeddings with Hard Negatives](http://arxiv.org/abs/1707.05612)
94 | 2.[Dual-Path Convolutional Image-Text Embedding with Instance Loss](http://arxiv.org/abs/1711.05535)
95 | ```bash
96 | python train_vse.py --params
97 | ```
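For context, the VSE++ max-of-hinges objective keeps only the hardest negatives in the batch (s is the image-caption similarity, α the margin, (i, c) a matching pair), transcribed from the paper:

```latex
\ell_{\mathrm{MH}}(i,c) = \max_{c'}\bigl[\alpha + s(i,c') - s(i,c)\bigr]_{+}
                        + \max_{i'}\bigl[\alpha + s(i',c) - s(i,c)\bigr]_{+}
```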
98 |
99 | ## Other Search Modalities
100 | 1.Sketch-based: [Deep Sketch Hashing: Fast Free-hand Sketch-Based Image Retrieval](https://github.com/ymcidence/DeepSketchHashing)
101 | 2.Text cross-modal: [Deep Cross-Modal Hashing](https://github.com/jiangqy/DCMH-CVPR2017)
102 |
103 | ## Approximate Nearest Neighbor Search Acceleration
104 | ANNS (approximate nearest neighbor search) retrieves the closest gallery vectors for a query vector.
105 | Benchmark datasets:
106 | - [SIFT1M](http://corpus-texmex.irisa.fr/): typical 128-dim SIFT vectors
107 | - [DEEP1B](http://sites.skoltech.ru/compvision/noimi/): deep descriptors, proposed by Yandex
108 | - [GIST1M](http://corpus-texmex.irisa.fr/): typical 512-dim GIST vectors
109 |
110 | ### papers
111 | -- PQ based
112 | 1.Converts traditional scalar quantization into segmented product quantization: [Product Quantization for Nearest Neighbor Search](http://ieeexplore.ieee.org/document/5432202/) (a minimal faiss IVF-PQ sketch follows this list)
113 | 2.Similar to Cartesian quantization: rotates the vectors as a whole so that the per-segment axes align with the data; the smaller the reconstruction error between centroids and data, the smaller the compression loss: [Optimized Product Quantization](http://ieeexplore.ieee.org/document/6678503/)
114 | 
115 | 3.[Revisiting the Inverted Indices for Billion-Scale Approximate Nearest Neighbors](http://arxiv.org/abs/1802.02422): proposes anchor points (line-quantization points) to split regions into groups and prunes some groups during search for speed; it overlaps heavily with the RobustiQ paper, and additionally builds an HNSW over one layer to accelerate selecting candidate center points.
116 | 4.Coarse quantization with a double inverted index lowers the clustering dimensionality while increasing the number of centroids, and a multi-sequence algorithm speeds up coarse hits: [The Inverted MultiIndex](http://cache-ash04.cdn.yandex.net/download.yandex.ru/company/cvpr2012.pdf)
117 | 5.Polysemous codes map Hamming distances onto distances between quantization centroids, filtering candidates at the entry point: [Polysemous codes](https://arxiv.org/abs/1609.01882)
118 | 6.Additive quantization, two papers: Additive Quantization for Extreme Vector Compression uses sparse additive quantization for a smaller quantization error, at the cost of extra storage and computation.
119 | 7.Composite Quantization: overlaps with the additive quantization above, and additionally proposes NOCQ as a substitute for APQ to speed up computation.
120 | 8.RobustiQ: A Robust ANN Search Method for Billion-scale Similarity Search on GPUs: very similar to paper 3; adds line quantization (group pruning points) on top of traditional IVF to improve PQ search metrics.
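A minimal faiss IVF-PQ sketch of the scheme the papers above build on: coarse inverted lists plus product quantization of the residuals (all parameters are illustrative):

```python
# IVF-PQ: 1024 coarse lists, vectors encoded as 16 sub-quantizers of 8 bits each.
import faiss
import numpy as np

d = 128
xb = np.random.rand(100000, d).astype('float32')

quantizer = faiss.IndexFlatL2(d)                      # coarse centroids
index = faiss.IndexIVFPQ(quantizer, d, 1024, 16, 8)
index.train(xb)                                       # learn centroids and PQ codebooks
index.add(xb)

index.nprobe = 16                                     # more lists probed = higher recall, slower
D, I = index.search(xb[:5], 10)
```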
121 |
122 | -- Hierarchical indexing:
123 | 1.Zoom: Multi-View Vector Search for Optimizing Accuracy, Latency and Memory. Runs k-means over the full data and builds the first layer as an HNSW navigation graph over millions of centroids, replacing the usual IVF; the second layer ranks with quantized computation (not limited to PQ or APQ); the third layer re-ranks at full precision. This typical three-layer index lets each layer be optimized by swapping in different components.
124 | 2.Pyramid: A General Framework for Distributed Similarity Search (from a CUHK lab). Compares how naive HNSW partitioning and meta-HNSW/sub-HNSW partitioning affect search quality, searching over two HNSW layers; the upper meta-HNSW graph solves cross-machine search. Compared with naive partitioning, building adds meta-HNSW construction, full-data point queries and per-shard construction, so indexing is slower; during search, however, the meta-HNSW acts like a hash and yields much higher throughput than naive HNSW's fan-out-and-merge across the whole cluster.
125 | 3.Combining hierarchical indexing with graph and quantization optimizations opens room for improved index designs.
126 | 
127 | -- Subset indexing
128 | 1.Reconfigurable Inverted Index: targets search scenarios that need both approximate vector matching and tag filtering. The paper formulates the subset-search problem and gives two solutions; although quite clever, it is unclear how it would scale to billion-scale data.
129 |
130 |
131 |
132 | -- Graph Based
133 | 1.See the NSW (navigable small world) paper, an index built with a skip-list-like structure: [Approximate nearest neighbor algorithm based on navigable small world graphs](https://linkinghub.elsevier.com/retrieve/pii/S0306437913001300)
134 | 2.[Efficient and robust approximate nearest neighbor search using hierarchical Navigable Small World graphs](http://arxiv.org/abs/1603.09320) (see the faiss HNSW sketch after this list)
135 | 3.[EFANNA : An Extremely Fast Approximate Nearest Neighbor Search Algorithm Based on kNN Graph](http://arxiv.org/abs/1609.07228)
136 | 4.[A Revisit on Deep Hashings for Large-scale Content Based Image Retrieval](http://arxiv.org/abs/1711.06016)
137 | 5.[RobustiQ A Robust ANN Search Method for Billion-scale Similarity Search on GPUs](http://users.monash.edu/~yli/assets/pdf/icmr19-sigconf.pdf)
138 | 6.[GGNN: Graph-based GPU Nearest Neighbor Search](https://arxiv.org/pdf/1912.01059.pdf)
139 | 7.[Zoom: Multi-View Vector Search for Optimizing Accuracy, Latency and Memory](https://www.microsoft.com/en-us/research/uploads/prod/2018/08/zoom-multi-view-tech-report.pdf)
140 | 8.[Vector and Line Quantization for Billion-scale Similarity Search on GPUs](https://arxiv.org/pdf/1901.00275.pdf)
141 | 9.[DiskANN: Fast Accurate Billion-point Nearest Neighbor Search on a Single Node](https://suhasjs.github.io/files/diskann_neurips19.pdf): proposes the Vamana graph, enabling billion-scale vector search with an index built on a single machine.
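A minimal faiss HNSW sketch (illustrative parameters): graph-based search needs no training phase and supports incremental adds:

```python
# HNSW graph index: build- and query-time beam widths control the speed/recall trade-off.
import faiss
import numpy as np

d = 128
xb = np.random.rand(50000, d).astype('float32')

index = faiss.IndexHNSWFlat(d, 32)                    # 32 links per node
index.hnsw.efConstruction = 200                       # build-time beam width
index.add(xb)                                         # no train() required

index.hnsw.efSearch = 64                              # query-time beam width
D, I = index.search(xb[:5], 10)
```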
142 |
143 | -- Hamming Code
144 | 1.[Fast Exact Search in Hamming Space with Multi-Index Hashing](http://arxiv.org/abs/1307.2982)
145 | 2.[Fast Nearest Neighbor Search in the Hamming Space](http://link.springer.com/10.1007/978-3-319-27671-7_27)
146 | 3.[Web-Scale Responsive Visual Search at Bing](http://arxiv.org/abs/1802.04914)
147 | 4.[Recurrent Binary Embedding for GPU-Enabled Exhaustive Retrieval from Billion-Scale Semantic Vectors](http://arxiv.org/abs/1802.06466)
148 | ### library
149 | 1.[faiss]: currently ships index engines of many kinds: IVF, IMI, PQ, OPQ, PCA, two-level residual re-ranking (ReRank-PQ), HNSW, Link and Code, etc.
150 | 2.Choosing an index layout: for high capacity at lower accuracy, IMI+OPQ+re-rank;
151 | for high accuracy, HNSW. The NSG index currently does not support incremental insertion, so it was not adopted. (An illustrative index_factory sketch follows this list.)
152 | 3.sptag: builds the graph by spatial partitioning.
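An illustrative `index_factory` sketch of the two layout choices above; the factory strings and dimension are assumptions, not settings used by this repo:

```python
# Build the two index layouts from factory strings.
import faiss

d = 128
capacity_first = faiss.index_factory(d, "OPQ16_64,IMI2x12,PQ16")  # high capacity, lower accuracy
accuracy_first = faiss.index_factory(d, "HNSW32")                 # high accuracy, more memory
```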
153 | ### framework
154 | 1.vearch: by JD AI; tightly integrated with deep learning models.
155 | 2.milvus: by Zilliz; built with a database mindset.
156 |
157 |
158 |
--------------------------------------------------------------------------------
/train_mx_ebay_margin.py:
--------------------------------------------------------------------------------
1 | # Train on the ebay (Stanford Online Products) dataset; model copied from the mxnet embedding_learning example
2 |
3 | import argparse
4 | import logging
5 | import time
6 | import numpy as np
7 |
8 | from bottleneck import argpartition
9 | import mxnet as mx
10 | from data import getCUB200,getEbayCrossClassData,getEbayInClassData
11 | from data import getDeepInClassFashion,getDeepCrossClassFashion
12 | import os
13 | from mxnet import gluon
14 | import mxnet.gluon.model_zoo.vision as vision
15 | from mxnet import autograd as ag
16 | from mxnet import nd
17 | from models.mx_margin_model import MarginLoss,MarginNet
18 | from utils import Visulizer
19 | from configs import opt as opt_conf
20 | import ipdb
21 | from tqdm import tqdm
22 | logging.basicConfig(level=logging.INFO)
23 |
24 | parser = argparse.ArgumentParser(description='train a margin-based deep metric embedding model.')
25 | parser.add_argument('--data', type=str, default='CUB_200_2011',
26 |                     help='dataset name; one of the keys of data_dict below.')
27 | parser.add_argument('--embed-dim', type=int, default=128,
28 | help='dimensionality of image embedding. default is 128.')
29 | parser.add_argument('--batch-size', type=int, default=70,
30 | help='training batch size per device (CPU/GPU). default is 70.')
31 | parser.add_argument('--batch-k', type=int, default=5,
32 | help='number of images per class in a batch. default is 5.')
33 | parser.add_argument('--gpus', type=str, default='',
34 | help='list of gpus to use, e.g. 0 or 0,2,5. empty means using cpu.')
35 | parser.add_argument('--epochs', type=int, default=20,
36 | help='number of training epochs. default is 20.')
37 | parser.add_argument('--optimizer', type=str, default='adam',
38 | help='optimizer. default is adam.')
39 | parser.add_argument('--lr', type=float, default=0.0001,
40 | help='learning rate. default is 0.0001.')
41 | parser.add_argument('--lr-beta', type=float, default=0.1,
42 | help='learning rate for the beta in margin based loss. default is 0.1.')
43 | parser.add_argument('--margin', type=float, default=0.2,
44 | help='margin for the margin based loss. default is 0.2.')
45 | parser.add_argument('--beta', type=float, default=1.2,
46 | help='initial value for beta. default is 1.2.')
47 | parser.add_argument('--nu', type=float, default=0.0,
48 | help='regularization parameter for beta. default is 0.0.')
49 | parser.add_argument('--factor', type=float, default=0.5,
50 | help='learning rate schedule factor. default is 0.5.')
51 | parser.add_argument('--steps', type=str, default='12,14,16,18',
52 | help='epochs to update learning rate. default is 12,14,16,18.')
53 | parser.add_argument('--wd', type=float, default=0.0001,
54 | help='weight decay rate. default is 0.0001.')
55 | parser.add_argument('--seed', type=int, default=123,
56 | help='random seed to use. default=123.')
57 | parser.add_argument('--model', type=str, default='resnet50_v2',
58 | help='type of model to use. see vision_model for options.')
59 | parser.add_argument('--save-model-prefix', type=str, default='margin_loss_model',
60 | help='prefix of models to be saved.')
61 | parser.add_argument('--use_pretrained', action='store_true',
62 | help='enable using pretrained model from gluon.')
63 | parser.add_argument('--kvstore', type=str, default='device',
64 | help='kvstore to use for trainer.')
65 | parser.add_argument('--log-interval', type=int, default=20,
66 | help='number of batches to wait before logging.')
67 | parser.add_argument('--debug', action='store_true',
68 |                     help='enable debug mode to step through the model pipeline')
69 | parser.add_argument('--use_viz',action='store_true',
70 | help='enable using visualization to vis the loss curve')
71 | parser.add_argument('--name',type=str,default='cub200',
72 | help='the train instance name')
73 | parser.add_argument('--load_model_path',type=str,default='checkpoints/Fashion_In.params',
74 | help='the trained model')
75 |
76 | opt = parser.parse_args()
77 | opt.save_model_prefix = opt.name # force save model prefix to name
78 | logging.info(opt)
79 | # Settings.
80 | mx.random.seed(opt.seed)
81 | np.random.seed(opt.seed)
82 |
83 | batch_size = opt.batch_size
84 |
85 | os.environ['CUDA_VISIBLE_DEVICES']='0,1,2,3'
86 | gpus = [] if opt.gpus is None or opt.gpus == '' else [
87 |     int(gpu) for gpu in opt.gpus.split(',')]
88 | num_gpus = len(gpus)
89 |
90 | batch_size *= max(1, num_gpus)
91 | context = [mx.gpu(i) for i in gpus] if num_gpus > 0 else [mx.cpu()]
92 | steps = [int(step) for step in opt.steps.split(',')]
93 |
94 | # Construct model.
95 | kwargs = {'ctx': context, 'pretrained': opt.use_pretrained}
96 | net = vision.get_model(opt.model, **kwargs)
97 |
98 | if opt.use_pretrained:
99 | # Use a smaller learning rate for pre-trained convolutional layers.
100 | for v in net.collect_params().values():
101 | if 'conv' in v.name:
102 | setattr(v, 'lr_mult', 0.01)
103 |
104 | net.hybridize()
105 | net = MarginNet(net.features, opt.embed_dim, opt.batch_k)
106 | beta = mx.gluon.Parameter('beta', shape=(100000,))
107 | data_dict={'CUB_200_2011':{'data_dir':'CUB_200_2011','func':getCUB200},
108 | 'EbayInClass':{'data_dir':'Stanford_Online_Products','func':getEbayInClassData},
109 | 'EbayCrossClass':{'data_dir':'Stanford_Online_Products','func':getEbayCrossClassData},
110 | 'DeepFashionInClass':{'data_dir':'DeepInShop','func':getDeepInClassFashion},
111 | 'DeepFashionCrossClass':{'data_dir':'DeepInShop','func':getDeepCrossClassFashion}}
112 | if opt.debug:
113 | ipdb.set_trace()
114 | train_dataloader,val_dataloader = data_dict[opt.data]['func'](os.path.join('data/',data_dict[opt.data]['data_dir']),
115 | batch_k=opt.batch_k,batch_size=opt.batch_size)
116 | # if opt.data=='Ebay':
117 | # train_dataloader,val_dataloader = getEbayData(os.path.join('data/',opt.data),batch_k=opt.batch_k,batch_size=batch_size )
118 | # elif opt.data=='CUB_200_2011':
119 | # train_dataloader,val_dataloader = getCUB200(os.path.join('data/',opt.data),batch_k=opt.batch_k,batch_size=batch_size )
120 | #train_dataloader has datashape [1,batch_size,channel,W,H] for image data,[1,batch_size,1] for label
121 | #test_dataloader has datashape [batch_size,channel,W,H] for image data,[batch_size,1] for label
122 | # use viz
123 | if opt.use_viz:
124 | viz = Visulizer(host=opt_conf.vis_host,port=opt_conf.vis_port,env='mx_margin'+opt.name)
125 | viz.log(str(opt))
126 |     viz.log("start to train mxnet margin model name:%s" % (opt.name))
127 |
128 | def get_distance_matrix(x):
129 | """Get distance matrix given a matrix. Used in testing."""
130 | if opt.use_viz:
131 | viz.log("begin to compute distance matrix")
132 | square = nd.sum(x ** 2.0, axis=1, keepdims=True)
133 | distance_square = square + square.transpose() - (2.0 * nd.dot(x, x.transpose()))
134 | return nd.sqrt(distance_square)
135 |
136 | def evaluate_emb(emb, labels):
137 | """Evaluate embeddings based on Recall@k."""
138 | d_mat = get_distance_matrix(emb)
139 |     #d_mat = d_mat.asnumpy()
140 |     #labels = labels.asnumpy()  # operate directly on mxnet.ndarray; converting to numpy caused memory errors
141 |
142 | names = []
143 | accs = []
144 | for i in range(emb.shape[0]):
145 | d_mat[i,i]=1e10
146 | index_mat = nd.argsort(d_mat)
147 | nd.waitall()
148 |     if opt.use_viz:
149 |         viz.log("distance matrix sorted")
150 | for k in [1, 2, 4, 8, 16]:
151 | names.append('Recall@%d' % k)
152 | correct, cnt = 0.0, 0.0
153 | index_mat_part = index_mat[:,:k]
154 | for i in range(emb.shape[0]):
155 | if any(labels[i] == labels[nn] for nn in index_mat_part[i]):
156 | correct +=1
157 | cnt +=1
158 | # for i in range(emb.shape[0]):
159 | # d_mat[i, i] = 1e10
160 | # nns = argpartition(d_mat[i], k)[:k]
161 | # if any(labels[i] == labels[nn] for nn in nns):
162 | # correct += 1
163 | # cnt += 1
164 | accs.append(correct/cnt)
165 | return names, accs
166 |
167 |
168 | def test(ctx):
169 | """Test a model."""
170 | if opt.use_viz:
171 | viz.log("begin to valid")
172 |
173 | outputs = []
174 | labels = []
175 | for i,batch in enumerate(val_dataloader):
176 | data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
177 | label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
178 | # after split data is list of two data batch
179 | for x in data:
180 | outputs.append(net(x)[-1])
181 |         labels += label
182 |         if opt.use_viz and (i+1) % (opt.log_interval*2) == 0:
183 |             viz.log("valid iter {0}".format(i))
184 |     outputs = nd.concatenate(outputs, axis=0)
185 |     labels = nd.concatenate(labels, axis=0)
186 |     if opt.use_viz: viz.log("begin to eval embedding search")
187 | return evaluate_emb(outputs, labels)
188 |
189 | def get_lr(lr, epoch, steps, factor):
190 | """Get learning rate based on schedule."""
191 | for s in steps:
192 | if epoch >= s:
193 | lr *= factor
194 | return lr
195 |
196 | def train(epochs,ctx):
197 | """Training function."""
198 | if isinstance(ctx, mx.Context):
199 | ctx = [ctx]
200 | net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
201 |
202 | opt_options = {'learning_rate': opt.lr, 'wd': opt.wd}
203 | if opt.optimizer == 'sgd':
204 | opt_options['momentum'] = 0.9
205 | if opt.optimizer == 'adam':
206 | opt_options['epsilon'] = 1e-7
207 | trainer = gluon.Trainer(net.collect_params(), opt.optimizer,
208 | opt_options,
209 | kvstore=opt.kvstore)
210 | if opt.lr_beta > 0.0:
211 | # Jointly train class-specific beta.
212 | # See "sampling matters in deep embedding learning" paper for details.
213 | beta.initialize(mx.init.Constant(opt.beta), ctx=ctx)
214 | trainer_beta = gluon.Trainer([beta], 'sgd',
215 | {'learning_rate': opt.lr_beta, 'momentum': 0.9},
216 | kvstore=opt.kvstore)
217 |
218 | loss = MarginLoss(margin=opt.margin, nu=opt.nu)
219 |
220 |
221 | best_val =0.0
222 | for epoch in range(epochs):
223 | tic = time.time()
224 | prev_loss,cumulative_loss = 0.0,0.0
225 |
226 |         # Learning rate schedule.
227 |         trainer.set_learning_rate(get_lr(opt.lr, epoch, steps, opt.factor))
228 |         if opt.use_viz:
229 |             viz.log("Epoch {0} learning rate = {1}".format(epoch, trainer.learning_rate))
230 |         if opt.lr_beta > 0:
231 |             trainer_beta.set_learning_rate(get_lr(opt.lr_beta, epoch, steps, opt.factor))
232 |             logging.info("Epoch {0} beta learning rate={1}".format(epoch, trainer_beta.learning_rate))
233 |
234 | #Inner training loop
235 | for i,batch_data in enumerate(train_dataloader):
236 | batch = batch_data[0][0] # batch_data is a tuple(x,y) x shape is [1,70,3,227,227]
237 | label = batch_data[1][0]
238 | data = gluon.utils.split_and_load(batch,ctx_list=ctx,batch_axis=0)
239 | label = gluon.utils.split_and_load(label,ctx_list=ctx,batch_axis=0)
240 |
241 | # After split,the data and label datatype is list
242 | Ls = []
243 | with ag.record():
244 | for x, y in zip(data, label):
245 | a_indices, anchors, positives, negatives, _ = net(x)
246 |
247 | if opt.lr_beta > 0.0:
248 | L = loss(anchors, positives, negatives, beta, y[a_indices])
249 | else:
250 | L = loss(anchors, positives, negatives, opt.beta, None)
251 |
252 | # Store the loss and do backward after we have done forward
253 | # on all GPUs for better speed on multiple GPUs.
254 | Ls.append(L)
255 | cumulative_loss += nd.mean(L).asscalar()
256 |
257 | for L in Ls:
258 | L.backward()
259 |
260 | # Update.
261 | trainer.step(batch.shape[0])
262 | if opt.lr_beta > 0.0:
263 | trainer_beta.step(batch.shape[0])
264 |
265 |             if (i + 1) % opt.log_interval == 0:
266 |                 logging.info('[Epoch {0}, Iter {1}] training loss={2}'.format(
267 |                     epoch, i + 1, cumulative_loss - prev_loss))
268 |                 if opt.use_viz:
269 |                     viz.plot('margin_loss', cumulative_loss - prev_loss)
270 |                 prev_loss = cumulative_loss
271 |             if opt.debug:
272 |                 import ipdb
273 |                 ipdb.set_trace()
274 |                 break
275 |
276 |         logging.info('[Epoch {0}] training loss={1}'.format(epoch, cumulative_loss))
277 |         logging.info('[Epoch {0}] time cost: {1}'.format(epoch, time.time() - tic))
278 |
279 | names, val_accs = test(ctx)
280 | for name, val_acc in zip(names, val_accs):
281 |             logging.info('[Epoch {0}] validation: {1}={2}'.format(epoch, name, val_acc))
282 |         if opt.use_viz: viz.plot('recall@1', val_accs[0])
283 | 
284 |         if val_accs[0] > best_val:
285 |             best_val = val_accs[0]
286 |             logging.info('Saving {0}'.format(opt.save_model_prefix))
287 | net.save_parameters('checkpoints/%s.params' % opt.save_model_prefix)
288 | return best_val
289 |
290 |
291 | def extract_feature():
292 | """
293 | extract data feature vector and save
294 | :param model:
295 | :param dataloader:
296 | :return:
297 | """
298 | global net
299 | deepfashion_csv = 'checkpoints/deepfashion.csv' # write vector to this file
300 | net.initialize()
301 | net.collect_params().reset_ctx(context)
302 | net.load_parameters(opt.load_model_path,ctx=context)
303 | import csv
304 | f = open(deepfashion_csv,'w')
305 | writer = csv.writer(f,dialect='excel')
306 |
307 | for i,batch in tqdm(enumerate(val_dataloader)):
308 | batch_size = batch[0].shape[0]
309 | data = gluon.utils.split_and_load(batch[0], ctx_list=context, batch_axis=0)
310 | label = gluon.utils.split_and_load(batch[1], ctx_list=context, batch_axis=0)
311 | # after split data is list of two data batch
312 | small_batch_feature = []
313 | for x in data:
314 | feature = net.extract(x)
315 | small_batch_feature.append(feature)
316 |         image_id = np.arange(i*batch_size, (i+1)*batch_size).reshape(-1, 1)  # sequential image ids; assumes a constant batch size
317 | vector = nd.concatenate(small_batch_feature,axis=0).asnumpy() # concatenate the feature
318 | label = np.array([x.asnumpy() for x in label]).reshape(-1,1)
319 | result = np.hstack((image_id,label,vector))
320 | writer.writerows(result)
321 | print("finished extract feature")
322 | f.close()
323 | return "True finished"
324 |
325 |
326 |
327 |
328 |
329 | if __name__ == '__main__':
330 |     if opt.debug:
331 |         ipdb.set_trace()
332 | best_val_recall = train(opt.epochs,context)
333 | print("Best validation Recall@1:%.2f"%(best_val_recall))
334 |
335 | # result = extract_feature()
336 | # print(result)
--------------------------------------------------------------------------------